diff --git "a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/par.subgraph.pre-dse.mlir" "b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/par.subgraph.pre-dse.mlir" new file mode 100644--- /dev/null +++ "b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/par.subgraph.pre-dse.mlir" @@ -0,0 +1,25253 @@ +#loc = loc(unknown) +module attributes { + llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", + llvm.target_triple = "x86_64-unknown-linux-gnu", + "onnx-mlir.symbol-postfix" = "onnxmodel.onnx.mlir", + vaimlconf.device = "stx", + vaimlconf.device_models = "${vaimlconf.install_dir}/data/deviceModels", + vaimlconf.install_dir = "/usr/local/lib/python3.10/dist-packages/flexml/flexml_extras", + vaimlconf.library_metadata = ["${vaimlconf.install_dir}/data/libraryMetadata/L1", "${vaimlconf.install_dir}/data/libraryMetadata/L2", "${vaimlconf.install_dir}/../../vitis_mllib/L1/metadata", "${vaimlconf.install_dir}/../../vitis_mllib/L2/metadata", "${vaimlconf.install_dir}/share/microkernel-tiling/tiling-recipe-specs"], + vaimlconf.single_core_compiler = "chess"} { + func.func private @forward(%arg0: tensor<1x180x320x4xbf16> loc(unknown), %arg1: tensor<1x16x90x160xbf16> loc(unknown), %arg2: tensor<1x20x45x80xbf16> loc(unknown), %arg3: tensor<1x40x23x40xbf16> loc(unknown), %arg4: tensor<1x64x12x20xbf16> loc(unknown)) -> (tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16>) attributes { + max_heap_size = 2240 : ui32, + max_stack_size = 2368 : ui32, + stack_heap_start_address = 45696 : ui32, + total_stack_heap_region_size = 6912 : ui32} { + %0 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/biases"} -> tensor<4xbf16> loc(#loc) + %1 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_443/weights"} -> tensor<4x16x1x1xbf16> loc(#loc) + %2 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_441/biases"} -> tensor<16xbf16> loc(#loc) + %3 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_441/weights"} -> tensor<16x16x3x3xbf16> loc(#loc) + %4 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/biases"} -> tensor<16xbf16> loc(#loc) + %5 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_439/weights"} -> tensor<16x35x3x3xbf16> loc(#loc) + %6 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_431/Constant_0_0"} -> tensor<1x16x90x160xbf16> loc(#loc2) + %7 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/biases"} -> tensor<16xbf16> loc(#loc) + %8 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_428/weights"} -> tensor<16x32x3x3xbf16> loc(#loc) + %9 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/biases"} -> tensor<32xbf16> loc(#loc) + %10 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_423/weights"} -> tensor<32x32x3x3xbf16> loc(#loc) + %11 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/biases"} -> tensor<32xbf16> loc(#loc) + %12 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_419/weights"} -> tensor<32x59x3x3xbf16> loc(#loc) + %13 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_411/Constant_0_0"} -> tensor<1x20x45x80xbf16> loc(#loc3) + %14 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_408/biases"} -> tensor<20xbf16> loc(#loc) + %15 = xten_nn.load_external_const {file = "constants.h5", key = 
"Conv_408/weights"} -> tensor<20x40x3x3xbf16> loc(#loc) + %16 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/biases"} -> tensor<40xbf16> loc(#loc) + %17 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_403/weights"} -> tensor<40x40x3x3xbf16> loc(#loc) + %18 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/biases"} -> tensor<40xbf16> loc(#loc) + %19 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_399/weights"} -> tensor<40x107x3x3xbf16> loc(#loc) + %20 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_385/Constant_0_0"} -> tensor<1x40x23x40xbf16> loc(#loc4) + %21 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_382/biases"} -> tensor<40xbf16> loc(#loc) + %22 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_382/weights"} -> tensor<40x80x3x3xbf16> loc(#loc) + %23 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/biases"} -> tensor<80xbf16> loc(#loc) + %24 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_377/weights"} -> tensor<80x80x3x3xbf16> loc(#loc) + %25 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/biases"} -> tensor<80xbf16> loc(#loc) + %26 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_373/weights"} -> tensor<80x171x3x3xbf16> loc(#loc) + %27 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_359/Constant_0_0"} -> tensor<1x64x12x20xbf16> loc(#loc5) + %28 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/biases"} -> tensor<64xbf16> loc(#loc) + %29 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_356/weights"} -> tensor<64x128x3x3xbf16> loc(#loc) + %30 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/biases"} -> tensor<128xbf16> loc(#loc) + %31 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_351/weights"} -> tensor<128x128x3x3xbf16> loc(#loc) + %32 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/biases"} -> tensor<128xbf16> loc(#loc) + %33 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_340/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) + %34 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/biases"} -> tensor<128xbf16> loc(#loc) + %35 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_343/weights"} -> tensor<128x960x1x1xbf16> loc(#loc) + %36 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_331/biases"} -> tensor<960xbf16> loc(#loc) + %37 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_331/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) + %38 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/biases"} -> tensor<160xbf16> loc(#loc) + %39 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_329/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) + %40 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/biases"} -> tensor<960xbf16> loc(#loc) + %41 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_320/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) + %42 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/biases"} -> tensor<240xbf16> loc(#loc) + %43 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_318/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) + %44 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_308/biases"} -> tensor<960xbf16> loc(#loc) + %45 = xten_nn.load_external_const 
{file = "constants.h5", key = "Conv_308/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) + %46 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/biases"} -> tensor<960xbf16> loc(#loc) + %47 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_299/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) + %48 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/biases"} -> tensor<160xbf16> loc(#loc) + %49 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_297/weights"} -> tensor<160x960x1x1xbf16> loc(#loc) + %50 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/biases"} -> tensor<960xbf16> loc(#loc) + %51 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_288/weights"} -> tensor<960x240x1x1xbf16> loc(#loc) + %52 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/biases"} -> tensor<240xbf16> loc(#loc) + %53 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_286/weights"} -> tensor<240x960x1x1xbf16> loc(#loc) + %54 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_276/biases"} -> tensor<960xbf16> loc(#loc) + %55 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_276/weights"} -> tensor<960x1x9x9xbf16> loc(#loc) + %56 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/biases"} -> tensor<960xbf16> loc(#loc) + %57 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_267/weights"} -> tensor<960x160x1x1xbf16> loc(#loc) + %58 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/biases"} -> tensor<160xbf16> loc(#loc) + %59 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_266/weights"} -> tensor<160x672x1x1xbf16> loc(#loc) + %60 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/biases"} -> tensor<672xbf16> loc(#loc) + %61 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_257/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) + %62 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/biases"} -> tensor<168xbf16> loc(#loc) + %63 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_255/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) + %64 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/biases"} -> tensor<672xbf16> loc(#loc) + %65 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_245/weights"} -> tensor<672x1x9x9xbf16> loc(#loc) + %66 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/biases"} -> tensor<672xbf16> loc(#loc) + %67 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_236/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) + %68 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/biases"} -> tensor<112xbf16> loc(#loc) + %69 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_234/weights"} -> tensor<112x672x1x1xbf16> loc(#loc) + %70 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_225/biases"} -> tensor<672xbf16> loc(#loc) + %71 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_225/weights"} -> tensor<672x168x1x1xbf16> loc(#loc) + %72 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/biases"} -> tensor<168xbf16> loc(#loc) + %73 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_223/weights"} -> tensor<168x672x1x1xbf16> loc(#loc) + %74 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/biases"} -> tensor<672xbf16> loc(#loc) + %75 = 
xten_nn.load_external_const {file = "constants.h5", key = "Conv_213/weights"} -> tensor<672x1x3x3xbf16> loc(#loc) + %76 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/biases"} -> tensor<672xbf16> loc(#loc) + %77 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_204/weights"} -> tensor<672x112x1x1xbf16> loc(#loc) + %78 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/biases"} -> tensor<112xbf16> loc(#loc) + %79 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_203/weights"} -> tensor<112x480x1x1xbf16> loc(#loc) + %80 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/biases"} -> tensor<480xbf16> loc(#loc) + %81 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_194/weights"} -> tensor<480x120x1x1xbf16> loc(#loc) + %82 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/biases"} -> tensor<120xbf16> loc(#loc) + %83 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_192/weights"} -> tensor<120x480x1x1xbf16> loc(#loc) + %84 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/biases"} -> tensor<480xbf16> loc(#loc) + %85 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_182/weights"} -> tensor<480x1x3x3xbf16> loc(#loc) + %86 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/biases"} -> tensor<480xbf16> loc(#loc) + %87 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_173/weights"} -> tensor<480x80x1x1xbf16> loc(#loc) + %88 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_171/biases"} -> tensor<80xbf16> loc(#loc) + %89 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_171/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) + %90 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/biases"} -> tensor<184xbf16> loc(#loc) + %91 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_162/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) + %92 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/biases"} -> tensor<184xbf16> loc(#loc) + %93 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_153/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) + %94 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/biases"} -> tensor<80xbf16> loc(#loc) + %95 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_151/weights"} -> tensor<80x184x1x1xbf16> loc(#loc) + %96 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/biases"} -> tensor<184xbf16> loc(#loc) + %97 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_142/weights"} -> tensor<184x1x3x3xbf16> loc(#loc) + %98 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/biases"} -> tensor<184xbf16> loc(#loc) + %99 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_133/weights"} -> tensor<184x80x1x1xbf16> loc(#loc) + %100 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/biases"} -> tensor<80xbf16> loc(#loc) + %101 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_131/weights"} -> tensor<80x200x1x1xbf16> loc(#loc) + %102 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/biases"} -> tensor<200xbf16> loc(#loc) + %103 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_122/weights"} -> tensor<200x1x3x3xbf16> loc(#loc) + %104 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_113/biases"} -> tensor<200xbf16> 
loc(#loc) + %105 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_113/weights"} -> tensor<200x80x1x1xbf16> loc(#loc) + %106 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/biases"} -> tensor<80xbf16> loc(#loc) + %107 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_112/weights"} -> tensor<80x240x1x1xbf16> loc(#loc) + %108 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/biases"} -> tensor<240xbf16> loc(#loc) + %109 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_103/weights"} -> tensor<240x1x3x3xbf16> loc(#loc) + %110 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/biases"} -> tensor<240xbf16> loc(#loc) + %111 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_94/weights"} -> tensor<240x40x1x1xbf16> loc(#loc) + %112 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/biases"} -> tensor<40xbf16> loc(#loc) + %113 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_92/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) + %114 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/biases"} -> tensor<120xbf16> loc(#loc) + %115 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_83/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) + %116 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/biases"} -> tensor<32xbf16> loc(#loc) + %117 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_81/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) + %118 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/biases"} -> tensor<120xbf16> loc(#loc) + %119 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_78/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) + %120 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/biases"} -> tensor<120xbf16> loc(#loc) + %121 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_76/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) + %122 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_74/biases"} -> tensor<40xbf16> loc(#loc) + %123 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_74/weights"} -> tensor<40x120x1x1xbf16> loc(#loc) + %124 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/biases"} -> tensor<120xbf16> loc(#loc) + %125 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_65/weights"} -> tensor<120x32x1x1xbf16> loc(#loc) + %126 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/biases"} -> tensor<32xbf16> loc(#loc) + %127 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_63/weights"} -> tensor<32x120x1x1xbf16> loc(#loc) + %128 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/biases"} -> tensor<120xbf16> loc(#loc) + %129 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_60/weights"} -> tensor<120x1x5x5xbf16> loc(#loc) + %130 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/biases"} -> tensor<120xbf16> loc(#loc) + %131 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_58/weights"} -> tensor<120x40x1x1xbf16> loc(#loc) + %132 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/biases"} -> tensor<40xbf16> loc(#loc) + %133 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_57/weights"} -> tensor<40x72x1x1xbf16> loc(#loc) + %134 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/biases"} -> 
tensor<72xbf16> loc(#loc) + %135 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_48/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) + %136 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/biases"} -> tensor<24xbf16> loc(#loc) + %137 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_46/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) + %138 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_43/biases"} -> tensor<72xbf16> loc(#loc) + %139 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_43/weights"} -> tensor<72x1x5x5xbf16> loc(#loc) + %140 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/biases"} -> tensor<72xbf16> loc(#loc) + %141 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_41/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) + %142 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/biases"} -> tensor<24xbf16> loc(#loc) + %143 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_39/weights"} -> tensor<24x72x1x1xbf16> loc(#loc) + %144 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/biases"} -> tensor<72xbf16> loc(#loc) + %145 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_37/weights"} -> tensor<72x1x3x3xbf16> loc(#loc) + %146 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/biases"} -> tensor<72xbf16> loc(#loc) + %147 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_35/weights"} -> tensor<72x24x1x1xbf16> loc(#loc) + %148 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/biases"} -> tensor<24xbf16> loc(#loc) + %149 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_34/weights"} -> tensor<24x64x1x1xbf16> loc(#loc) + %150 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/biases"} -> tensor<64xbf16> loc(#loc) + %151 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_32/weights"} -> tensor<64x1x3x3xbf16> loc(#loc) + %152 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/biases"} -> tensor<64xbf16> loc(#loc) + %153 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_30/weights"} -> tensor<64x16x1x1xbf16> loc(#loc) + %154 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/biases"} -> tensor<16xbf16> loc(#loc) + %155 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_28/weights"} -> tensor<16x16x1x1xbf16> loc(#loc) + %156 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/biases"} -> tensor<16xbf16> loc(#loc) + %157 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_26/weights"} -> tensor<16x1x3x3xbf16> loc(#loc) + %158 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/biases"} -> tensor<16xbf16> loc(#loc) + %159 = xten_nn.load_external_const {file = "constants.h5", key = "Conv_17/weights"} -> tensor<16x3x3x3xbf16> loc(#loc) + %160 = xten_nn.load_external_const {file = "constants.h5", key = "Div_16/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc6) + %161 = xten_nn.load_external_const {file = "constants.h5", key = "Sub_14/Constant_1_0"} -> tensor<1x3x180x320xbf16> loc(#loc309) + %162 = xten_nn.subgraph (%arg5 = %arg0: tensor<1x180x320x4xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_2", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 
180, 320, 4]> : vector<4xindex> + } + ], + OutputName = "Div_2", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x180x320x4xbf16>) attributes { + LayerName = "Div_2", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> + } + ], + OutputName = "Div_2", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.906250e-03 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.906250e-03> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc1) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_2", + OutputName = "Div_2", + shift = 0 : i8} : (tensor<1x180x320x4xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc1) + xten_nn.output %463 : tensor<1x180x320x4xbf16> loc(#loc1) + } -> tensor<1x180x320x4xbf16> loc(#loc1) + xten_nn.output %461 : tensor<1x180x320x4xbf16> loc(#loc1) + } -> tensor<1x180x320x4xbf16> loc(#loc1) + %163 = xten_nn.subgraph (%arg5 = %162: tensor<1x180x320x4xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Slice_7", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 4]> : vector<4xindex> + } + ], + OutputName = "Slice_7", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "W", + config.dim_c = 184 : ui32, + config.dim_h = 320 : ui32, + config.dim_w = 4 : ui32, + config.dtype = "bfloat16", + config.end = 3 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + LayerName = "Slice_7", + OutputName = "Slice_7", + size = array, + start = array} : (tensor<1x180x320x4xbf16>) -> tensor<1x180x320x3xbf16> loc(#loc9) + xten_nn.output %461 : tensor<1x180x320x3xbf16> loc(#loc9) + } -> tensor<1x180x320x3xbf16> loc(#loc9) + %164 = xten_nn.subgraph (%arg5 = %163: tensor<1x180x320x3xbf16>) attributes { + 
IfmOperands = [0 : index], + LayerName = "Generated-#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> + } + ], + OutputName = "Generated-#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> + } + ], + Specializes = "BufferPadAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 320 : ui32, + config.dim_0_padded = 320 : ui32, + config.dim_1 = 23 : ui32, + config.dim_1_padded = 23 : ui32, + config.dim_2 = 3 : ui32, + config.dim_2_padded = 8 : ui32, + config.dim_3 = 8 : ui32, + config.dim_3_padded = 8 : ui32, + config.dtype = "bfloat16" + }} { + xten_nn.output %arg5 : tensor<1x180x320x3xbf16> loc(#loc10) + } -> tensor<1x180x320x3xbf16> loc(#loc10) + %165 = xten_nn.subgraph (%arg5 = %164: tensor<1x180x320x3xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#2", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 5]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 180, 320, 3]> : vector<4xindex> + } + ], + OutputName = "Generated-#3", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 320 : ui32, + config.dim_1 = 23 : ui32, + config.dim_2 = 8 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 10 : ui32 + }} { + %461 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc11) + %462 = tosa.transpose %arg5, %461 : (tensor<1x180x320x3xbf16>, tensor<4xi32>) -> tensor<1x3x180x320xbf16> loc(#loc311) + xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc311) + } -> tensor<1x3x180x320xbf16> loc(#loc310) + %166 = xten_nn.subgraph (%arg5 = %165: tensor<1x3x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#4", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 4, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Generated-#5", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + 
Specializes = "BufferUnpadAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 184 : ui32, + config.dim_0_unpadded = 180 : ui32, + config.dim_1 = 1 : ui32, + config.dim_1_unpadded = 1 : ui32, + config.dim_2 = 320 : ui32, + config.dim_2_unpadded = 320 : ui32, + config.dim_3 = 8 : ui32, + config.dim_3_unpadded = 8 : ui32, + config.dtype = "bfloat16" + }} { + xten_nn.output %arg5 : tensor<1x3x180x320xbf16> loc(#loc10) + } -> tensor<1x3x180x320xbf16> loc(#loc10) + %167 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>, %arg6 = %161: tensor<1x3x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sub_14", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Initializer_398", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { + LayerName = "Sub_14", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Initializer_398", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.add %arg7, %arg8 {LayerName = "Sub_14", OutputName = "Initializer_398"} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc309) + xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc309) + } -> tensor<1x3x180x320xbf16> loc(#loc309) + xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc309) + } -> tensor<1x3x180x320xbf16> loc(#loc309) + %168 = xten_nn.subgraph (%arg5 = %167: tensor<1x3x180x320xbf16>, %arg6 = %160: tensor<1x3x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_16", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : 
vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Div_16", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes { + LayerName = "Div_16", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Div_16", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + OutputName = "Div_16", + PartOfLayerName = "Div_16", + shift = 0 : i8} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc6) + xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc6) + } -> tensor<1x3x180x320xbf16> loc(#loc6) + xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc6) + } -> tensor<1x3x180x320xbf16> loc(#loc6) + %169 = xten_nn.subgraph (%arg5 = %168: tensor<1x3x180x320xbf16>, %arg6 = %159: tensor<16x3x3x3xbf16>, %arg7 = %158: tensor<16xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_17", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_17", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x3x180x320xbf16>, %arg9 = %arg6: tensor<16x3x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 0], [1, 0]], + LayerName = 
"Conv_17", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[16, 3, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_17", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 2 : ui8, + config.stride_w = 2 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<16x3x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x3xbf16> loc(#loc13) + %465 = tosa.transpose %arg8, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc13) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_17", + PartOfOutputName = "Conv_17", + dilation = array, + pad = array, + stride = array} : (tensor<1x180x320x3xbf16>, tensor<16x3x3x3xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc13) + %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc13) + xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc13) + } -> tensor<1x16x90x160xbf16> loc(#loc13) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc13) + } -> tensor<1x16x90x160xbf16> loc(#loc13) + %170 = xten_nn.subgraph (%arg5 = %169: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_19", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_19", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Add_19", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = 
"data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_19", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_19", OutputName = "Add_19"} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc14) + xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc14) + } -> tensor<1x16x90x160xbf16> loc(#loc14) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc14) + } -> tensor<1x16x90x160xbf16> loc(#loc14) + %171 = xten_nn.subgraph (%arg5 = %170: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_22", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Clip_22", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Clip_22", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Clip_22", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_22", + OutputName = "Clip_22", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc15) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc15) + } -> tensor<1x16x90x160xbf16> loc(#loc15) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc15) + } -> tensor<1x16x90x160xbf16> loc(#loc15) + %172 = xten_nn.subgraph (%arg5 = %171: tensor<1x16x90x160xbf16>) attributes { 
+ IfmOperands = [0 : index], + LayerName = "Div_24", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Div_24", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Div_24", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Div_24", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_24", + OutputName = "Div_24", + shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc16) + xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc16) + } -> tensor<1x16x90x160xbf16> loc(#loc16) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc16) + } -> tensor<1x16x90x160xbf16> loc(#loc16) + %173 = xten_nn.subgraph (%arg5 = %169: tensor<1x16x90x160xbf16>, %arg6 = %172: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_25", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_25", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Mul_25", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, 
+ l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_25", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_25", + OutputName = "Mul_25", + shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc17) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc17) + } -> tensor<1x16x90x160xbf16> loc(#loc17) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc17) + } -> tensor<1x16x90x160xbf16> loc(#loc17) + %174 = xten_nn.subgraph (%arg5 = %173: tensor<1x16x90x160xbf16>, %arg6 = %157: tensor<16x1x3x3xbf16>, %arg7 = %156: tensor<16xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_26", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_27", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<16x1x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_26", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_27", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = 
"aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc312) + %465 = tosa.transpose %arg9, %464 : (tensor<16x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x16x1xbf16> loc(#loc312) + %466 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc312) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_26", + PartOfOutputName = "Conv_26", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x16xbf16>, tensor<3x3x16x1xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc18) + %468 = tosa.clamp %467 { + LayerName = "Relu_27", + OutputName = "Relu_27", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x90x160x16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc19) + %469 = tosa.transpose %468, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc312) + xten_nn.output %469 : tensor<1x16x90x160xbf16> loc(#loc19) + } -> tensor<1x16x90x160xbf16> loc(#loc312) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc312) + } -> tensor<1x16x90x160xbf16> loc(#loc312) + %175 = xten_nn.subgraph (%arg5 = %174: tensor<1x16x90x160xbf16>, %arg6 = %155: tensor<16x16x1x1xbf16>, %arg7 = %154: tensor<16xbf16>, %arg8 = %173: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_28", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 16, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_29", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x16x90x160xbf16>, %arg10 = %arg6: tensor<16x16x1x1xbf16>, %arg11 = %arg7: tensor<16xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_28", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 16, 1, 1]> : 
vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_28", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc20) + %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<16x16x1x1xbf16>) -> tensor<16x1x1x16xbf16> loc(#loc20) + %466 = tosa.transpose %arg9, %464 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc20) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_28", + PartOfOutputName = "Conv_28", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x16xbf16>, tensor<16x1x1x16xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc20) + %468 = tosa.transpose %467, %463 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc20) + xten_nn.output %468 : tensor<1x16x90x160xbf16> loc(#loc20) + } -> tensor<1x16x90x160xbf16> loc(#loc20) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x16x90x160xbf16>, %arg10 = %arg8: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Add_29", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_29", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_29", OutputName = "Add_29"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc21) + xten_nn.output %463 : tensor<1x16x90x160xbf16> loc(#loc21) + } -> tensor<1x16x90x160xbf16> loc(#loc21) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc21) 
+ } -> tensor<1x16x90x160xbf16> loc(#loc313) + %176 = xten_nn.subgraph (%arg5 = %175: tensor<1x16x90x160xbf16>, %arg6 = %153: tensor<64x16x1x1xbf16>, %arg7 = %152: tensor<64xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_30", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_31", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x90x160xbf16>, %arg9 = %arg6: tensor<64x16x1x1xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_30", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 16, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_31", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc314) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<64x16x1x1xbf16>) -> tensor<64x1x1x16xbf16> loc(#loc314) + %465 = tosa.transpose %arg8, %463 : (tensor<1x16x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x16xbf16> loc(#loc314) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_30", + PartOfOutputName = "Conv_30", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x16xbf16>, tensor<64x1x1x16xbf16>, tensor<64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc22) + %467 = tosa.clamp 
%466 { + LayerName = "Relu_31", + OutputName = "Relu_31", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x90x160x64xbf16>) -> tensor<1x90x160x64xbf16> loc(#loc23) + %468 = tosa.transpose %467, %462 : (tensor<1x90x160x64xbf16>, tensor<4xi32>) -> tensor<1x64x90x160xbf16> loc(#loc314) + xten_nn.output %468 : tensor<1x64x90x160xbf16> loc(#loc23) + } -> tensor<1x64x90x160xbf16> loc(#loc314) + xten_nn.output %461 : tensor<1x64x90x160xbf16> loc(#loc314) + } -> tensor<1x64x90x160xbf16> loc(#loc314) + %177 = xten_nn.subgraph (%arg5 = %176: tensor<1x64x90x160xbf16>, %arg6 = %151: tensor<64x1x3x3xbf16>, %arg7 = %150: tensor<64xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_32", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_33", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x90x160xbf16>, %arg9 = %arg6: tensor<64x1x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 0], [1, 0]], + LayerName = "Conv_32", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_33", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 2 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc315) + %465 = tosa.transpose %arg9, %464 : (tensor<64x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x64x1xbf16> loc(#loc315) + %466 = tosa.transpose %arg8, %463 : 
(tensor<1x64x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x64xbf16> loc(#loc315) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_32", + PartOfOutputName = "Conv_32", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x64xbf16>, tensor<3x3x64x1xbf16>, tensor<64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc24) + %468 = tosa.clamp %467 { + LayerName = "Relu_33", + OutputName = "Relu_33", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x45x80x64xbf16>) -> tensor<1x45x80x64xbf16> loc(#loc25) + %469 = tosa.transpose %468, %462 : (tensor<1x45x80x64xbf16>, tensor<4xi32>) -> tensor<1x64x45x80xbf16> loc(#loc315) + xten_nn.output %469 : tensor<1x64x45x80xbf16> loc(#loc25) + } -> tensor<1x64x45x80xbf16> loc(#loc315) + xten_nn.output %461 : tensor<1x64x45x80xbf16> loc(#loc315) + } -> tensor<1x64x45x80xbf16> loc(#loc315) + %178 = xten_nn.subgraph (%arg5 = %177: tensor<1x64x45x80xbf16>, %arg6 = %149: tensor<24x64x1x1xbf16>, %arg7 = %148: tensor<24xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_34", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_34", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x64x45x80xbf16>, %arg9 = %arg6: tensor<24x64x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_34", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 64, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_34", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 
1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc26) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x64x1x1xbf16>) -> tensor<24x1x1x64xbf16> loc(#loc26) + %465 = tosa.transpose %arg8, %463 : (tensor<1x64x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x64xbf16> loc(#loc26) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_34", + PartOfOutputName = "Conv_34", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x64xbf16>, tensor<24x1x1x64xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc26) + %467 = tosa.transpose %466, %462 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc26) + xten_nn.output %467 : tensor<1x24x45x80xbf16> loc(#loc26) + } -> tensor<1x24x45x80xbf16> loc(#loc26) + xten_nn.output %461 : tensor<1x24x45x80xbf16> loc(#loc26) + } -> tensor<1x24x45x80xbf16> loc(#loc26) + %179 = xten_nn.subgraph (%arg5 = %178: tensor<1x24x45x80xbf16>, %arg6 = %147: tensor<72x24x1x1xbf16>, %arg7 = %146: tensor<72xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_35", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_36", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_35", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_36", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = 
"chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc316) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc316) + %465 = tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc316) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_35", + PartOfOutputName = "Conv_35", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc27) + %467 = tosa.clamp %466 { + LayerName = "Relu_36", + OutputName = "Relu_36", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc28) + %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc316) + xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc28) + } -> tensor<1x72x45x80xbf16> loc(#loc316) + xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc316) + } -> tensor<1x72x45x80xbf16> loc(#loc316) + %180 = xten_nn.subgraph (%arg5 = %179: tensor<1x72x45x80xbf16>, %arg6 = %145: tensor<72x1x3x3xbf16>, %arg7 = %144: tensor<72xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_37", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_38", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x3x3xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_37", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat 
= true + } + ], + OutputName = "Relu_38", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc317) + %465 = tosa.transpose %arg9, %464 : (tensor<72x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x72x1xbf16> loc(#loc317) + %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc317) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_37", + PartOfOutputName = "Conv_37", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x72xbf16>, tensor<3x3x72x1xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc29) + %468 = tosa.clamp %467 { + LayerName = "Relu_38", + OutputName = "Relu_38", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc30) + %469 = tosa.transpose %468, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc317) + xten_nn.output %469 : tensor<1x72x45x80xbf16> loc(#loc30) + } -> tensor<1x72x45x80xbf16> loc(#loc317) + xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc317) + } -> tensor<1x72x45x80xbf16> loc(#loc317) + %181 = xten_nn.subgraph (%arg5 = %180: tensor<1x72x45x80xbf16>, %arg6 = %143: tensor<24x72x1x1xbf16>, %arg7 = %142: tensor<24xbf16>, %arg8 = %178: tensor<1x24x45x80xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_39", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Add_40", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x72x45x80xbf16>, %arg10 = %arg6: tensor<24x72x1x1xbf16>, %arg11 = %arg7: tensor<24xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + 
LayerName = "Conv_39", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_39", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc31) + %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc31) + %466 = tosa.transpose %arg9, %464 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc31) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_39", + PartOfOutputName = "Conv_39", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x45x80x24xbf16> loc(#loc31) + %468 = tosa.transpose %467, %463 : (tensor<1x45x80x24xbf16>, tensor<4xi32>) -> tensor<1x24x45x80xbf16> loc(#loc31) + xten_nn.output %468 : tensor<1x24x45x80xbf16> loc(#loc31) + } -> tensor<1x24x45x80xbf16> loc(#loc31) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x24x45x80xbf16>, %arg10 = %arg8: tensor<1x24x45x80xbf16>) attributes { + LayerName = "Add_40", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Add_40", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", 
+ config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_40", OutputName = "Add_40"} : (tensor<1x24x45x80xbf16>, tensor<1x24x45x80xbf16>) -> tensor<1x24x45x80xbf16> loc(#loc32) + xten_nn.output %463 : tensor<1x24x45x80xbf16> loc(#loc32) + } -> tensor<1x24x45x80xbf16> loc(#loc32) + xten_nn.output %462 : tensor<1x24x45x80xbf16> loc(#loc32) + } -> tensor<1x24x45x80xbf16> loc(#loc318) + %182 = xten_nn.subgraph (%arg5 = %181: tensor<1x24x45x80xbf16>, %arg6 = %141: tensor<72x24x1x1xbf16>, %arg7 = %140: tensor<72xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_41", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_42", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x45x80xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_41", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_42", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc319) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc319) + %465 = 
tosa.transpose %arg8, %463 : (tensor<1x24x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x24xbf16> loc(#loc319) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_41", + PartOfOutputName = "Conv_41", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc33) + %467 = tosa.clamp %466 { + LayerName = "Relu_42", + OutputName = "Relu_42", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x45x80x72xbf16>) -> tensor<1x45x80x72xbf16> loc(#loc34) + %468 = tosa.transpose %467, %462 : (tensor<1x45x80x72xbf16>, tensor<4xi32>) -> tensor<1x72x45x80xbf16> loc(#loc319) + xten_nn.output %468 : tensor<1x72x45x80xbf16> loc(#loc34) + } -> tensor<1x72x45x80xbf16> loc(#loc319) + xten_nn.output %461 : tensor<1x72x45x80xbf16> loc(#loc319) + } -> tensor<1x72x45x80xbf16> loc(#loc319) + %183 = xten_nn.subgraph (%arg5 = %182: tensor<1x72x45x80xbf16>, %arg6 = %139: tensor<72x1x5x5xbf16>, %arg7 = %138: tensor<72xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_43", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_44", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x45x80xbf16>, %arg9 = %arg6: tensor<72x1x5x5xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { + Dilations = array, + HWPadding = [[2, 2], [2, 1]], + LayerName = "Conv_43", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 1, 5, 5]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_44", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 5 : ui8, + config.kernel_width = 5 : ui8, + config.stride = 2 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + 
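+ // Depthwise variant (Conv_43): the 72x1x5x5 CMHW weights are transposed with perm [2, 3, 0, 1] to the HWCM layout (5x5x72x1) that tosa.depthwise_conv2d expects. With a 5x5 kernel, stride 2, and HWPadding [[2, 2], [2, 1]], the 45x80 ifm maps to the 23x40 ofm: (45 + 2 + 2 - 5)/2 + 1 = 23 and (80 + 2 + 1 - 5)/2 + 1 = 40.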
%463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc320) + %465 = tosa.transpose %arg9, %464 : (tensor<72x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x72x1xbf16> loc(#loc320) + %466 = tosa.transpose %arg8, %463 : (tensor<1x72x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x72xbf16> loc(#loc320) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_43", + PartOfOutputName = "Conv_43", + dilation = array<i64: 1, 1>, + pad = array<i64: 2, 2, 2, 1>, + stride = array<i64: 2, 2>} : (tensor<1x45x80x72xbf16>, tensor<5x5x72x1xbf16>, tensor<72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc35) + %468 = tosa.clamp %467 { + LayerName = "Relu_44", + OutputName = "Relu_44", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x72xbf16>) -> tensor<1x23x40x72xbf16> loc(#loc36) + %469 = tosa.transpose %468, %462 : (tensor<1x23x40x72xbf16>, tensor<4xi32>) -> tensor<1x72x23x40xbf16> loc(#loc320) + xten_nn.output %469 : tensor<1x72x23x40xbf16> loc(#loc36) + } -> tensor<1x72x23x40xbf16> loc(#loc320) + xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc320) + } -> tensor<1x72x23x40xbf16> loc(#loc320) + %184 = xten_nn.subgraph (%arg5 = %183: tensor<1x72x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#6", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Generated-#7", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 23 : ui32, + config.dim_1 = 9 : ui32, + config.dim_2 = 40 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array<i64: 1, 72, 1, 920>} : (tensor<1x72x23x40xbf16>) -> tensor<1x72x1x920xbf16> loc(#loc37) + xten_nn.output %461 : tensor<1x72x1x920xbf16> loc(#loc37) + } -> tensor<1x72x1x920xbf16> loc(#loc37) + %185 = xten_nn.subgraph (%arg5 = %184: tensor<1x72x1x920xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#8", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#9", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x920xbf16>) attributes { + LayerName = "Generated-#8", + Operands = [ + {
CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#9", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 72 : ui32, + config.full_height = 1 : ui32, + config.full_width = 920 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x72x1x920xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc37) + xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc37) + } -> tensor<1x72x1x1xbf16> loc(#loc37) + xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc37) + } -> tensor<1x72x1x1xbf16> loc(#loc37) + %186 = xten_nn.subgraph (%arg5 = %185: tensor<1x72x1x1xbf16>, %arg6 = %137: tensor<24x72x1x1xbf16>, %arg7 = %136: tensor<24xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_46", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_47", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x1x1xbf16>, %arg9 = %arg6: tensor<24x72x1x1xbf16>, %arg10 = %arg7: tensor<24xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_46", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[24, 72, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_47", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 
: ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<24x72x1x1xbf16>) -> tensor<24x1x1x72xbf16> loc(#loc321) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc321) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_46", + PartOfOutputName = "Conv_46", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x72xbf16>, tensor<24x1x1x72xbf16>, tensor<24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc38) + %465 = tosa.clamp %464 { + LayerName = "Relu_47", + OutputName = "Relu_47", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x24xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc39) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x24xbf16>) -> tensor<1x24x1x1xbf16> loc(#loc321) + xten_nn.output %466 : tensor<1x24x1x1xbf16> loc(#loc39) + } -> tensor<1x24x1x1xbf16> loc(#loc321) + xten_nn.output %461 : tensor<1x24x1x1xbf16> loc(#loc321) + } -> tensor<1x24x1x1xbf16> loc(#loc321) + %187 = xten_nn.subgraph (%arg5 = %186: tensor<1x24x1x1xbf16>, %arg6 = %135: tensor<72x24x1x1xbf16>, %arg7 = %134: tensor<72xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_48", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_48", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x24x1x1xbf16>, %arg9 = %arg6: tensor<72x24x1x1xbf16>, %arg10 = %arg7: tensor<72xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_48", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 24, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[72, 24, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_48", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With 
= { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<72x24x1x1xbf16>) -> tensor<72x1x1x24xbf16> loc(#loc40) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x24x1x1xbf16>) -> tensor<1x1x1x24xbf16> loc(#loc40) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_48", + PartOfOutputName = "Conv_48", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x24xbf16>, tensor<72x1x1x24xbf16>, tensor<72xbf16>) -> tensor<1x1x1x72xbf16> loc(#loc40) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x72xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc40) + xten_nn.output %465 : tensor<1x72x1x1xbf16> loc(#loc40) + } -> tensor<1x72x1x1xbf16> loc(#loc40) + xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc40) + } -> tensor<1x72x1x1xbf16> loc(#loc40) + %188 = xten_nn.subgraph (%arg5 = %187: tensor<1x72x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_50", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_50", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { + LayerName = "Add_50", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_50", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_50", OutputName = "Add_50"} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc41) + xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc41) + } -> tensor<1x72x1x1xbf16> loc(#loc41) + xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc41) + } -> tensor<1x72x1x1xbf16> 
loc(#loc41) + %189 = xten_nn.subgraph (%arg5 = %188: tensor<1x72x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_53", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_53", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { + LayerName = "Clip_53", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_53", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_53", + OutputName = "Clip_53", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc42) + xten_nn.output %462 : tensor<1x72x1x1xbf16> loc(#loc42) + } -> tensor<1x72x1x1xbf16> loc(#loc42) + xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc42) + } -> tensor<1x72x1x1xbf16> loc(#loc42) + %190 = xten_nn.subgraph (%arg5 = %189: tensor<1x72x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_55", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_55", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x72x1x1xbf16>) attributes { + LayerName = "Div_55", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_55", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + 
l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_55", + OutputName = "Div_55", + shift = 0 : i8} : (tensor<1x72x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x72x1x1xbf16> loc(#loc43) + xten_nn.output %463 : tensor<1x72x1x1xbf16> loc(#loc43) + } -> tensor<1x72x1x1xbf16> loc(#loc43) + xten_nn.output %461 : tensor<1x72x1x1xbf16> loc(#loc43) + } -> tensor<1x72x1x1xbf16> loc(#loc43) + %191 = xten_nn.subgraph (%arg5 = %190: tensor<1x72x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#10", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#11", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 72 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 23 : ui32, + config.rep_dim_w = 40 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x72x1x1xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc44) + xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc44) + } -> tensor<1x72x23x40xbf16> loc(#loc44) + %192 = xten_nn.subgraph (%arg5 = %191: tensor<1x72x23x40xbf16>, %arg6 = %183: tensor<1x72x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_56", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_56", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x72x23x40xbf16>, %arg8 = %arg6: tensor<1x72x23x40xbf16>) attributes { + LayerName = "Mul_56", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_56", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_56", + OutputName = "Mul_56", + shift = 0 : i8} : (tensor<1x72x23x40xbf16>, tensor<1x72x23x40xbf16>) -> tensor<1x72x23x40xbf16> loc(#loc44) + xten_nn.output %462 : tensor<1x72x23x40xbf16> loc(#loc44) + } -> tensor<1x72x23x40xbf16> loc(#loc44) + xten_nn.output %461 : tensor<1x72x23x40xbf16> loc(#loc44) + } -> tensor<1x72x23x40xbf16> loc(#loc44) + %193 = xten_nn.subgraph (%arg5 = %192: tensor<1x72x23x40xbf16>, %arg6 = %133: tensor<40x72x1x1xbf16>, %arg7 = %132: tensor<40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_57", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_57", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x72x23x40xbf16>, %arg9 = %arg6: tensor<40x72x1x1xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_57", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 72, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 72, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_57", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act 
= 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc45) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<40x72x1x1xbf16>) -> tensor<40x1x1x72xbf16> loc(#loc45) + %465 = tosa.transpose %arg8, %463 : (tensor<1x72x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x72xbf16> loc(#loc45) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_57", + PartOfOutputName = "Conv_57", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x72xbf16>, tensor<40x1x1x72xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc45) + %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc45) + xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc45) + } -> tensor<1x40x23x40xbf16> loc(#loc45) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc45) + } -> tensor<1x40x23x40xbf16> loc(#loc45) + %194 = xten_nn.subgraph (%arg5 = %193: tensor<1x40x23x40xbf16>, %arg6 = %131: tensor<120x40x1x1xbf16>, %arg7 = %130: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_58", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_59", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_58", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_59", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc322) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc322) + %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc322) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_58", + PartOfOutputName = "Conv_58", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc46) + %467 = tosa.clamp %466 { + LayerName = "Relu_59", + OutputName = "Relu_59", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc47) + %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc322) + xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc47) + } -> tensor<1x120x23x40xbf16> loc(#loc322) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc322) + } -> tensor<1x120x23x40xbf16> loc(#loc322) + %195 = xten_nn.subgraph (%arg5 = %194: tensor<1x120x23x40xbf16>, %arg6 = %129: tensor<120x1x5x5xbf16>, %arg7 = %128: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_60", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_61", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[2, 2], [2, 2]], + LayerName = "Conv_60", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = 
dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_61", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 5 : ui8, + config.kernel_width = 5 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc323) + %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc323) + %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc323) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_60", + PartOfOutputName = "Conv_60", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc48) + %468 = tosa.clamp %467 { + LayerName = "Relu_61", + OutputName = "Relu_61", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc49) + %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc323) + xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc49) + } -> tensor<1x120x23x40xbf16> loc(#loc323) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc323) + } -> tensor<1x120x23x40xbf16> loc(#loc323) + %196 = xten_nn.subgraph (%arg5 = %195: tensor<1x120x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#12", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Generated-#13", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 23 : ui32, + config.dim_1 = 15 : ui32, + config.dim_2 = 40 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape 
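+ // Annotation: Conv_60/Relu_61 above is a 5x5 depthwise stage (DepthwiseConv2dBf16). The
+ // CMHW 120x1x5x5 weights are permuted with [2, 3, 0, 1] into the HWCM layout (5x5x120x1)
+ // that tosa.depthwise_conv2d expects.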
%arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc50) + xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc50) + } -> tensor<1x120x1x920xbf16> loc(#loc50) + %197 = xten_nn.subgraph (%arg5 = %196: tensor<1x120x1x920xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#14", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#15", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { + LayerName = "Generated-#14", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#15", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 120 : ui32, + config.full_height = 1 : ui32, + config.full_width = 920 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc50) + xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc50) + } -> tensor<1x120x1x1xbf16> loc(#loc50) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc50) + } -> tensor<1x120x1x1xbf16> loc(#loc50) + %198 = xten_nn.subgraph (%arg5 = %197: tensor<1x120x1x1xbf16>, %arg6 = %127: tensor<32x120x1x1xbf16>, %arg7 = %126: tensor<32xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_63", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_64", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 
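+ // Annotation: the "Generated-#12"/"Generated-#14" pair appears to be a compiler-synthesized
+ // global average pool: the 23x40 spatial extent is first flattened to W = 920 so that
+ // ReduceMeanC8Bf16 can reduce a single W axis down to 1x120x1x1.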
0]], + LayerName = "Conv_63", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_64", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc324) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc324) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_63", + PartOfOutputName = "Conv_63", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc51) + %465 = tosa.clamp %464 { + LayerName = "Relu_64", + OutputName = "Relu_64", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc52) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc324) + xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc52) + } -> tensor<1x32x1x1xbf16> loc(#loc324) + xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc324) + } -> tensor<1x32x1x1xbf16> loc(#loc324) + %199 = xten_nn.subgraph (%arg5 = %198: tensor<1x32x1x1xbf16>, %arg6 = %125: tensor<120x32x1x1xbf16>, %arg7 = %124: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_65", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_65", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + 
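+ // Annotation: Conv_63/Relu_64 above is the squeeze half of a squeeze-and-excite block
+ // (120 -> 32 with ReLU); since H = W = 1 here, the layout fixups degenerate to
+ // tosa.reshape instead of the transpose pairs used on full feature maps.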
L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_65", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_65", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc53) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc53) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_65", + PartOfOutputName = "Conv_65", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc53) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc53) + xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc53) + } -> tensor<1x120x1x1xbf16> loc(#loc53) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc53) + } -> tensor<1x120x1x1xbf16> loc(#loc53) + %200 = xten_nn.subgraph (%arg5 = %199: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_67", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_67", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Add_67", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
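+ // Annotation: Conv_65 above is the matching excite projection (32 -> 120) with act = 0,
+ // i.e. no activation applied; lrelu_alpha = 1.0 appears to be the identity setting of the
+ // kernel's leaky-ReLU slope rather than a real activation.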
L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_67", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_67", OutputName = "Add_67"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc54) + xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc54) + } -> tensor<1x120x1x1xbf16> loc(#loc54) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc54) + } -> tensor<1x120x1x1xbf16> loc(#loc54) + %201 = xten_nn.subgraph (%arg5 = %200: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_70", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_70", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Clip_70", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_70", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_70", + OutputName = "Clip_70", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc55) + xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc55) + } -> tensor<1x120x1x1xbf16> loc(#loc55) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc55) + } -> tensor<1x120x1x1xbf16> loc(#loc55) + %202 = xten_nn.subgraph (%arg5 = %201: tensor<1x120x1x1xbf16>) attributes { + 
IfmOperands = [0 : index], + LayerName = "Div_72", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_72", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Div_72", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_72", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_72", + OutputName = "Div_72", + shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc56) + xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc56) + } -> tensor<1x120x1x1xbf16> loc(#loc56) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc56) + } -> tensor<1x120x1x1xbf16> loc(#loc56) + %203 = xten_nn.subgraph (%arg5 = %202: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#16", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#17", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 120 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 23 : ui32, + config.rep_dim_w = 40 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc57) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc57) + } -> tensor<1x120x23x40xbf16> loc(#loc57) + %204 = xten_nn.subgraph (%arg5 = %203: 
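+ // Annotation: Div_72 above completes the hard-sigmoid as a multiply by 1.660160e-01, the
+ // bf16 value nearest to 1/6 (the original ONNX division by 6 is folded into a reciprocal
+ // multiply); the TileAdf subgraph then broadcasts the 1x120x1x1 gate back to
+ // 1x120x23x40 because the elementwise MulBf16 kernel takes equal-shaped inputs.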
tensor<1x120x23x40xbf16>, %arg6 = %195: tensor<1x120x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_73", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_73", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { + LayerName = "Mul_73", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_73", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_73", + OutputName = "Mul_73", + shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc57) + xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc57) + } -> tensor<1x120x23x40xbf16> loc(#loc57) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc57) + } -> tensor<1x120x23x40xbf16> loc(#loc57) + %205 = xten_nn.subgraph (%arg5 = %204: tensor<1x120x23x40xbf16>, %arg6 = %123: tensor<40x120x1x1xbf16>, %arg7 = %122: tensor<40xbf16>, %arg8 = %193: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_74", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_75", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = 
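+ // Annotation: Mul_73 above applies the squeeze-and-excite gate channel-wise to the
+ // depthwise output (%195), finishing the attention path of this bottleneck block.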
"InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_74", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_74", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc58) + %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc58) + %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc58) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_74", + PartOfOutputName = "Conv_74", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc58) + %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc58) + xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc58) + } -> tensor<1x40x23x40xbf16> loc(#loc58) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Add_75", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_75", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_75", OutputName = "Add_75"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc59) + xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc59) + } -> tensor<1x40x23x40xbf16> loc(#loc59) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc59) + } -> tensor<1x40x23x40xbf16> loc(#loc325) + %206 = xten_nn.subgraph (%arg5 = %205: tensor<1x40x23x40xbf16>, %arg6 = %121: tensor<120x40x1x1xbf16>, %arg7 = %120: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_76", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_77", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<120x40x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_76", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 40, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_77", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + 
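+ // Annotation: subgraph %205 above chains Conv_74 (linear 1x1 projection, 120 -> 40) with
+ // the residual Add_75 in a single InCoreChain; OfmShare = 3 presumably lets the OFM reuse
+ // the buffer of operand 3, the residual input %193.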
config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc326) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x40x1x1xbf16>) -> tensor<120x1x1x40xbf16> loc(#loc326) + %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc326) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_76", + PartOfOutputName = "Conv_76", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x40xbf16>, tensor<120x1x1x40xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc60) + %467 = tosa.clamp %466 { + LayerName = "Relu_77", + OutputName = "Relu_77", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc61) + %468 = tosa.transpose %467, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc326) + xten_nn.output %468 : tensor<1x120x23x40xbf16> loc(#loc61) + } -> tensor<1x120x23x40xbf16> loc(#loc326) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc326) + } -> tensor<1x120x23x40xbf16> loc(#loc326) + %207 = xten_nn.subgraph (%arg5 = %206: tensor<1x120x23x40xbf16>, %arg6 = %119: tensor<120x1x5x5xbf16>, %arg7 = %118: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_78", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_79", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x23x40xbf16>, %arg9 = %arg6: tensor<120x1x5x5xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[2, 2], [2, 2]], + LayerName = "Conv_78", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 1, 5, 5]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_79", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat 
= "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + Traits = { + NonNegativeOut = true + }, + With = { + config.act = 1 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 5 : ui8, + config.kernel_width = 5 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc327) + %465 = tosa.transpose %arg9, %464 : (tensor<120x1x5x5xbf16>, tensor<4xi32>) -> tensor<5x5x120x1xbf16> loc(#loc327) + %466 = tosa.transpose %arg8, %463 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc327) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_78", + PartOfOutputName = "Conv_78", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x120xbf16>, tensor<5x5x120x1xbf16>, tensor<120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc62) + %468 = tosa.clamp %467 { + LayerName = "Relu_79", + OutputName = "Relu_79", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x120xbf16>) -> tensor<1x23x40x120xbf16> loc(#loc63) + %469 = tosa.transpose %468, %462 : (tensor<1x23x40x120xbf16>, tensor<4xi32>) -> tensor<1x120x23x40xbf16> loc(#loc327) + xten_nn.output %469 : tensor<1x120x23x40xbf16> loc(#loc63) + } -> tensor<1x120x23x40xbf16> loc(#loc327) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc327) + } -> tensor<1x120x23x40xbf16> loc(#loc327) + %208 = xten_nn.subgraph (%arg5 = %207: tensor<1x120x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#18", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Generated-#19", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 23 : ui32, + config.dim_1 = 15 : ui32, + config.dim_2 = 40 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x120x23x40xbf16>) -> tensor<1x120x1x920xbf16> loc(#loc64) + xten_nn.output %461 : tensor<1x120x1x920xbf16> loc(#loc64) + } -> tensor<1x120x1x920xbf16> loc(#loc64) + %209 = xten_nn.subgraph (%arg5 = %208: tensor<1x120x1x920xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#20", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = 
dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#21", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x920xbf16>) attributes { + LayerName = "Generated-#20", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 920]> : vector<4xindex> + } + ], + OutputName = "Generated-#21", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 120 : ui32, + config.full_height = 1 : ui32, + config.full_width = 920 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x120x1x920xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc64) + xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc64) + } -> tensor<1x120x1x1xbf16> loc(#loc64) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc64) + } -> tensor<1x120x1x1xbf16> loc(#loc64) + %210 = xten_nn.subgraph (%arg5 = %209: tensor<1x120x1x1xbf16>, %arg6 = %117: tensor<32x120x1x1xbf16>, %arg7 = %116: tensor<32xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_81", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_82", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<32x120x1x1xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_81", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + 
UnknownDataFormat = true + } + ], + OutputName = "Relu_82", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<32x120x1x1xbf16>) -> tensor<32x1x1x120xbf16> loc(#loc328) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc328) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_81", + PartOfOutputName = "Conv_81", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x120xbf16>, tensor<32x1x1x120xbf16>, tensor<32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc65) + %465 = tosa.clamp %464 { + LayerName = "Relu_82", + OutputName = "Relu_82", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x32xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc66) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x32xbf16>) -> tensor<1x32x1x1xbf16> loc(#loc328) + xten_nn.output %466 : tensor<1x32x1x1xbf16> loc(#loc66) + } -> tensor<1x32x1x1xbf16> loc(#loc328) + xten_nn.output %461 : tensor<1x32x1x1xbf16> loc(#loc328) + } -> tensor<1x32x1x1xbf16> loc(#loc328) + %211 = xten_nn.subgraph (%arg5 = %210: tensor<1x32x1x1xbf16>, %arg6 = %115: tensor<120x32x1x1xbf16>, %arg7 = %114: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_83", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_83", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x1x1xbf16>, %arg9 = %arg6: tensor<120x32x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_83", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 
1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 32, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_83", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x32x1x1xbf16>) -> tensor<120x1x1x32xbf16> loc(#loc67) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x32x1x1xbf16>) -> tensor<1x1x1x32xbf16> loc(#loc67) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_83", + PartOfOutputName = "Conv_83", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x32xbf16>, tensor<120x1x1x32xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc67) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc67) + xten_nn.output %465 : tensor<1x120x1x1xbf16> loc(#loc67) + } -> tensor<1x120x1x1xbf16> loc(#loc67) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc67) + } -> tensor<1x120x1x1xbf16> loc(#loc67) + %212 = xten_nn.subgraph (%arg5 = %211: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_85", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_85", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Add_85", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_85", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + 
Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_85", OutputName = "Add_85"} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc68) + xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc68) + } -> tensor<1x120x1x1xbf16> loc(#loc68) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc68) + } -> tensor<1x120x1x1xbf16> loc(#loc68) + %213 = xten_nn.subgraph (%arg5 = %212: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_88", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_88", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Clip_88", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_88", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_88", + OutputName = "Clip_88", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc69) + xten_nn.output %462 : tensor<1x120x1x1xbf16> loc(#loc69) + } -> tensor<1x120x1x1xbf16> loc(#loc69) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc69) + } -> tensor<1x120x1x1xbf16> loc(#loc69) + %214 = xten_nn.subgraph (%arg5 = %213: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_90", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_90", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x120x1x1xbf16>) attributes { + LayerName = "Div_90", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_90", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_90", + OutputName = "Div_90", + shift = 0 : i8} : (tensor<1x120x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc70) + xten_nn.output %463 : tensor<1x120x1x1xbf16> loc(#loc70) + } -> tensor<1x120x1x1xbf16> loc(#loc70) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc70) + } -> tensor<1x120x1x1xbf16> loc(#loc70) + %215 = xten_nn.subgraph (%arg5 = %214: tensor<1x120x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#22", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#23", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 120 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 23 : ui32, + config.rep_dim_w = 40 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc71) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc71) + } -> tensor<1x120x23x40xbf16> loc(#loc71) + %216 = xten_nn.subgraph (%arg5 = %215: tensor<1x120x23x40xbf16>, %arg6 = %207: tensor<1x120x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_91", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = 
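+ // Annotation: Add_85, Clip_88 and Div_90 plus the TileAdf broadcast above mirror the
+ // first block's hard-sigmoid gate exactly, down to the same bf16 constants; only the
+ // squeeze-and-excite weight operands differ.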
dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_91", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x120x23x40xbf16>, %arg8 = %arg6: tensor<1x120x23x40xbf16>) attributes { + LayerName = "Mul_91", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_91", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_91", + OutputName = "Mul_91", + shift = 0 : i8} : (tensor<1x120x23x40xbf16>, tensor<1x120x23x40xbf16>) -> tensor<1x120x23x40xbf16> loc(#loc71) + xten_nn.output %462 : tensor<1x120x23x40xbf16> loc(#loc71) + } -> tensor<1x120x23x40xbf16> loc(#loc71) + xten_nn.output %461 : tensor<1x120x23x40xbf16> loc(#loc71) + } -> tensor<1x120x23x40xbf16> loc(#loc71) + %217 = xten_nn.subgraph (%arg5 = %216: tensor<1x120x23x40xbf16>, %arg6 = %113: tensor<40x120x1x1xbf16>, %arg7 = %112: tensor<40xbf16>, %arg8 = %205: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_92", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_93", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x120x23x40xbf16>, %arg10 = %arg6: tensor<40x120x1x1xbf16>, %arg11 = %arg7: tensor<40xbf16>) attributes { + 
Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_92", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_92", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc72) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 40, 1, 1, 120>} : (tensor<40x120x1x1xbf16>) -> tensor<40x1x1x120xbf16> loc(#loc72) + %466 = tosa.transpose %arg9, %464 : (tensor<1x120x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x120xbf16> loc(#loc72) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_92", + PartOfOutputName = "Conv_92", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x23x40x120xbf16>, tensor<40x1x1x120xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc72) + %468 = tosa.transpose %467, %463 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc72) + xten_nn.output %468 : tensor<1x40x23x40xbf16> loc(#loc72) + } -> tensor<1x40x23x40xbf16> loc(#loc72) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x40x23x40xbf16>, %arg10 = %arg8: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Add_93", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_93", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type =
"LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_93", OutputName = "Add_93"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc73) + xten_nn.output %463 : tensor<1x40x23x40xbf16> loc(#loc73) + } -> tensor<1x40x23x40xbf16> loc(#loc73) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc73) + } -> tensor<1x40x23x40xbf16> loc(#loc329) + %218 = xten_nn.subgraph (%arg5 = %217: tensor<1x40x23x40xbf16>, %arg6 = %111: tensor<240x40x1x1xbf16>, %arg7 = %110: tensor<240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_94", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_94", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x23x40xbf16>, %arg9 = %arg6: tensor<240x40x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_94", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 40, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_94", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc74) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 240, 1, 1, 40>} : (tensor<240x40x1x1xbf16>)
-> tensor<240x1x1x40xbf16> loc(#loc74) + %465 = tosa.transpose %arg8, %463 : (tensor<1x40x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x40xbf16> loc(#loc74) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_94", + PartOfOutputName = "Conv_94", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x23x40x40xbf16>, tensor<240x1x1x40xbf16>, tensor<240xbf16>) -> tensor<1x23x40x240xbf16> loc(#loc74) + %467 = tosa.transpose %466, %462 : (tensor<1x23x40x240xbf16>, tensor<4xi32>) -> tensor<1x240x23x40xbf16> loc(#loc74) + xten_nn.output %467 : tensor<1x240x23x40xbf16> loc(#loc74) + } -> tensor<1x240x23x40xbf16> loc(#loc74) + xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc74) + } -> tensor<1x240x23x40xbf16> loc(#loc74) + %219 = xten_nn.subgraph (%arg5 = %218: tensor<1x240x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_96", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_96", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { + LayerName = "Add_96", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_96", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_96", OutputName = "Add_96"} : (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc75) + xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc75) + } -> tensor<1x240x23x40xbf16> loc(#loc75) + xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc75) + } -> tensor<1x240x23x40xbf16> loc(#loc75) + %220 = xten_nn.subgraph (%arg5 = %219: tensor<1x240x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_99", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Clip_99", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", +
l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { + LayerName = "Clip_99", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Clip_99", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_99", + OutputName = "Clip_99", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc76) + xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc76) + } -> tensor<1x240x23x40xbf16> loc(#loc76) + xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc76) + } -> tensor<1x240x23x40xbf16> loc(#loc76) + %221 = xten_nn.subgraph (%arg5 = %220: tensor<1x240x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_101", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Div_101", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x23x40xbf16>) attributes { + LayerName = "Div_101", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Div_101", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + 
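+ // The dense<1.660160e-01> constant above is 1/6 rounded to bf16. Add_96 (x + 3), Clip_99 (clamp to [0, 6]), and this Div_101 multiply together compute hard-sigmoid(x) = clip(x + 3, 0, 6) / 6; the following Mul_102 against the Conv_94 output completes the hard-swish activation.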
%463 = tosa.mul %arg6, %462 { + LayerName = "Div_101", + OutputName = "Div_101", + shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc77) + xten_nn.output %463 : tensor<1x240x23x40xbf16> loc(#loc77) + } -> tensor<1x240x23x40xbf16> loc(#loc77) + xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc77) + } -> tensor<1x240x23x40xbf16> loc(#loc77) + %222 = xten_nn.subgraph (%arg5 = %218: tensor<1x240x23x40xbf16>, %arg6 = %221: tensor<1x240x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_102", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_102", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x23x40xbf16>, %arg8 = %arg6: tensor<1x240x23x40xbf16>) attributes { + LayerName = "Mul_102", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_102", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_102", + OutputName = "Mul_102", + shift = 0 : i8} : (tensor<1x240x23x40xbf16>, tensor<1x240x23x40xbf16>) -> tensor<1x240x23x40xbf16> loc(#loc78) + xten_nn.output %462 : tensor<1x240x23x40xbf16> loc(#loc78) + } -> tensor<1x240x23x40xbf16> loc(#loc78) + xten_nn.output %461 : tensor<1x240x23x40xbf16> loc(#loc78) + } -> tensor<1x240x23x40xbf16> loc(#loc78) + %223 = xten_nn.subgraph (%arg5 = %222: tensor<1x240x23x40xbf16>, %arg6 = %109: tensor<240x1x3x3xbf16>, %arg7 = %108: tensor<240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_103", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> 
: vector<4xindex>, + l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_103", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x23x40xbf16>, %arg9 = %arg6: tensor<240x1x3x3xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 0]], + LayerName = "Conv_103", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_103", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 2 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc79) + %465 = tosa.transpose %arg9, %464 : (tensor<240x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x240x1xbf16> loc(#loc79) + %466 = tosa.transpose %arg8, %463 : (tensor<1x240x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x240xbf16> loc(#loc79) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_103", + PartOfOutputName = "Conv_103", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 0>, + stride = array<i64: 2, 2>} : (tensor<1x23x40x240xbf16>, tensor<3x3x240x1xbf16>, tensor<240xbf16>) -> tensor<1x12x20x240xbf16> loc(#loc79) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x240xbf16>, tensor<4xi32>) -> tensor<1x240x12x20xbf16> loc(#loc79) + xten_nn.output %468 : tensor<1x240x12x20xbf16> loc(#loc79) + } -> tensor<1x240x12x20xbf16> loc(#loc79) + xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc79) + } -> tensor<1x240x12x20xbf16> loc(#loc79) + %224 = xten_nn.subgraph (%arg5 = %223: tensor<1x240x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_105", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_105", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", +
Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { + LayerName = "Add_105", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_105", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_105", OutputName = "Add_105"} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc80) + xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc80) + } -> tensor<1x240x12x20xbf16> loc(#loc80) + xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc80) + } -> tensor<1x240x12x20xbf16> loc(#loc80) + %225 = xten_nn.subgraph (%arg5 = %224: tensor<1x240x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_108", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_108", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { + LayerName = "Clip_108", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_108", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + 
config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_108", + OutputName = "Clip_108", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc81) + xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc81) + } -> tensor<1x240x12x20xbf16> loc(#loc81) + xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc81) + } -> tensor<1x240x12x20xbf16> loc(#loc81) + %226 = xten_nn.subgraph (%arg5 = %225: tensor<1x240x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_110", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_110", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x240x12x20xbf16>) attributes { + LayerName = "Div_110", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_110", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_110", + OutputName = "Div_110", + shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc82) + xten_nn.output %463 : tensor<1x240x12x20xbf16> loc(#loc82) + } -> tensor<1x240x12x20xbf16> loc(#loc82) + xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc82) + } -> tensor<1x240x12x20xbf16> loc(#loc82) + %227 = xten_nn.subgraph (%arg5 = %223: tensor<1x240x12x20xbf16>, %arg6 = %226: tensor<1x240x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_111", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_111", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = 
"C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x240x12x20xbf16>, %arg8 = %arg6: tensor<1x240x12x20xbf16>) attributes { + LayerName = "Mul_111", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_111", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_111", + OutputName = "Mul_111", + shift = 0 : i8} : (tensor<1x240x12x20xbf16>, tensor<1x240x12x20xbf16>) -> tensor<1x240x12x20xbf16> loc(#loc83) + xten_nn.output %462 : tensor<1x240x12x20xbf16> loc(#loc83) + } -> tensor<1x240x12x20xbf16> loc(#loc83) + xten_nn.output %461 : tensor<1x240x12x20xbf16> loc(#loc83) + } -> tensor<1x240x12x20xbf16> loc(#loc83) + %228 = xten_nn.subgraph (%arg5 = %227: tensor<1x240x12x20xbf16>, %arg6 = %107: tensor<80x240x1x1xbf16>, %arg7 = %106: tensor<80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_112", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 240, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_112", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x12x20xbf16>, %arg9 = %arg6: tensor<80x240x1x1xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_112", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 240, 1, 1]> : vector<4xindex> + }, + { + Port = 
"data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_112", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc84) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<80x240x1x1xbf16>) -> tensor<80x1x1x240xbf16> loc(#loc84) + %465 = tosa.transpose %arg8, %463 : (tensor<1x240x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x240xbf16> loc(#loc84) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_112", + PartOfOutputName = "Conv_112", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x240xbf16>, tensor<80x1x1x240xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc84) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc84) + xten_nn.output %467 : tensor<1x80x12x20xbf16> loc(#loc84) + } -> tensor<1x80x12x20xbf16> loc(#loc84) + xten_nn.output %461 : tensor<1x80x12x20xbf16> loc(#loc84) + } -> tensor<1x80x12x20xbf16> loc(#loc84) + %229 = xten_nn.subgraph (%arg5 = %228: tensor<1x80x12x20xbf16>, %arg6 = %105: tensor<200x80x1x1xbf16>, %arg7 = %104: tensor<200xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_113", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_113", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<200x80x1x1xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_113", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 
12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[200, 80, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_113", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc85) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<200x80x1x1xbf16>) -> tensor<200x1x1x80xbf16> loc(#loc85) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc85) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_113", + PartOfOutputName = "Conv_113", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x80xbf16>, tensor<200x1x1x80xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc85) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc85) + xten_nn.output %467 : tensor<1x200x12x20xbf16> loc(#loc85) + } -> tensor<1x200x12x20xbf16> loc(#loc85) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc85) + } -> tensor<1x200x12x20xbf16> loc(#loc85) + %230 = xten_nn.subgraph (%arg5 = %229: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_115", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_115", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Add_115", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_115", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = 
"NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_115", OutputName = "Add_115"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc86) + xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc86) + } -> tensor<1x200x12x20xbf16> loc(#loc86) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc86) + } -> tensor<1x200x12x20xbf16> loc(#loc86) + %231 = xten_nn.subgraph (%arg5 = %230: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_118", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_118", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Clip_118", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_118", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_118", + OutputName = "Clip_118", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc87) + xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc87) + } -> tensor<1x200x12x20xbf16> loc(#loc87) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc87) + } -> tensor<1x200x12x20xbf16> loc(#loc87) + %232 = xten_nn.subgraph (%arg5 = %231: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_120", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_120", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Div_120", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_120", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_120", + OutputName = "Div_120", + shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc88) + xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc88) + } -> tensor<1x200x12x20xbf16> loc(#loc88) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc88) + } -> tensor<1x200x12x20xbf16> loc(#loc88) + %233 = xten_nn.subgraph (%arg5 = %229: tensor<1x200x12x20xbf16>, %arg6 = %232: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_121", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_121", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Mul_121", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = 
"data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_121", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_121", + OutputName = "Mul_121", + shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc89) + xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc89) + } -> tensor<1x200x12x20xbf16> loc(#loc89) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc89) + } -> tensor<1x200x12x20xbf16> loc(#loc89) + %234 = xten_nn.subgraph (%arg5 = %233: tensor<1x200x12x20xbf16>, %arg6 = %103: tensor<200x1x3x3xbf16>, %arg7 = %102: tensor<200xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_122", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_122", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x200x12x20xbf16>, %arg9 = %arg6: tensor<200x1x3x3xbf16>, %arg10 = %arg7: tensor<200xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_122", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[200, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_122", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = 
dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc90) + %465 = tosa.transpose %arg9, %464 : (tensor<200x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x200x1xbf16> loc(#loc90) + %466 = tosa.transpose %arg8, %463 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc90) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_122", + PartOfOutputName = "Conv_122", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x200xbf16>, tensor<3x3x200x1xbf16>, tensor<200xbf16>) -> tensor<1x12x20x200xbf16> loc(#loc90) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x200xbf16>, tensor<4xi32>) -> tensor<1x200x12x20xbf16> loc(#loc90) + xten_nn.output %468 : tensor<1x200x12x20xbf16> loc(#loc90) + } -> tensor<1x200x12x20xbf16> loc(#loc90) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc90) + } -> tensor<1x200x12x20xbf16> loc(#loc90) + %235 = xten_nn.subgraph (%arg5 = %234: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_124", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_124", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Add_124", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_124", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_124", OutputName = "Add_124"} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc91) + xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc91) + } -> tensor<1x200x12x20xbf16> loc(#loc91) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc91) + } -> tensor<1x200x12x20xbf16> loc(#loc91) + %236 = xten_nn.subgraph (%arg5 = %235: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_127", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat 
= "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_127", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Clip_127", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_127", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_127", + OutputName = "Clip_127", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc92) + xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc92) + } -> tensor<1x200x12x20xbf16> loc(#loc92) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc92) + } -> tensor<1x200x12x20xbf16> loc(#loc92) + %237 = xten_nn.subgraph (%arg5 = %236: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_129", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_129", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Div_129", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_129", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = 
"MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_129", + OutputName = "Div_129", + shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc93) + xten_nn.output %463 : tensor<1x200x12x20xbf16> loc(#loc93) + } -> tensor<1x200x12x20xbf16> loc(#loc93) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc93) + } -> tensor<1x200x12x20xbf16> loc(#loc93) + %238 = xten_nn.subgraph (%arg5 = %234: tensor<1x200x12x20xbf16>, %arg6 = %237: tensor<1x200x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_130", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_130", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x200x12x20xbf16>, %arg8 = %arg6: tensor<1x200x12x20xbf16>) attributes { + LayerName = "Mul_130", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_130", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_130", + OutputName = "Mul_130", + shift = 0 : i8} : (tensor<1x200x12x20xbf16>, tensor<1x200x12x20xbf16>) -> tensor<1x200x12x20xbf16> loc(#loc94) + xten_nn.output %462 : tensor<1x200x12x20xbf16> loc(#loc94) + } -> tensor<1x200x12x20xbf16> loc(#loc94) + xten_nn.output %461 : tensor<1x200x12x20xbf16> loc(#loc94) + } -> tensor<1x200x12x20xbf16> loc(#loc94) + %239 = xten_nn.subgraph (%arg5 = %238: tensor<1x200x12x20xbf16>, %arg6 = %101: tensor<80x200x1x1xbf16>, %arg7 = %100: 
tensor<80xbf16>, %arg8 = %228: tensor<1x80x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_131", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_132", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x200x12x20xbf16>, %arg10 = %arg6: tensor<80x200x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_131", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 200, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 200, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_131", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc95) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 80, 1, 1, 200>} : (tensor<80x200x1x1xbf16>) -> tensor<80x1x1x200xbf16> loc(#loc95) + %466 = tosa.transpose %arg9, %464 : (tensor<1x200x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x200xbf16> loc(#loc95) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_131", + PartOfOutputName = "Conv_131", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x200xbf16>, 
tensor<80x1x1x200xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc95) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc95) + xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc95) + } -> tensor<1x80x12x20xbf16> loc(#loc95) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { + LayerName = "Add_132", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_132", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_132", OutputName = "Add_132"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc96) + xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc96) + } -> tensor<1x80x12x20xbf16> loc(#loc96) + xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc96) + } -> tensor<1x80x12x20xbf16> loc(#loc330) + %240 = xten_nn.subgraph (%arg5 = %239: tensor<1x80x12x20xbf16>, %arg6 = %99: tensor<184x80x1x1xbf16>, %arg7 = %98: tensor<184xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_133", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_133", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_133", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_133", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc97) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 184, 1, 1, 80>} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc97) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc97) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_133", + PartOfOutputName = "Conv_133", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc97) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc97) + xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc97) + } -> tensor<1x184x12x20xbf16> loc(#loc97) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc97) + } -> tensor<1x184x12x20xbf16> loc(#loc97) + %241 = xten_nn.subgraph (%arg5 = %240: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_135", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_135", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Add_135", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_135", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 
12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_135", OutputName = "Add_135"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc98) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc98) + } -> tensor<1x184x12x20xbf16> loc(#loc98) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc98) + } -> tensor<1x184x12x20xbf16> loc(#loc98) + %242 = xten_nn.subgraph (%arg5 = %241: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_138", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_138", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Clip_138", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_138", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_138", + OutputName = "Clip_138", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc99) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc99) + } -> tensor<1x184x12x20xbf16> loc(#loc99) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc99) + } -> tensor<1x184x12x20xbf16> loc(#loc99) + %243 = xten_nn.subgraph (%arg5 = %242: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_140", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_140", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = 
"InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Div_140", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_140", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_140", + OutputName = "Div_140", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc100) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc100) + } -> tensor<1x184x12x20xbf16> loc(#loc100) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc100) + } -> tensor<1x184x12x20xbf16> loc(#loc100) + %244 = xten_nn.subgraph (%arg5 = %240: tensor<1x184x12x20xbf16>, %arg6 = %243: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_141", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_141", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Mul_141", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_141", + Reason = 
"MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_141", + OutputName = "Mul_141", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc101) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc101) + } -> tensor<1x184x12x20xbf16> loc(#loc101) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc101) + } -> tensor<1x184x12x20xbf16> loc(#loc101) + %245 = xten_nn.subgraph (%arg5 = %244: tensor<1x184x12x20xbf16>, %arg6 = %97: tensor<184x1x3x3xbf16>, %arg7 = %96: tensor<184xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_142", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_142", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_142", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_142", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) 
+ %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc102) + %465 = tosa.transpose %arg9, %464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc102) + %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc102) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_142", + PartOfOutputName = "Conv_142", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc102) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc102) + xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc102) + } -> tensor<1x184x12x20xbf16> loc(#loc102) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc102) + } -> tensor<1x184x12x20xbf16> loc(#loc102) + %246 = xten_nn.subgraph (%arg5 = %245: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_144", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_144", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Add_144", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_144", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_144", OutputName = "Add_144"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc103) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc103) + } -> tensor<1x184x12x20xbf16> loc(#loc103) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc103) + } -> tensor<1x184x12x20xbf16> loc(#loc103) + %247 = xten_nn.subgraph (%arg5 = %246: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_147", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + 
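+ // [editor's annotation, not part of the generated IR] TOSA convolutions are NHWC-only, so every NCHW conv in
+ // this file is bracketed by a transpose pair: [0, 2, 3, 1] on the input and [0, 3, 1, 2] on the result. For the
+ // depthwise Conv_142 above, the 184x1x3x3 (CMHW) weights are additionally permuted by [2, 3, 0, 1] into the
+ // 3x3x184x1 (HWCM) layout that tosa.depthwise_conv2d expects.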
OutputName = "Clip_147", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Clip_147", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_147", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_147", + OutputName = "Clip_147", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc104) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc104) + } -> tensor<1x184x12x20xbf16> loc(#loc104) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc104) + } -> tensor<1x184x12x20xbf16> loc(#loc104) + %248 = xten_nn.subgraph (%arg5 = %247: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_149", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_149", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Div_149", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_149", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + 
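+ // [editor's annotation, not part of the generated IR] Add_144 -> Clip_147 -> Div_149 -> Mul_150 is the usual
+ // hard-swish decomposition hswish(x) = x * clamp(x + 3, 0, 6) * (1/6): the divide-by-6 is lowered to a scalar
+ // multiply (MulAttributeBroadcastingBf16) whose constant 1.660160e-01 is 1/6 rounded to bf16. Worked example
+ // for x = 1: clamp(4, 0, 6) * 0.166016 = 0.664, against the exact 4/6 = 0.667. The same four-op chain appears
+ // as Add_135/Clip_138/Div_140/Mul_141 above and the Add_155 and Add_164 chains below.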
config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_149", + OutputName = "Div_149", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc105) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc105) + } -> tensor<1x184x12x20xbf16> loc(#loc105) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc105) + } -> tensor<1x184x12x20xbf16> loc(#loc105) + %249 = xten_nn.subgraph (%arg5 = %245: tensor<1x184x12x20xbf16>, %arg6 = %248: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_150", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_150", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Mul_150", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_150", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_150", + OutputName = "Mul_150", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc106) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc106) + } -> tensor<1x184x12x20xbf16> loc(#loc106) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc106) + } -> tensor<1x184x12x20xbf16> loc(#loc106) + %250 = xten_nn.subgraph (%arg5 = %249: tensor<1x184x12x20xbf16>, %arg6 = %95: tensor<80x184x1x1xbf16>, %arg7 = %94: tensor<80xbf16>, %arg8 = %239: tensor<1x80x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_151", + OfmShare = 3 : index, + 
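+ // [editor's annotation, an inference from attribute names, not stated in the IR] OfmShare = 3 presumably marks
+ // that the chain's output buffer may alias operand 3 (%239, the previous block's Add_132 result), letting the
+ // fused Conv_151 + Add_152 pair write the residual sum in place.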
Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_152", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_151", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_151", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc107) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 80, 1, 1, 184>} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc107) + %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc107) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_151", + PartOfOutputName = "Conv_151", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc107) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> 
tensor<1x80x12x20xbf16> loc(#loc107) + xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc107) + } -> tensor<1x80x12x20xbf16> loc(#loc107) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { + LayerName = "Add_152", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_152", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_152", OutputName = "Add_152"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc108) + xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc108) + } -> tensor<1x80x12x20xbf16> loc(#loc108) + xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc108) + } -> tensor<1x80x12x20xbf16> loc(#loc331) + %251 = xten_nn.subgraph (%arg5 = %250: tensor<1x80x12x20xbf16>, %arg6 = %93: tensor<184x80x1x1xbf16>, %arg7 = %92: tensor<184xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_153", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_153", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<184x80x1x1xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_153", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 80, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = 
"Conv_153", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc109) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 184, 1, 1, 80>} : (tensor<184x80x1x1xbf16>) -> tensor<184x1x1x80xbf16> loc(#loc109) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc109) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_153", + PartOfOutputName = "Conv_153", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x80xbf16>, tensor<184x1x1x80xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc109) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc109) + xten_nn.output %467 : tensor<1x184x12x20xbf16> loc(#loc109) + } -> tensor<1x184x12x20xbf16> loc(#loc109) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc109) + } -> tensor<1x184x12x20xbf16> loc(#loc109) + %252 = xten_nn.subgraph (%arg5 = %251: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_155", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_155", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Add_155", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_155", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + 
config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_155", OutputName = "Add_155"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc110) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc110) + } -> tensor<1x184x12x20xbf16> loc(#loc110) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc110) + } -> tensor<1x184x12x20xbf16> loc(#loc110) + %253 = xten_nn.subgraph (%arg5 = %252: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_158", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_158", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Clip_158", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_158", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_158", + OutputName = "Clip_158", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc111) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc111) + } -> tensor<1x184x12x20xbf16> loc(#loc111) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc111) + } -> tensor<1x184x12x20xbf16> loc(#loc111) + %254 = xten_nn.subgraph (%arg5 = %253: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_160", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_160", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Div_160", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_160", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_160", + OutputName = "Div_160", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc112) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc112) + } -> tensor<1x184x12x20xbf16> loc(#loc112) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc112) + } -> tensor<1x184x12x20xbf16> loc(#loc112) + %255 = xten_nn.subgraph (%arg5 = %251: tensor<1x184x12x20xbf16>, %arg6 = %254: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_161", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_161", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Mul_161", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_161", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + 
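+ // [editor's annotation, not part of the generated IR] Conv_153 (1x1, 80 -> 184), Conv_162 (3x3 depthwise) and
+ // Conv_171 (1x1, 184 -> 80, with OfmShare onto the Add_152 result) form an expand/depthwise/project sequence
+ // with a residual add, consistent with a MobileNetV3-style inverted-residual block; the hard-swish chains in
+ // between act as its activations.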
l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_161", + OutputName = "Mul_161", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc113) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc113) + } -> tensor<1x184x12x20xbf16> loc(#loc113) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc113) + } -> tensor<1x184x12x20xbf16> loc(#loc113) + %256 = xten_nn.subgraph (%arg5 = %255: tensor<1x184x12x20xbf16>, %arg6 = %91: tensor<184x1x3x3xbf16>, %arg7 = %90: tensor<184xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_162", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_162", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x184x12x20xbf16>, %arg9 = %arg6: tensor<184x1x3x3xbf16>, %arg10 = %arg7: tensor<184xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_162", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[184, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_162", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc114) + %465 = tosa.transpose %arg9, 
%464 : (tensor<184x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x184x1xbf16> loc(#loc114) + %466 = tosa.transpose %arg8, %463 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc114) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_162", + PartOfOutputName = "Conv_162", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x184xbf16>, tensor<3x3x184x1xbf16>, tensor<184xbf16>) -> tensor<1x12x20x184xbf16> loc(#loc114) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x184xbf16>, tensor<4xi32>) -> tensor<1x184x12x20xbf16> loc(#loc114) + xten_nn.output %468 : tensor<1x184x12x20xbf16> loc(#loc114) + } -> tensor<1x184x12x20xbf16> loc(#loc114) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc114) + } -> tensor<1x184x12x20xbf16> loc(#loc114) + %257 = xten_nn.subgraph (%arg5 = %256: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_164", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_164", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Add_164", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_164", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_164", OutputName = "Add_164"} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc115) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc115) + } -> tensor<1x184x12x20xbf16> loc(#loc115) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc115) + } -> tensor<1x184x12x20xbf16> loc(#loc115) + %258 = xten_nn.subgraph (%arg5 = %257: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_167", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_167", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + 
CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Clip_167", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_167", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_167", + OutputName = "Clip_167", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc116) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc116) + } -> tensor<1x184x12x20xbf16> loc(#loc116) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc116) + } -> tensor<1x184x12x20xbf16> loc(#loc116) + %259 = xten_nn.subgraph (%arg5 = %258: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_169", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_169", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Div_169", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_169", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() 
<{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_169", + OutputName = "Div_169", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc117) + xten_nn.output %463 : tensor<1x184x12x20xbf16> loc(#loc117) + } -> tensor<1x184x12x20xbf16> loc(#loc117) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc117) + } -> tensor<1x184x12x20xbf16> loc(#loc117) + %260 = xten_nn.subgraph (%arg5 = %256: tensor<1x184x12x20xbf16>, %arg6 = %259: tensor<1x184x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_170", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_170", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x184x12x20xbf16>, %arg8 = %arg6: tensor<1x184x12x20xbf16>) attributes { + LayerName = "Mul_170", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_170", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_170", + OutputName = "Mul_170", + shift = 0 : i8} : (tensor<1x184x12x20xbf16>, tensor<1x184x12x20xbf16>) -> tensor<1x184x12x20xbf16> loc(#loc118) + xten_nn.output %462 : tensor<1x184x12x20xbf16> loc(#loc118) + } -> tensor<1x184x12x20xbf16> loc(#loc118) + xten_nn.output %461 : tensor<1x184x12x20xbf16> loc(#loc118) + } -> tensor<1x184x12x20xbf16> loc(#loc118) + %261 = xten_nn.subgraph (%arg5 = %260: tensor<1x184x12x20xbf16>, %arg6 = %89: tensor<80x184x1x1xbf16>, %arg7 = %88: tensor<80xbf16>, %arg8 = %250: tensor<1x80x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_171", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, 
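+ // [annotation, not compiler output] Add_164 -> Clip_167 -> Div_169 -> Mul_170 above is
+ // the decomposed hard-swish hswish(x) = x * clamp(x + 3, 0, 6) / 6: the division by 6
+ // is lowered to MulAttributeBroadcastingBf16 with config.scalar = 1.660160e-01
+ // (0.166015625, the bf16 constant standing in for 1/6), avoiding a divide kernel.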
+ l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_172", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x184x12x20xbf16>, %arg10 = %arg6: tensor<80x184x1x1xbf16>, %arg11 = %arg7: tensor<80xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_171", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 184, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 184, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_171", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc119) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 80, 1, 1, 184>} : (tensor<80x184x1x1xbf16>) -> tensor<80x1x1x184xbf16> loc(#loc119) + %466 = tosa.transpose %arg9, %464 : (tensor<1x184x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x184xbf16> loc(#loc119) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_171", + PartOfOutputName = "Conv_171", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x184xbf16>, tensor<80x1x1x184xbf16>, tensor<80xbf16>) -> tensor<1x12x20x80xbf16> loc(#loc119) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x80xbf16>, tensor<4xi32>) -> tensor<1x80x12x20xbf16> loc(#loc119) + xten_nn.output %468 : tensor<1x80x12x20xbf16> loc(#loc119) + } -> tensor<1x80x12x20xbf16> loc(#loc119) + %462 =
xten_nn.subgraph (%arg9 = %461: tensor<1x80x12x20xbf16>, %arg10 = %arg8: tensor<1x80x12x20xbf16>) attributes { + LayerName = "Add_172", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_172", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_172", OutputName = "Add_172"} : (tensor<1x80x12x20xbf16>, tensor<1x80x12x20xbf16>) -> tensor<1x80x12x20xbf16> loc(#loc120) + xten_nn.output %463 : tensor<1x80x12x20xbf16> loc(#loc120) + } -> tensor<1x80x12x20xbf16> loc(#loc120) + xten_nn.output %462 : tensor<1x80x12x20xbf16> loc(#loc120) + } -> tensor<1x80x12x20xbf16> loc(#loc332) + %262 = xten_nn.subgraph (%arg5 = %261: tensor<1x80x12x20xbf16>, %arg6 = %87: tensor<480x80x1x1xbf16>, %arg7 = %86: tensor<480xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_173", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_173", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x12x20xbf16>, %arg9 = %arg6: tensor<480x80x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_173", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 80, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_173", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = 
"data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc121) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x80x1x1xbf16>) -> tensor<480x1x1x80xbf16> loc(#loc121) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x80xbf16> loc(#loc121) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_173", + PartOfOutputName = "Conv_173", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x80xbf16>, tensor<480x1x1x80xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc121) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc121) + xten_nn.output %467 : tensor<1x480x12x20xbf16> loc(#loc121) + } -> tensor<1x480x12x20xbf16> loc(#loc121) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc121) + } -> tensor<1x480x12x20xbf16> loc(#loc121) + %263 = xten_nn.subgraph (%arg5 = %262: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_175", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_175", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Add_175", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_175", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + 
config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_175", OutputName = "Add_175"} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc122) + xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc122) + } -> tensor<1x480x12x20xbf16> loc(#loc122) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc122) + } -> tensor<1x480x12x20xbf16> loc(#loc122) + %264 = xten_nn.subgraph (%arg5 = %263: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_178", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_178", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Clip_178", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_178", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_178", + OutputName = "Clip_178", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc123) + xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc123) + } -> tensor<1x480x12x20xbf16> loc(#loc123) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc123) + } -> tensor<1x480x12x20xbf16> loc(#loc123) + %265 = xten_nn.subgraph (%arg5 = %264: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_180", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_180", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, 
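+ // [annotation, not compiler output] ClipBf16 with clamp_min = 0 and clamp_max = 6 is a
+ // plain ReLU6; the NonNegativeOut trait records that the result is provably >= 0, which
+ // later passes appear to exploit when simplifying downstream kernels.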
+ L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Div_180", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_180", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_180", + OutputName = "Div_180", + shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc124) + xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc124) + } -> tensor<1x480x12x20xbf16> loc(#loc124) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc124) + } -> tensor<1x480x12x20xbf16> loc(#loc124) + %266 = xten_nn.subgraph (%arg5 = %262: tensor<1x480x12x20xbf16>, %arg6 = %265: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_181", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_181", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Mul_181", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_181", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = 
{ + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_181", + OutputName = "Mul_181", + shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc125) + xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc125) + } -> tensor<1x480x12x20xbf16> loc(#loc125) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc125) + } -> tensor<1x480x12x20xbf16> loc(#loc125) + %267 = xten_nn.subgraph (%arg5 = %266: tensor<1x480x12x20xbf16>, %arg6 = %85: tensor<480x1x3x3xbf16>, %arg7 = %84: tensor<480xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_182", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_182", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<480x1x3x3xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_182", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_182", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc126) + %465 = tosa.transpose %arg9, %464 : (tensor<480x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x480x1xbf16> loc(#loc126) + %466 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, 
tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc126) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_182", + PartOfOutputName = "Conv_182", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x480xbf16>, tensor<3x3x480x1xbf16>, tensor<480xbf16>) -> tensor<1x12x20x480xbf16> loc(#loc126) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x480xbf16>, tensor<4xi32>) -> tensor<1x480x12x20xbf16> loc(#loc126) + xten_nn.output %468 : tensor<1x480x12x20xbf16> loc(#loc126) + } -> tensor<1x480x12x20xbf16> loc(#loc126) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc126) + } -> tensor<1x480x12x20xbf16> loc(#loc126) + %268 = xten_nn.subgraph (%arg5 = %267: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_184", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_184", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Add_184", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_184", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_184", OutputName = "Add_184"} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc127) + xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc127) + } -> tensor<1x480x12x20xbf16> loc(#loc127) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc127) + } -> tensor<1x480x12x20xbf16> loc(#loc127) + %269 = xten_nn.subgraph (%arg5 = %268: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_187", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_187", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 
20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Clip_187", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_187", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_187", + OutputName = "Clip_187", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc128) + xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc128) + } -> tensor<1x480x12x20xbf16> loc(#loc128) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc128) + } -> tensor<1x480x12x20xbf16> loc(#loc128) + %270 = xten_nn.subgraph (%arg5 = %269: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_189", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_189", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Div_189", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_189", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_189", + OutputName = 
"Div_189", + shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc129) + xten_nn.output %463 : tensor<1x480x12x20xbf16> loc(#loc129) + } -> tensor<1x480x12x20xbf16> loc(#loc129) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc129) + } -> tensor<1x480x12x20xbf16> loc(#loc129) + %271 = xten_nn.subgraph (%arg5 = %267: tensor<1x480x12x20xbf16>, %arg6 = %270: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_190", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_190", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Mul_190", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_190", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_190", + OutputName = "Mul_190", + shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc130) + xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc130) + } -> tensor<1x480x12x20xbf16> loc(#loc130) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc130) + } -> tensor<1x480x12x20xbf16> loc(#loc130) + %272 = xten_nn.subgraph (%arg5 = %271: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#24", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#25", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + 
CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 60 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array<i64: 1, 480, 1, 240>} : (tensor<1x480x12x20xbf16>) -> tensor<1x480x1x240xbf16> loc(#loc333) + xten_nn.output %461 : tensor<1x480x1x240xbf16> loc(#loc333) + } -> tensor<1x480x1x240xbf16> loc(#loc333) + %273 = xten_nn.subgraph (%arg5 = %272: tensor<1x480x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#26", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#27", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x240xbf16>) attributes { + LayerName = "Generated-#26", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#27", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 480 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array<i64: 3>, keepdims = 1 : i64} : (tensor<1x480x1x240xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc131) + xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc131) + } -> tensor<1x480x1x1xbf16> loc(#loc131) + xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc131) + } -> tensor<1x480x1x1xbf16> loc(#loc131) + %274 = xten_nn.subgraph (%arg5 = %273: tensor<1x480x1x1xbf16>, %arg6 = %83: tensor<120x480x1x1xbf16>, %arg7 = %82: tensor<120xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_192", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_193", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason =
"InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x1x1xbf16>, %arg9 = %arg6: tensor<120x480x1x1xbf16>, %arg10 = %arg7: tensor<120xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_192", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[120, 480, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_193", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<120x480x1x1xbf16>) -> tensor<120x1x1x480xbf16> loc(#loc334) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc334) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_192", + PartOfOutputName = "Conv_192", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x480xbf16>, tensor<120x1x1x480xbf16>, tensor<120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc132) + %465 = tosa.clamp %464 { + LayerName = "Relu_193", + OutputName = "Relu_193", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x120xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc133) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x120xbf16>) -> tensor<1x120x1x1xbf16> loc(#loc334) + xten_nn.output %466 : tensor<1x120x1x1xbf16> loc(#loc133) + } -> tensor<1x120x1x1xbf16> loc(#loc334) + xten_nn.output %461 : tensor<1x120x1x1xbf16> loc(#loc334) + } -> tensor<1x120x1x1xbf16> loc(#loc334) + %275 = xten_nn.subgraph (%arg5 = %274: tensor<1x120x1x1xbf16>, %arg6 = %81: tensor<480x120x1x1xbf16>, %arg7 = %80: tensor<480xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_194", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : 
vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_194", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x120x1x1xbf16>, %arg9 = %arg6: tensor<480x120x1x1xbf16>, %arg10 = %arg7: tensor<480xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_194", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[480, 120, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_194", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<480x120x1x1xbf16>) -> tensor<480x1x1x120xbf16> loc(#loc134) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x120x1x1xbf16>) -> tensor<1x1x1x120xbf16> loc(#loc134) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_194", + PartOfOutputName = "Conv_194", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x120xbf16>, tensor<480x1x1x120xbf16>, tensor<480xbf16>) -> tensor<1x1x1x480xbf16> loc(#loc134) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x480xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc134) + xten_nn.output %465 : tensor<1x480x1x1xbf16> loc(#loc134) + } -> tensor<1x480x1x1xbf16> loc(#loc134) + xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc134) + } -> tensor<1x480x1x1xbf16> loc(#loc134) + %276 = xten_nn.subgraph (%arg5 = %275: tensor<1x480x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_196", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_196", + 
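+ // [annotation, not compiler output] Conv_192 (480 -> 120, with Relu_193 fused via
+ // config.act = 1) and Conv_194 (120 -> 480) are the excite bottleneck; Add_196 ->
+ // Clip_199 -> Div_201 below compute the hard-sigmoid gate clamp(x + 3, 0, 6) / 6, which
+ // TileAdf broadcasts from 1x480x1x1 back to 1x480x12x20 for the per-channel Mul_202.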
Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { + LayerName = "Add_196", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_196", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_196", OutputName = "Add_196"} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc135) + xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc135) + } -> tensor<1x480x1x1xbf16> loc(#loc135) + xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc135) + } -> tensor<1x480x1x1xbf16> loc(#loc135) + %277 = xten_nn.subgraph (%arg5 = %276: tensor<1x480x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_199", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_199", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { + LayerName = "Clip_199", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_199", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : 
si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_199", + OutputName = "Clip_199", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc136) + xten_nn.output %462 : tensor<1x480x1x1xbf16> loc(#loc136) + } -> tensor<1x480x1x1xbf16> loc(#loc136) + xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc136) + } -> tensor<1x480x1x1xbf16> loc(#loc136) + %278 = xten_nn.subgraph (%arg5 = %277: tensor<1x480x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_201", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_201", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x480x1x1xbf16>) attributes { + LayerName = "Div_201", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_201", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_201", + OutputName = "Div_201", + shift = 0 : i8} : (tensor<1x480x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x480x1x1xbf16> loc(#loc137) + xten_nn.output %463 : tensor<1x480x1x1xbf16> loc(#loc137) + } -> tensor<1x480x1x1xbf16> loc(#loc137) + xten_nn.output %461 : tensor<1x480x1x1xbf16> loc(#loc137) + } -> tensor<1x480x1x1xbf16> loc(#loc137) + %279 = xten_nn.subgraph (%arg5 = %278: tensor<1x480x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#28", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#29", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count 
= dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 480 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x480x1x1xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc138) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc138) + } -> tensor<1x480x12x20xbf16> loc(#loc138) + %280 = xten_nn.subgraph (%arg5 = %279: tensor<1x480x12x20xbf16>, %arg6 = %271: tensor<1x480x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_202", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_202", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x480x12x20xbf16>, %arg8 = %arg6: tensor<1x480x12x20xbf16>) attributes { + LayerName = "Mul_202", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_202", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_202", + OutputName = "Mul_202", + shift = 0 : i8} : (tensor<1x480x12x20xbf16>, tensor<1x480x12x20xbf16>) -> tensor<1x480x12x20xbf16> loc(#loc138) + xten_nn.output %462 : tensor<1x480x12x20xbf16> loc(#loc138) + } -> tensor<1x480x12x20xbf16> loc(#loc138) + xten_nn.output %461 : tensor<1x480x12x20xbf16> loc(#loc138) + } -> tensor<1x480x12x20xbf16> loc(#loc138) + %281 = xten_nn.subgraph (%arg5 = %280: tensor<1x480x12x20xbf16>, %arg6 = %79: tensor<112x480x1x1xbf16>, %arg7 = %78: tensor<112xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_203", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_203", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x480x12x20xbf16>, %arg9 = %arg6: tensor<112x480x1x1xbf16>, %arg10 = %arg7: tensor<112xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_203", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 480, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[112, 480, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_203", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc139) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 112, 1, 1, 480>} : (tensor<112x480x1x1xbf16>) -> tensor<112x1x1x480xbf16> loc(#loc139) + %465 = tosa.transpose %arg8, %463 : (tensor<1x480x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x480xbf16> loc(#loc139) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_203", + PartOfOutputName = "Conv_203", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x480xbf16>, tensor<112x1x1x480xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc139) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc139) + xten_nn.output %467 : tensor<1x112x12x20xbf16> loc(#loc139) + } -> tensor<1x112x12x20xbf16> loc(#loc139) + xten_nn.output %461 : tensor<1x112x12x20xbf16> loc(#loc139) + } -> tensor<1x112x12x20xbf16> loc(#loc139) + %282 = xten_nn.subgraph (%arg5 = %281: tensor<1x112x12x20xbf16>, %arg6 = %77: tensor<672x112x1x1xbf16>, %arg7 = %76: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_204", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_204", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_204", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_204", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc140) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 672, 1, 1, 112>} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc140) + %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc140) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_204", + PartOfOutputName = "Conv_204", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc140) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc140) + xten_nn.output %467 :
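// NOTE: Conv_203/Conv_204 above are 1x1 convolutions lowered to tosa.conv2d, which takes NHWC
// activations and OHWI weights: each OxCx1x1 weight tensor is reshaped to Ox1x1xC, and the NCHW
// feature map is bracketed by a transpose pair (perm [0, 2, 3, 1] going in, perm [0, 3, 1, 2]
// coming out). This reading follows directly from the tensor types printed in the IR.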
tensor<1x672x12x20xbf16> loc(#loc140) + } -> tensor<1x672x12x20xbf16> loc(#loc140) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc140) + } -> tensor<1x672x12x20xbf16> loc(#loc140) + %283 = xten_nn.subgraph (%arg5 = %282: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_206", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_206", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Add_206", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_206", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_206", OutputName = "Add_206"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc141) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc141) + } -> tensor<1x672x12x20xbf16> loc(#loc141) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc141) + } -> tensor<1x672x12x20xbf16> loc(#loc141) + %284 = xten_nn.subgraph (%arg5 = %283: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_209", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_209", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Clip_209", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> + } + ], + OutputName = "Clip_209", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_209", + OutputName = "Clip_209", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc142) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc142) + } -> tensor<1x672x12x20xbf16> loc(#loc142) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc142) + } -> tensor<1x672x12x20xbf16> loc(#loc142) + %285 = xten_nn.subgraph (%arg5 = %284: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_211", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_211", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Div_211", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_211", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_211", + OutputName = "Div_211", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc143) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc143) + } -> tensor<1x672x12x20xbf16> loc(#loc143) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc143) + } -> tensor<1x672x12x20xbf16> loc(#loc143) + %286 = xten_nn.subgraph (%arg5 = %282: tensor<1x672x12x20xbf16>, %arg6 = %285: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : 
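// NOTE: the chain Add_206 (+3) -> Clip_209 (clamp to [0, 6]) -> Div_211 (x 1.660160e-01, the bf16
// rounding of 1/6 = 0.166015625) -> Mul_212 (by the pre-activation value) matches the standard
// HardSwish decomposition, hswish(x) = x * clamp(x + 3, 0, 6) / 6. The division is lowered to a
// broadcast multiply, which is why a layer named Div_211 Specializes = "MulAttributeBroadcastingBf16".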
index, 1 : index], + LayerName = "Mul_212", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_212", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Mul_212", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_212", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_212", + OutputName = "Mul_212", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc144) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc144) + } -> tensor<1x672x12x20xbf16> loc(#loc144) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc144) + } -> tensor<1x672x12x20xbf16> loc(#loc144) + %287 = xten_nn.subgraph (%arg5 = %286: tensor<1x672x12x20xbf16>, %arg6 = %75: tensor<672x1x3x3xbf16>, %arg7 = %74: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_213", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_213", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering 
= "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x3x3xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_213", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 1, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_213", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 3 : ui8, + config.kernel_width = 3 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc145) + %465 = tosa.transpose %arg9, %464 : (tensor<672x1x3x3xbf16>, tensor<4xi32>) -> tensor<3x3x672x1xbf16> loc(#loc145) + %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc145) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_213", + PartOfOutputName = "Conv_213", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x672xbf16>, tensor<3x3x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc145) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc145) + xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc145) + } -> tensor<1x672x12x20xbf16> loc(#loc145) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc145) + } -> tensor<1x672x12x20xbf16> loc(#loc145) + %288 = xten_nn.subgraph (%arg5 = %287: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_215", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_215", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Add_215", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_215", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_215", OutputName = "Add_215"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc146) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc146) + } -> tensor<1x672x12x20xbf16> loc(#loc146) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc146) + } -> tensor<1x672x12x20xbf16> loc(#loc146) + %289 = xten_nn.subgraph (%arg5 = %288: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_218", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_218", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Clip_218", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_218", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_218", + OutputName = "Clip_218", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc147) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc147) + } -> tensor<1x672x12x20xbf16> loc(#loc147) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc147) + } -> tensor<1x672x12x20xbf16> loc(#loc147) + %290 = xten_nn.subgraph 
(%arg5 = %289: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_220", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_220", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Div_220", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_220", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_220", + OutputName = "Div_220", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc148) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc148) + } -> tensor<1x672x12x20xbf16> loc(#loc148) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc148) + } -> tensor<1x672x12x20xbf16> loc(#loc148) + %291 = xten_nn.subgraph (%arg5 = %287: tensor<1x672x12x20xbf16>, %arg6 = %290: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_221", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_221", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Mul_221", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + 
Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_221", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_221", + OutputName = "Mul_221", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc149) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc149) + } -> tensor<1x672x12x20xbf16> loc(#loc149) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc149) + } -> tensor<1x672x12x20xbf16> loc(#loc149) + %292 = xten_nn.subgraph (%arg5 = %291: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#30", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#31", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 84 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc335) + xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc335) + } -> tensor<1x672x1x240xbf16> loc(#loc335) + %293 = xten_nn.subgraph (%arg5 = %292: tensor<1x672x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#32", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#33", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { + LayerName = "Generated-#32", + 
Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#33", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 672 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc150) + xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc150) + } -> tensor<1x672x1x1xbf16> loc(#loc150) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc150) + } -> tensor<1x672x1x1xbf16> loc(#loc150) + %294 = xten_nn.subgraph (%arg5 = %293: tensor<1x672x1x1xbf16>, %arg6 = %73: tensor<168x672x1x1xbf16>, %arg7 = %72: tensor<168xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_223", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_224", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_223", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_224", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, 
+ config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc336) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc336) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_223", + PartOfOutputName = "Conv_223", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc151) + %465 = tosa.clamp %464 { + LayerName = "Relu_224", + OutputName = "Relu_224", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc152) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc336) + xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc152) + } -> tensor<1x168x1x1xbf16> loc(#loc336) + xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc336) + } -> tensor<1x168x1x1xbf16> loc(#loc336) + %295 = xten_nn.subgraph (%arg5 = %294: tensor<1x168x1x1xbf16>, %arg6 = %71: tensor<672x168x1x1xbf16>, %arg7 = %70: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_225", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_225", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_225", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_225", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : 
vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc153) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc153) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_225", + PartOfOutputName = "Conv_225", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc153) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc153) + xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc153) + } -> tensor<1x672x1x1xbf16> loc(#loc153) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc153) + } -> tensor<1x672x1x1xbf16> loc(#loc153) + %296 = xten_nn.subgraph (%arg5 = %295: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_227", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_227", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Add_227", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_227", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_227", OutputName = "Add_227"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc154) + xten_nn.output %463 : 
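// NOTE: subgraphs %292-%300 appear to form a squeeze-and-excitation block: the 12x20 spatial map
// is flattened to 1x240 (Transpose4dAdf) and mean-reduced over W (ReduceMeanC8Bf16) into a
// 1x672x1x1 descriptor, squeezed 672 -> 168 with a fused ReLU (Conv_223/Relu_224), expanded back
// 168 -> 672 (Conv_225), gated by the same Add/Clip/Mul hard-sigmoid pattern (Add_227, Clip_230,
// Div_232), broadcast back to 12x20 (TileAdf), and multiplied into the feature map (Mul_233).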
tensor<1x672x1x1xbf16> loc(#loc154) + } -> tensor<1x672x1x1xbf16> loc(#loc154) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc154) + } -> tensor<1x672x1x1xbf16> loc(#loc154) + %297 = xten_nn.subgraph (%arg5 = %296: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_230", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_230", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Clip_230", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_230", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_230", + OutputName = "Clip_230", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc155) + xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc155) + } -> tensor<1x672x1x1xbf16> loc(#loc155) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc155) + } -> tensor<1x672x1x1xbf16> loc(#loc155) + %298 = xten_nn.subgraph (%arg5 = %297: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_232", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_232", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Div_232", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : 
vector<4xindex> + } + ], + OutputName = "Div_232", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_232", + OutputName = "Div_232", + shift = 0 : i8} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc156) + xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc156) + } -> tensor<1x672x1x1xbf16> loc(#loc156) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc156) + } -> tensor<1x672x1x1xbf16> loc(#loc156) + %299 = xten_nn.subgraph (%arg5 = %298: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#34", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#35", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 672 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array<i64: 1, 1, 12, 20>} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc157) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc157) + } -> tensor<1x672x12x20xbf16> loc(#loc157) + %300 = xten_nn.subgraph (%arg5 = %299: tensor<1x672x12x20xbf16>, %arg6 = %291: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_233", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_233", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Mul_233", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_233", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_233", + OutputName = "Mul_233", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc157) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc157) + } -> tensor<1x672x12x20xbf16> loc(#loc157) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc157) + } -> tensor<1x672x12x20xbf16> loc(#loc157) + %301 = xten_nn.subgraph (%arg5 = %300: tensor<1x672x12x20xbf16>, %arg6 = %69: tensor<112x672x1x1xbf16>, %arg7 = %68: tensor<112xbf16>, %arg8 = %281: tensor<1x112x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_234", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_235", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x672x12x20xbf16>, %arg10 = %arg6: tensor<112x672x1x1xbf16>, %arg11 = %arg7: tensor<112xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_234", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[112, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_234", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc158) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 112, 1, 1, 672>} : (tensor<112x672x1x1xbf16>) -> tensor<112x1x1x672xbf16> loc(#loc158) + %466 = tosa.transpose %arg9, %464 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc158) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_234", + PartOfOutputName = "Conv_234", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x672xbf16>, tensor<112x1x1x672xbf16>, tensor<112xbf16>) -> tensor<1x12x20x112xbf16> loc(#loc158) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x112xbf16>, tensor<4xi32>) -> tensor<1x112x12x20xbf16> loc(#loc158) + xten_nn.output %468 : tensor<1x112x12x20xbf16> loc(#loc158) + } -> tensor<1x112x12x20xbf16> loc(#loc158) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x112x12x20xbf16>, %arg10 = %arg8: tensor<1x112x12x20xbf16>) attributes { + LayerName = "Add_235", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_235", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_235", OutputName = "Add_235"} : (tensor<1x112x12x20xbf16>, tensor<1x112x12x20xbf16>) -> tensor<1x112x12x20xbf16> loc(#loc159) + xten_nn.output %463 : tensor<1x112x12x20xbf16> loc(#loc159) + } -> tensor<1x112x12x20xbf16> loc(#loc159) + xten_nn.output %462 : tensor<1x112x12x20xbf16> loc(#loc159) + } -> tensor<1x112x12x20xbf16> loc(#loc337) + %302 = xten_nn.subgraph (%arg5 = %301: tensor<1x112x12x20xbf16>, %arg6 = %67: tensor<672x112x1x1xbf16>, %arg7 = %66: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_236", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_236", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x112x12x20xbf16>, %arg9 = %arg6: tensor<672x112x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_236", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 112, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 112, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_236", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc160) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 672, 1, 1, 112>} : (tensor<672x112x1x1xbf16>) -> tensor<672x1x1x112xbf16> loc(#loc160) + %465 = tosa.transpose %arg8, %463 : (tensor<1x112x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x112xbf16> loc(#loc160) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_236", + PartOfOutputName = "Conv_236", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x112xbf16>, tensor<672x1x1x112xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc160) + %467 =
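// NOTE: Conv_234 (672 -> 112, 1x1) with the fused AddBf16 (Add_235) closes the inverted-residual
// block: the add's second input is %281, the block's original 1x112x12x20 input. OfmShare = 3 is
// read here as "the OFM buffer is shared with operand 3 (the residual input)"; that interpretation
// is an assumption, as the attribute is not documented in this file. Conv_236 then starts the next
// block's 112 -> 672 expansion, feeding the same Add/Clip/Mul hard-swish pattern.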
tosa.transpose %466, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc160) + xten_nn.output %467 : tensor<1x672x12x20xbf16> loc(#loc160) + } -> tensor<1x672x12x20xbf16> loc(#loc160) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc160) + } -> tensor<1x672x12x20xbf16> loc(#loc160) + %303 = xten_nn.subgraph (%arg5 = %302: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_238", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_238", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Add_238", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_238", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_238", OutputName = "Add_238"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc161) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc161) + } -> tensor<1x672x12x20xbf16> loc(#loc161) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc161) + } -> tensor<1x672x12x20xbf16> loc(#loc161) + %304 = xten_nn.subgraph (%arg5 = %303: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_241", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_241", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Clip_241", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_241", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_241", + OutputName = "Clip_241", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc162) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc162) + } -> tensor<1x672x12x20xbf16> loc(#loc162) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc162) + } -> tensor<1x672x12x20xbf16> loc(#loc162) + %305 = xten_nn.subgraph (%arg5 = %304: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_243", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_243", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Div_243", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_243", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_243", + OutputName = "Div_243", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc163) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc163) + } -> tensor<1x672x12x20xbf16> loc(#loc163) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc163) + } -> tensor<1x672x12x20xbf16> loc(#loc163) + %306 
= xten_nn.subgraph (%arg5 = %302: tensor<1x672x12x20xbf16>, %arg6 = %305: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_244", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_244", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Mul_244", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_244", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_244", + OutputName = "Mul_244", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc164) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc164) + } -> tensor<1x672x12x20xbf16> loc(#loc164) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc164) + } -> tensor<1x672x12x20xbf16> loc(#loc164) + %307 = xten_nn.subgraph (%arg5 = %306: tensor<1x672x12x20xbf16>, %arg6 = %65: tensor<672x1x9x9xbf16>, %arg7 = %64: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_245", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_245", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = 
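+ // Conv_245: 9x9 depthwise convolution (one filter per channel, 672 channels), stride 1, padding 4 on every side (HWPadding [[4, 4], [4, 4]]), so the 12x20 spatial shape is preserved.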
dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<672x1x9x9xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[4, 4], [4, 4]], + LayerName = "Conv_245", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 1, 9, 9]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_245", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 9 : ui8, + config.kernel_width = 9 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc165) + %465 = tosa.transpose %arg9, %464 : (tensor<672x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x672x1xbf16> loc(#loc165) + %466 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc165) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_245", + PartOfOutputName = "Conv_245", + dilation = array<i64: 1, 1>, + pad = array<i64: 4, 4, 4, 4>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x672xbf16>, tensor<9x9x672x1xbf16>, tensor<672xbf16>) -> tensor<1x12x20x672xbf16> loc(#loc165) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x672xbf16>, tensor<4xi32>) -> tensor<1x672x12x20xbf16> loc(#loc165) + xten_nn.output %468 : tensor<1x672x12x20xbf16> loc(#loc165) + } -> tensor<1x672x12x20xbf16> loc(#loc165) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc165) + } -> tensor<1x672x12x20xbf16> loc(#loc165) + %308 = xten_nn.subgraph (%arg5 = %307: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_247", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_247", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5:
tensor<1x672x12x20xbf16>) attributes { + LayerName = "Add_247", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_247", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_247", OutputName = "Add_247"} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc166) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc166) + } -> tensor<1x672x12x20xbf16> loc(#loc166) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc166) + } -> tensor<1x672x12x20xbf16> loc(#loc166) + %309 = xten_nn.subgraph (%arg5 = %308: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_250", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_250", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Clip_250", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_250", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_250", + OutputName = "Clip_250", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc167) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc167) + } -> tensor<1x672x12x20xbf16> 
loc(#loc167) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc167) + } -> tensor<1x672x12x20xbf16> loc(#loc167) + %310 = xten_nn.subgraph (%arg5 = %309: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_252", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_252", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Div_252", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_252", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_252", + OutputName = "Div_252", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc168) + xten_nn.output %463 : tensor<1x672x12x20xbf16> loc(#loc168) + } -> tensor<1x672x12x20xbf16> loc(#loc168) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc168) + } -> tensor<1x672x12x20xbf16> loc(#loc168) + %311 = xten_nn.subgraph (%arg5 = %307: tensor<1x672x12x20xbf16>, %arg6 = %310: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_253", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_253", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + 
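+ // Add_247, Clip_250, Div_252 and Mul_253 repeat the same hard-swish pattern on the output of the depthwise Conv_245.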
LayerName = "Mul_253", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_253", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_253", + OutputName = "Mul_253", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc169) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc169) + } -> tensor<1x672x12x20xbf16> loc(#loc169) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc169) + } -> tensor<1x672x12x20xbf16> loc(#loc169) + %312 = xten_nn.subgraph (%arg5 = %311: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#36", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#37", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 84 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x672x12x20xbf16>) -> tensor<1x672x1x240xbf16> loc(#loc338) + xten_nn.output %461 : tensor<1x672x1x240xbf16> loc(#loc338) + } -> tensor<1x672x1x240xbf16> loc(#loc338) + %313 = xten_nn.subgraph (%arg5 = %312: tensor<1x672x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#38", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#39", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", 
layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x240xbf16>) attributes { + LayerName = "Generated-#38", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#39", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 672 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x672x1x240xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc170) + xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc170) + } -> tensor<1x672x1x1xbf16> loc(#loc170) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc170) + } -> tensor<1x672x1x1xbf16> loc(#loc170) + %314 = xten_nn.subgraph (%arg5 = %313: tensor<1x672x1x1xbf16>, %arg6 = %63: tensor<168x672x1x1xbf16>, %arg7 = %62: tensor<168xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_255", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_256", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x1x1xbf16>, %arg9 = %arg6: tensor<168x672x1x1xbf16>, %arg10 = %arg7: tensor<168xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_255", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[168, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_256", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + 
config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<168x672x1x1xbf16>) -> tensor<168x1x1x672xbf16> loc(#loc339) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc339) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_255", + PartOfOutputName = "Conv_255", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x672xbf16>, tensor<168x1x1x672xbf16>, tensor<168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc171) + %465 = tosa.clamp %464 { + LayerName = "Relu_256", + OutputName = "Relu_256", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x168xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc172) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x168xbf16>) -> tensor<1x168x1x1xbf16> loc(#loc339) + xten_nn.output %466 : tensor<1x168x1x1xbf16> loc(#loc172) + } -> tensor<1x168x1x1xbf16> loc(#loc339) + xten_nn.output %461 : tensor<1x168x1x1xbf16> loc(#loc339) + } -> tensor<1x168x1x1xbf16> loc(#loc339) + %315 = xten_nn.subgraph (%arg5 = %314: tensor<1x168x1x1xbf16>, %arg6 = %61: tensor<672x168x1x1xbf16>, %arg7 = %60: tensor<672xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_257", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_257", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x168x1x1xbf16>, %arg9 = %arg6: tensor<672x168x1x1xbf16>, %arg10 = %arg7: tensor<672xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_257", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 168, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[672, 168, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_257", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + 
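+ // Conv_257: matching "excite" 1x1 convolution back up from 168 to 672 channels, with no activation (config.act = 0).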
L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<672x168x1x1xbf16>) -> tensor<672x1x1x168xbf16> loc(#loc173) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x168x1x1xbf16>) -> tensor<1x1x1x168xbf16> loc(#loc173) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_257", + PartOfOutputName = "Conv_257", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x168xbf16>, tensor<672x1x1x168xbf16>, tensor<672xbf16>) -> tensor<1x1x1x672xbf16> loc(#loc173) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x672xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc173) + xten_nn.output %465 : tensor<1x672x1x1xbf16> loc(#loc173) + } -> tensor<1x672x1x1xbf16> loc(#loc173) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc173) + } -> tensor<1x672x1x1xbf16> loc(#loc173) + %316 = xten_nn.subgraph (%arg5 = %315: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_259", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_259", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Add_259", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_259", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = 
"Add_259", OutputName = "Add_259"} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc174) + xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc174) + } -> tensor<1x672x1x1xbf16> loc(#loc174) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc174) + } -> tensor<1x672x1x1xbf16> loc(#loc174) + %317 = xten_nn.subgraph (%arg5 = %316: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_262", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_262", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Clip_262", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_262", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_262", + OutputName = "Clip_262", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc175) + xten_nn.output %462 : tensor<1x672x1x1xbf16> loc(#loc175) + } -> tensor<1x672x1x1xbf16> loc(#loc175) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc175) + } -> tensor<1x672x1x1xbf16> loc(#loc175) + %318 = xten_nn.subgraph (%arg5 = %317: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_264", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_264", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x672x1x1xbf16>) attributes { + LayerName = "Div_264", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = 
"HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_264", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_264", + OutputName = "Div_264", + shift = 0 : i8} : (tensor<1x672x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x672x1x1xbf16> loc(#loc176) + xten_nn.output %463 : tensor<1x672x1x1xbf16> loc(#loc176) + } -> tensor<1x672x1x1xbf16> loc(#loc176) + xten_nn.output %461 : tensor<1x672x1x1xbf16> loc(#loc176) + } -> tensor<1x672x1x1xbf16> loc(#loc176) + %319 = xten_nn.subgraph (%arg5 = %318: tensor<1x672x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#40", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#41", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 672 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x672x1x1xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc177) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc177) + } -> tensor<1x672x12x20xbf16> loc(#loc177) + %320 = xten_nn.subgraph (%arg5 = %319: tensor<1x672x12x20xbf16>, %arg6 = %311: tensor<1x672x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_265", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_265", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : 
vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x672x12x20xbf16>, %arg8 = %arg6: tensor<1x672x12x20xbf16>) attributes { + LayerName = "Mul_265", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_265", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_265", + OutputName = "Mul_265", + shift = 0 : i8} : (tensor<1x672x12x20xbf16>, tensor<1x672x12x20xbf16>) -> tensor<1x672x12x20xbf16> loc(#loc177) + xten_nn.output %462 : tensor<1x672x12x20xbf16> loc(#loc177) + } -> tensor<1x672x12x20xbf16> loc(#loc177) + xten_nn.output %461 : tensor<1x672x12x20xbf16> loc(#loc177) + } -> tensor<1x672x12x20xbf16> loc(#loc177) + %321 = xten_nn.subgraph (%arg5 = %320: tensor<1x672x12x20xbf16>, %arg6 = %59: tensor<160x672x1x1xbf16>, %arg7 = %58: tensor<160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_266", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_266", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x672x12x20xbf16>, %arg9 = %arg6: tensor<160x672x1x1xbf16>, %arg10 = %arg7: tensor<160xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_266", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 672, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 672, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = 
"Conv_266", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc178) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<160x672x1x1xbf16>) -> tensor<160x1x1x672xbf16> loc(#loc178) + %465 = tosa.transpose %arg8, %463 : (tensor<1x672x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x672xbf16> loc(#loc178) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_266", + PartOfOutputName = "Conv_266", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x672xbf16>, tensor<160x1x1x672xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc178) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc178) + xten_nn.output %467 : tensor<1x160x12x20xbf16> loc(#loc178) + } -> tensor<1x160x12x20xbf16> loc(#loc178) + xten_nn.output %461 : tensor<1x160x12x20xbf16> loc(#loc178) + } -> tensor<1x160x12x20xbf16> loc(#loc178) + %322 = xten_nn.subgraph (%arg5 = %321: tensor<1x160x12x20xbf16>, %arg6 = %57: tensor<960x160x1x1xbf16>, %arg7 = %56: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_267", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_267", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_267", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", 
+ SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_267", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc179) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc179) + %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc179) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_267", + PartOfOutputName = "Conv_267", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc179) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc179) + xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc179) + } -> tensor<1x960x12x20xbf16> loc(#loc179) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc179) + } -> tensor<1x960x12x20xbf16> loc(#loc179) + %323 = xten_nn.subgraph (%arg5 = %322: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_269", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_269", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Add_269", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_269", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_269", OutputName = "Add_269"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc180) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc180) + } -> tensor<1x960x12x20xbf16> loc(#loc180) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc180) + } -> tensor<1x960x12x20xbf16> loc(#loc180) + %324 = xten_nn.subgraph (%arg5 = %323: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_272", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_272", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Clip_272", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_272", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_272", + OutputName = "Clip_272", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc181) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc181) + } -> tensor<1x960x12x20xbf16> loc(#loc181) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc181) + } -> tensor<1x960x12x20xbf16> loc(#loc181) + %325 = xten_nn.subgraph (%arg5 = %324: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_274", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 
20]> : vector<4xindex> + } + ], + OutputName = "Div_274", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Div_274", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_274", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_274", + OutputName = "Div_274", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc182) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc182) + } -> tensor<1x960x12x20xbf16> loc(#loc182) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc182) + } -> tensor<1x960x12x20xbf16> loc(#loc182) + %326 = xten_nn.subgraph (%arg5 = %322: tensor<1x960x12x20xbf16>, %arg6 = %325: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_275", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_275", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_275", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : 
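+ // Add_269, Clip_272, Div_274 and Mul_275: hard-swish once more, now on the 960-channel output of Conv_267.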
vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_275", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_275", + OutputName = "Mul_275", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc183) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc183) + } -> tensor<1x960x12x20xbf16> loc(#loc183) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc183) + } -> tensor<1x960x12x20xbf16> loc(#loc183) + %327 = xten_nn.subgraph (%arg5 = %326: tensor<1x960x12x20xbf16>, %arg6 = %55: tensor<960x1x9x9xbf16>, %arg7 = %54: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_276", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_276", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[4, 4], [4, 4]], + LayerName = "Conv_276", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_276", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.kernel_height = 9 : ui8, + config.kernel_width = 9 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () ->
tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc184) + %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc184) + %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc184) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_276", + PartOfOutputName = "Conv_276", + dilation = array<i64: 1, 1>, + pad = array<i64: 4, 4, 4, 4>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc184) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc184) + xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc184) + } -> tensor<1x960x12x20xbf16> loc(#loc184) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc184) + } -> tensor<1x960x12x20xbf16> loc(#loc184) + %328 = xten_nn.subgraph (%arg5 = %327: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_278", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_278", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Add_278", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_278", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_278", OutputName = "Add_278"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc185) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc185) + } -> tensor<1x960x12x20xbf16> loc(#loc185) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc185) + } -> tensor<1x960x12x20xbf16> loc(#loc185) + %329 = xten_nn.subgraph (%arg5 = %328: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_281", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization =
"C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_281", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Clip_281", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_281", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_281", + OutputName = "Clip_281", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc186) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc186) + } -> tensor<1x960x12x20xbf16> loc(#loc186) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc186) + } -> tensor<1x960x12x20xbf16> loc(#loc186) + %330 = xten_nn.subgraph (%arg5 = %329: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_283", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_283", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Div_283", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_283", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + 
Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_283", + OutputName = "Div_283", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc187) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc187) + } -> tensor<1x960x12x20xbf16> loc(#loc187) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc187) + } -> tensor<1x960x12x20xbf16> loc(#loc187) + %331 = xten_nn.subgraph (%arg5 = %327: tensor<1x960x12x20xbf16>, %arg6 = %330: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_284", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_284", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_284", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_284", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_284", + OutputName = "Mul_284", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc188) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc188) + } -> tensor<1x960x12x20xbf16> loc(#loc188) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc188) + } -> tensor<1x960x12x20xbf16> loc(#loc188) + %332 = xten_nn.subgraph (%arg5 = %331: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#42", + Operands = [ + { + 
CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#43", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 120 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc340) + xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc340) + } -> tensor<1x960x1x240xbf16> loc(#loc340) + %333 = xten_nn.subgraph (%arg5 = %332: tensor<1x960x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#44", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#45", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { + LayerName = "Generated-#44", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#45", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 960 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc189) + xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc189) + } -> tensor<1x960x1x1xbf16> loc(#loc189) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc189) + } -> tensor<1x960x1x1xbf16> loc(#loc189) + %334 = xten_nn.subgraph (%arg5 = %333: tensor<1x960x1x1xbf16>, %arg6 = %53: tensor<240x960x1x1xbf16>, %arg7 = %52: tensor<240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_286", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_287", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_286", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_287", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> loc(#loc341) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc341) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_286", + PartOfOutputName = "Conv_286", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc190) + %465 = tosa.clamp %464 { + LayerName = "Relu_287", + OutputName = "Relu_287", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc191) + %466 = tosa.reshape %465 {new_shape = array} : (tensor<1x1x1x240xbf16>) -> tensor<1x240x1x1xbf16> loc(#loc341) + xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc191) + } -> tensor<1x240x1x1xbf16> loc(#loc341) + xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc341) + } -> 
tensor<1x240x1x1xbf16> loc(#loc341) + %335 = xten_nn.subgraph (%arg5 = %334: tensor<1x240x1x1xbf16>, %arg6 = %51: tensor<960x240x1x1xbf16>, %arg7 = %50: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_288", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_288", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_288", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_288", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc192) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc192) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_288", + PartOfOutputName = "Conv_288", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc192) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc192) + xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc192) + } -> tensor<1x960x1x1xbf16> loc(#loc192) + xten_nn.output %461 : tensor<1x960x1x1xbf16> 
loc(#loc192) + } -> tensor<1x960x1x1xbf16> loc(#loc192) + %336 = xten_nn.subgraph (%arg5 = %335: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_290", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_290", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Add_290", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_290", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_290", OutputName = "Add_290"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc193) + xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc193) + } -> tensor<1x960x1x1xbf16> loc(#loc193) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc193) + } -> tensor<1x960x1x1xbf16> loc(#loc193) + %337 = xten_nn.subgraph (%arg5 = %336: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_293", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_293", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Clip_293", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_293", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = 
"C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_293", + OutputName = "Clip_293", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc194) + xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc194) + } -> tensor<1x960x1x1xbf16> loc(#loc194) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc194) + } -> tensor<1x960x1x1xbf16> loc(#loc194) + %338 = xten_nn.subgraph (%arg5 = %337: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_295", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_295", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Div_295", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_295", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_295", + OutputName = "Div_295", + shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc195) + xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc195) + } -> tensor<1x960x1x1xbf16> loc(#loc195) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc195) + } -> tensor<1x960x1x1xbf16> loc(#loc195) + %339 = xten_nn.subgraph (%arg5 = %338: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#46", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = 
dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#47", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 960 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array<i64: 1, 1, 12, 20>} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc196) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc196) + } -> tensor<1x960x12x20xbf16> loc(#loc196) + %340 = xten_nn.subgraph (%arg5 = %339: tensor<1x960x12x20xbf16>, %arg6 = %331: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_296", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_296", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_296", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_296", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_296", + OutputName = "Mul_296", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc196) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc196) + } -> tensor<1x960x12x20xbf16> loc(#loc196) +
xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc196) + } -> tensor<1x960x12x20xbf16> loc(#loc196) + %341 = xten_nn.subgraph (%arg5 = %340: tensor<1x960x12x20xbf16>, %arg6 = %49: tensor<160x960x1x1xbf16>, %arg7 = %48: tensor<160xbf16>, %arg8 = %321: tensor<1x160x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_297", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_298", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_297", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_297", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc197) + %465 = tosa.reshape %arg10 {new_shape = array<i64: 160, 1, 1, 960>} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc197) + %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) ->
tensor<1x12x20x960xbf16> loc(#loc197) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_297", + PartOfOutputName = "Conv_297", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc197) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc197) + xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc197) + } -> tensor<1x160x12x20xbf16> loc(#loc197) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { + LayerName = "Add_298", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_298", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_298", OutputName = "Add_298"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc198) + xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc198) + } -> tensor<1x160x12x20xbf16> loc(#loc198) + xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc198) + } -> tensor<1x160x12x20xbf16> loc(#loc342) + %342 = xten_nn.subgraph (%arg5 = %341: tensor<1x160x12x20xbf16>, %arg6 = %47: tensor<960x160x1x1xbf16>, %arg7 = %46: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_299", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_299", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_299", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port =
"data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_299", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc199) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 960, 1, 1, 160>} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc199) + %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc199) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_299", + PartOfOutputName = "Conv_299", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc199) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc199) + xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc199) + } -> tensor<1x960x12x20xbf16> loc(#loc199) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc199) + } -> tensor<1x960x12x20xbf16> loc(#loc199) + %343 = xten_nn.subgraph (%arg5 = %342: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_301", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_301", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Add_301", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> :
vector<4xindex> + } + ], + OutputName = "Add_301", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_301", OutputName = "Add_301"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc200) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc200) + } -> tensor<1x960x12x20xbf16> loc(#loc200) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc200) + } -> tensor<1x960x12x20xbf16> loc(#loc200) + %344 = xten_nn.subgraph (%arg5 = %343: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_304", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_304", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Clip_304", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_304", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_304", + OutputName = "Clip_304", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc201) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc201) + } -> tensor<1x960x12x20xbf16> loc(#loc201) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc201) + } -> tensor<1x960x12x20xbf16> loc(#loc201) + %345 = xten_nn.subgraph (%arg5 = %344: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_306", + Operands = [ + { + 
CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_306", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Div_306", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_306", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_306", + OutputName = "Div_306", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc202) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc202) + } -> tensor<1x960x12x20xbf16> loc(#loc202) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc202) + } -> tensor<1x960x12x20xbf16> loc(#loc202) + %346 = xten_nn.subgraph (%arg5 = %342: tensor<1x960x12x20xbf16>, %arg6 = %345: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_307", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_307", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_307", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + 
}, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_307", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_307", + OutputName = "Mul_307", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc203) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc203) + } -> tensor<1x960x12x20xbf16> loc(#loc203) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc203) + } -> tensor<1x960x12x20xbf16> loc(#loc203) + %347 = xten_nn.subgraph (%arg5 = %346: tensor<1x960x12x20xbf16>, %arg6 = %45: tensor<960x1x9x9xbf16>, %arg7 = %44: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_308", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_308", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x12x20xbf16>, %arg9 = %arg6: tensor<960x1x9x9xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[4, 4], [4, 4]], + LayerName = "Conv_308", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "CMHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.wts", + SubPort = "wts_data", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 1, 9, 9]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_308", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "DepthwiseConv2dBf16", + With = { + config.act = 0 : ui8, + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", +
config.kernel_height = 9 : ui8, + config.kernel_width = 9 : ui8, + config.stride = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[2, 3, 0, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc204) + %465 = tosa.transpose %arg9, %464 : (tensor<960x1x9x9xbf16>, tensor<4xi32>) -> tensor<9x9x960x1xbf16> loc(#loc204) + %466 = tosa.transpose %arg8, %463 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc204) + %467 = tosa.depthwise_conv2d %466, %465, %arg10 { + PartOfLayerName = "Conv_308", + PartOfOutputName = "Conv_308", + dilation = array<i64: 1, 1>, + pad = array<i64: 4, 4, 4, 4>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x960xbf16>, tensor<9x9x960x1xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc204) + %468 = tosa.transpose %467, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc204) + xten_nn.output %468 : tensor<1x960x12x20xbf16> loc(#loc204) + } -> tensor<1x960x12x20xbf16> loc(#loc204) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc204) + } -> tensor<1x960x12x20xbf16> loc(#loc204) + %348 = xten_nn.subgraph (%arg5 = %347: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_310", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_310", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Add_310", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_310", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_310", OutputName = "Add_310"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc205) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc205) + } -> tensor<1x960x12x20xbf16> loc(#loc205) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc205) + } -> tensor<1x960x12x20xbf16> loc(#loc205) + %349 = xten_nn.subgraph (%arg5 = %348:
tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_313", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_313", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Clip_313", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_313", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_313", + OutputName = "Clip_313", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc206) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc206) + } -> tensor<1x960x12x20xbf16> loc(#loc206) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc206) + } -> tensor<1x960x12x20xbf16> loc(#loc206) + %350 = xten_nn.subgraph (%arg5 = %349: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_315", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_315", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Div_315", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_315", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = 
"data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_315", + OutputName = "Div_315", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc207) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc207) + } -> tensor<1x960x12x20xbf16> loc(#loc207) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc207) + } -> tensor<1x960x12x20xbf16> loc(#loc207) + %351 = xten_nn.subgraph (%arg5 = %347: tensor<1x960x12x20xbf16>, %arg6 = %350: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_316", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_316", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_316", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_316", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_316", + OutputName = "Mul_316", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc208) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc208) + } -> tensor<1x960x12x20xbf16> loc(#loc208) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc208) + } -> 
tensor<1x960x12x20xbf16> loc(#loc208) + %352 = xten_nn.subgraph (%arg5 = %351: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#48", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#49", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 120 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array<i64: 1, 960, 1, 240>} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc343) + xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc343) + } -> tensor<1x960x1x240xbf16> loc(#loc343) + %353 = xten_nn.subgraph (%arg5 = %352: tensor<1x960x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#50", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#51", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x240xbf16>) attributes { + LayerName = "Generated-#50", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#51", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 960 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array<i64: 3>, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc209) + xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc209) + } -> tensor<1x960x1x1xbf16> loc(#loc209) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc209) + } -> tensor<1x960x1x1xbf16> loc(#loc209) + %354 = xten_nn.subgraph (%arg5 = %353: tensor<1x960x1x1xbf16>, %arg6 = %43: 
tensor<240x960x1x1xbf16>, %arg7 = %42: tensor<240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_318", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_319", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<240x960x1x1xbf16>, %arg10 = %arg7: tensor<240xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_318", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[240, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_319", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array<i64: 240, 1, 1, 960>} : (tensor<240x960x1x1xbf16>) -> tensor<240x1x1x960xbf16> loc(#loc344) + %463 = tosa.reshape %arg8 {new_shape = array<i64: 1, 1, 1, 960>} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc344) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_318", + PartOfOutputName = "Conv_318", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x1x1x960xbf16>, tensor<240x1x1x960xbf16>, tensor<240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc210) + %465 = tosa.clamp %464 { + LayerName = "Relu_319", + OutputName = "Relu_319", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x1x240xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc211) + %466 = tosa.reshape %465 {new_shape = array<i64: 1, 240, 1, 1>} : (tensor<1x1x1x240xbf16>) -> 
tensor<1x240x1x1xbf16> loc(#loc344) + xten_nn.output %466 : tensor<1x240x1x1xbf16> loc(#loc211) + } -> tensor<1x240x1x1xbf16> loc(#loc344) + xten_nn.output %461 : tensor<1x240x1x1xbf16> loc(#loc344) + } -> tensor<1x240x1x1xbf16> loc(#loc344) + %355 = xten_nn.subgraph (%arg5 = %354: tensor<1x240x1x1xbf16>, %arg6 = %41: tensor<960x240x1x1xbf16>, %arg7 = %40: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_320", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_320", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x240x1x1xbf16>, %arg9 = %arg6: tensor<960x240x1x1xbf16>, %arg10 = %arg7: tensor<960xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_320", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 240, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 240, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_320", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array<i64: 960, 1, 1, 240>} : (tensor<960x240x1x1xbf16>) -> tensor<960x1x1x240xbf16> loc(#loc212) + %463 = tosa.reshape %arg8 {new_shape = array<i64: 1, 1, 1, 240>} : (tensor<1x240x1x1xbf16>) -> tensor<1x1x1x240xbf16> loc(#loc212) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_320", + PartOfOutputName = "Conv_320", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x1x1x240xbf16>, tensor<960x1x1x240xbf16>, tensor<960xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc212) + %465 = tosa.reshape %464 {new_shape = array<i64: 1, 960, 1, 1>} : 
(tensor<1x1x1x960xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc212) + xten_nn.output %465 : tensor<1x960x1x1xbf16> loc(#loc212) + } -> tensor<1x960x1x1xbf16> loc(#loc212) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc212) + } -> tensor<1x960x1x1xbf16> loc(#loc212) + %356 = xten_nn.subgraph (%arg5 = %355: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_322", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_322", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Add_322", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Add_322", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_322", OutputName = "Add_322"} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc213) + xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc213) + } -> tensor<1x960x1x1xbf16> loc(#loc213) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc213) + } -> tensor<1x960x1x1xbf16> loc(#loc213) + %357 = xten_nn.subgraph (%arg5 = %356: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_325", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_325", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Clip_325", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Clip_325", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_325", + OutputName = "Clip_325", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc214) + xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc214) + } -> tensor<1x960x1x1xbf16> loc(#loc214) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc214) + } -> tensor<1x960x1x1xbf16> loc(#loc214) + %358 = xten_nn.subgraph (%arg5 = %357: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_327", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_327", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x1x1xbf16>) attributes { + LayerName = "Div_327", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Div_327", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_327", + OutputName = "Div_327", + shift = 0 : i8} : (tensor<1x960x1x1xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc215) + xten_nn.output %463 : tensor<1x960x1x1xbf16> loc(#loc215) + } -> tensor<1x960x1x1xbf16> loc(#loc215) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc215) + } -> tensor<1x960x1x1xbf16> loc(#loc215) + %359 = xten_nn.subgraph (%arg5 = %358: tensor<1x960x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = 
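+ // Conv_318 (1x1, 960->240, ReLU) and Conv_320 (1x1, 240->960) form a squeeze-and-excitation style bottleneck on the pooled features; Add_322 -> Clip_325 -> Div_327 then apply hard-sigmoid, clamp(x + 3, 0, 6) * (1/6), to produce the per-channel gate that TileAdf broadcasts back to 12x20 for Mul_328.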
"Generated-#52", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#53", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 960 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc216) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc216) + } -> tensor<1x960x12x20xbf16> loc(#loc216) + %360 = xten_nn.subgraph (%arg5 = %359: tensor<1x960x12x20xbf16>, %arg6 = %351: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_328", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_328", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_328", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_328", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_328", + OutputName = "Mul_328", 
+ shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc216) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc216) + } -> tensor<1x960x12x20xbf16> loc(#loc216) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc216) + } -> tensor<1x960x12x20xbf16> loc(#loc216) + %361 = xten_nn.subgraph (%arg5 = %360: tensor<1x960x12x20xbf16>, %arg6 = %39: tensor<160x960x1x1xbf16>, %arg7 = %38: tensor<160xbf16>, %arg8 = %341: tensor<1x160x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_329", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_330", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<160x960x1x1xbf16>, %arg11 = %arg7: tensor<160xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_329", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[160, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_329", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> 
tensor<4xi32> loc(#loc217) + %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<160x960x1x1xbf16>) -> tensor<160x1x1x960xbf16> loc(#loc217) + %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc217) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_329", + PartOfOutputName = "Conv_329", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x960xbf16>, tensor<160x1x1x960xbf16>, tensor<160xbf16>) -> tensor<1x12x20x160xbf16> loc(#loc217) + %468 = tosa.transpose %467, %463 : (tensor<1x12x20x160xbf16>, tensor<4xi32>) -> tensor<1x160x12x20xbf16> loc(#loc217) + xten_nn.output %468 : tensor<1x160x12x20xbf16> loc(#loc217) + } -> tensor<1x160x12x20xbf16> loc(#loc217) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x160x12x20xbf16>, %arg10 = %arg8: tensor<1x160x12x20xbf16>) attributes { + LayerName = "Add_330", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_330", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.add %arg9, %arg10 {LayerName = "Add_330", OutputName = "Add_330"} : (tensor<1x160x12x20xbf16>, tensor<1x160x12x20xbf16>) -> tensor<1x160x12x20xbf16> loc(#loc218) + xten_nn.output %463 : tensor<1x160x12x20xbf16> loc(#loc218) + } -> tensor<1x160x12x20xbf16> loc(#loc218) + xten_nn.output %462 : tensor<1x160x12x20xbf16> loc(#loc218) + } -> tensor<1x160x12x20xbf16> loc(#loc345) + %362 = xten_nn.subgraph (%arg5 = %361: tensor<1x160x12x20xbf16>, %arg6 = %37: tensor<960x160x1x1xbf16>, %arg7 = %36: tensor<960xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_331", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_331", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x160x12x20xbf16>, %arg9 = %arg6: tensor<960x160x1x1xbf16>, %arg10 = 
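+ // Conv_329 (1x1, 960->160) followed by Add_330 above is the inverted-residual projection step: the skip tensor arrives through %arg8 (the shared OFM of %341, see OfmShare = 3), so the block output is Conv_329(x) + residual.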
%arg7: tensor<960xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_331", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 160, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[960, 160, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_331", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc219) + %464 = tosa.reshape %arg9 {new_shape = array<i64: 960, 1, 1, 160>} : (tensor<960x160x1x1xbf16>) -> tensor<960x1x1x160xbf16> loc(#loc219) + %465 = tosa.transpose %arg8, %463 : (tensor<1x160x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x160xbf16> loc(#loc219) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_331", + PartOfOutputName = "Conv_331", + dilation = array<i64: 1, 1>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x160xbf16>, tensor<960x1x1x160xbf16>, tensor<960xbf16>) -> tensor<1x12x20x960xbf16> loc(#loc219) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x960xbf16>, tensor<4xi32>) -> tensor<1x960x12x20xbf16> loc(#loc219) + xten_nn.output %467 : tensor<1x960x12x20xbf16> loc(#loc219) + } -> tensor<1x960x12x20xbf16> loc(#loc219) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc219) + } -> tensor<1x960x12x20xbf16> loc(#loc219) + %363 = xten_nn.subgraph (%arg5 = %362: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Add_333", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_333", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Add_333", 
+ Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_333", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 3.000000e+00 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<3.000000e+00> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.add %arg6, %462 {LayerName = "Add_333", OutputName = "Add_333"} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc220) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc220) + } -> tensor<1x960x12x20xbf16> loc(#loc220) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc220) + } -> tensor<1x960x12x20xbf16> loc(#loc220) + %364 = xten_nn.subgraph (%arg5 = %363: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_336", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_336", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Clip_336", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Clip_336", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 6.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_336", + OutputName = "Clip_336", + max_fp = 6.000000e+00 : f32, + max_int = 6 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc221) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc221) + } -> tensor<1x960x12x20xbf16> loc(#loc221) + xten_nn.output %461 : tensor<1x960x12x20xbf16> 
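+ // Add_333 -> Clip_336 -> Div_338 -> Mul_339 repeat the hard-swish pattern, here applied to the Conv_331 (160 -> 960 expansion) output.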
loc(#loc221) + } -> tensor<1x960x12x20xbf16> loc(#loc221) + %365 = xten_nn.subgraph (%arg5 = %364: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Div_338", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_338", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Div_338", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Div_338", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulAttributeBroadcastingBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.num_kernel_iters = 0 : ui16, + config.scalar = 1.660160e-01 : bf16, + config.scalar_position = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<1.660160e-01> : tensor<1x1x1x1xbf16>}> : () -> tensor<1x1x1x1xbf16> loc(#loc) + %463 = tosa.mul %arg6, %462 { + LayerName = "Div_338", + OutputName = "Div_338", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x1x1x1xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc222) + xten_nn.output %463 : tensor<1x960x12x20xbf16> loc(#loc222) + } -> tensor<1x960x12x20xbf16> loc(#loc222) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc222) + } -> tensor<1x960x12x20xbf16> loc(#loc222) + %366 = xten_nn.subgraph (%arg5 = %362: tensor<1x960x12x20xbf16>, %arg6 = %365: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_339", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_339", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x960x12x20xbf16>, %arg8 = %arg6: tensor<1x960x12x20xbf16>) attributes { + LayerName = "Mul_339", + Operands = [ + { + CurrentDataFormat = 
"NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_339", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_339", + OutputName = "Mul_339", + shift = 0 : i8} : (tensor<1x960x12x20xbf16>, tensor<1x960x12x20xbf16>) -> tensor<1x960x12x20xbf16> loc(#loc223) + xten_nn.output %462 : tensor<1x960x12x20xbf16> loc(#loc223) + } -> tensor<1x960x12x20xbf16> loc(#loc223) + xten_nn.output %461 : tensor<1x960x12x20xbf16> loc(#loc223) + } -> tensor<1x960x12x20xbf16> loc(#loc223) + %367 = xten_nn.subgraph (%arg5 = %366: tensor<1x960x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#54", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Generated-#55", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + Specializes = "Transpose4dAdf", + With = { + config.aie_arch = "aie2p", + config.dim_0 = 12 : ui32, + config.dim_1 = 120 : ui32, + config.dim_2 = 20 : ui32, + config.dim_3 = 8 : ui32, + config.dtype = "bfloat16", + config.perm = 6 : ui32 + }} { + %461 = tosa.reshape %arg5 {new_shape = array} : (tensor<1x960x12x20xbf16>) -> tensor<1x960x1x240xbf16> loc(#loc346) + xten_nn.output %461 : tensor<1x960x1x240xbf16> loc(#loc346) + } -> tensor<1x960x1x240xbf16> loc(#loc346) + %368 = xten_nn.subgraph (%arg5 = %367: tensor<1x960x1x240xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#56", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#57", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = 
%arg5: tensor<1x960x1x240xbf16>) attributes { + LayerName = "Generated-#56", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 240]> : vector<4xindex> + } + ], + OutputName = "Generated-#57", + PadValue = 0.000000e+00 : bf16, + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + } + ], + Specializes = "ReduceMeanC8Bf16", + Traits = { + Reduce = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.full_channel = 960 : ui32, + config.full_height = 1 : ui32, + config.full_width = 240 : ui32, + config.reduce_dim = "W" + }} { + %462 = xten_nn.reduce_mean %arg6 {axes = array, keepdims = 1 : i64} : (tensor<1x960x1x240xbf16>) -> tensor<1x960x1x1xbf16> loc(#loc224) + xten_nn.output %462 : tensor<1x960x1x1xbf16> loc(#loc224) + } -> tensor<1x960x1x1xbf16> loc(#loc224) + xten_nn.output %461 : tensor<1x960x1x1xbf16> loc(#loc224) + } -> tensor<1x960x1x1xbf16> loc(#loc224) + %369 = xten_nn.subgraph (%arg5 = %368: tensor<1x960x1x1xbf16>, %arg6 = %35: tensor<128x960x1x1xbf16>, %arg7 = %34: tensor<128xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_343", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_343", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x960x1x1xbf16>, %arg9 = %arg6: tensor<128x960x1x1xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_343", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_343", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + 
config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = tosa.reshape %arg9 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc225) + %463 = tosa.reshape %arg8 {new_shape = array} : (tensor<1x960x1x1xbf16>) -> tensor<1x1x1x960xbf16> loc(#loc225) + %464 = tosa.conv2d %463, %462, %arg10 { + PartOfLayerName = "Conv_343", + PartOfOutputName = "Conv_343", + dilation = array, + pad = array, + stride = array} : (tensor<1x1x1x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x1x1x128xbf16> loc(#loc225) + %465 = tosa.reshape %464 {new_shape = array} : (tensor<1x1x1x128xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc225) + xten_nn.output %465 : tensor<1x128x1x1xbf16> loc(#loc225) + } -> tensor<1x128x1x1xbf16> loc(#loc225) + xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc225) + } -> tensor<1x128x1x1xbf16> loc(#loc225) + %370 = xten_nn.subgraph (%arg5 = %369: tensor<1x128x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sigmoid_344", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_344", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x1x1xbf16>) attributes { + LayerName = "Sigmoid_344", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_344", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + Specializes = "SigmoidTemplatedBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_344", OutputName = "Sigmoid_344"} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x1x1xbf16> loc(#loc226) + xten_nn.output %462 : tensor<1x128x1x1xbf16> loc(#loc226) + } -> tensor<1x128x1x1xbf16> loc(#loc226) + xten_nn.output %461 : tensor<1x128x1x1xbf16> loc(#loc226) + } -> tensor<1x128x1x1xbf16> loc(#loc226) + %371 = xten_nn.subgraph (%arg5 = %370: tensor<1x128x1x1xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Generated-#58", 
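+ // Sigmoid_344 turns the pooled Conv_343 output into a 1x128x1x1 gate; TileAdf below broadcasts it to 12x20 so that Mul_345 can scale the Conv_340/Relu_341 features elementwise, i.e. a sigmoid-gated channel attention over the bottleneck output.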
+ Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 1, 1]> : vector<4xindex> + } + ], + OutputName = "Generated-#59", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TileAdf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.i_dim_c = 128 : ui32, + config.i_dim_h = 1 : ui32, + config.i_dim_n = 1 : ui32, + config.i_dim_w = 1 : ui32, + config.rep_dim_c = 1 : ui32, + config.rep_dim_h = 12 : ui32, + config.rep_dim_w = 20 : ui32 + }} { + %461 = tosa.tile %arg5 {multiples = array} : (tensor<1x128x1x1xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc227) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc227) + } -> tensor<1x128x12x20xbf16> loc(#loc227) + %372 = xten_nn.subgraph (%arg5 = %366: tensor<1x960x12x20xbf16>, %arg6 = %33: tensor<128x960x1x1xbf16>, %arg7 = %32: tensor<128xbf16>, %arg8 = %371: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index, 3 : index], + LayerName = "Conv_340", + OfmShare = 3 : index, + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_345", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg9 = %arg5: tensor<1x960x12x20xbf16>, %arg10 = %arg6: tensor<128x960x1x1xbf16>, %arg11 = %arg7: tensor<128xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_340", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 960, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 960, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_341", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = 
dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %463 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc348) + %465 = tosa.reshape %arg10 {new_shape = array} : (tensor<128x960x1x1xbf16>) -> tensor<128x1x1x960xbf16> loc(#loc348) + %466 = tosa.transpose %arg9, %464 : (tensor<1x960x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x960xbf16> loc(#loc348) + %467 = tosa.conv2d %466, %465, %arg11 { + PartOfLayerName = "Conv_340", + PartOfOutputName = "Conv_340", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x960xbf16>, tensor<128x1x1x960xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc228) + %468 = tosa.clamp %467 { + LayerName = "Relu_341", + OutputName = "Relu_341", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x12x20x128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc229) + %469 = tosa.transpose %468, %463 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc348) + xten_nn.output %469 : tensor<1x128x12x20xbf16> loc(#loc229) + } -> tensor<1x128x12x20xbf16> loc(#loc348) + %462 = xten_nn.subgraph (%arg9 = %461: tensor<1x128x12x20xbf16>, %arg10 = %arg8: tensor<1x128x12x20xbf16>) attributes { + LayerName = "Mul_345", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_345", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %463 = tosa.mul %arg9, %arg10 { + LayerName = "Mul_345", + OutputName = "Mul_345", + shift = 0 : i8} : (tensor<1x128x12x20xbf16>, tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc227) + xten_nn.output %463 : tensor<1x128x12x20xbf16> loc(#loc227) + } -> tensor<1x128x12x20xbf16> loc(#loc227) + xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc227) + } -> tensor<1x128x12x20xbf16> loc(#loc347) + %373 = 
xten_nn.subgraph (%arg5 = %372: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_349_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Split_349_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 128 : ui32, + config.dim_h = 12 : ui32, + config.dim_w = 20 : ui32, + config.dtype = "bfloat16", + config.end = 64 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_349", + PartOfOutputName = "Split_349", + size = array<i64: 1, 64, 12, 20>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc230) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc230) + } -> tensor<1x64x12x20xbf16> loc(#loc230) + %374 = xten_nn.subgraph (%arg5 = %372: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_349_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Split_349_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 128 : ui32, + config.dim_h = 12 : ui32, + config.dim_w = 20 : ui32, + config.dtype = "bfloat16", + config.end = 128 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 64 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_349", + PartOfOutputName = "Split_349", + size = array<i64: 1, 64, 12, 20>, + start = array<i64: 0, 64, 0, 0>} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc230) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc230) + } -> tensor<1x64x12x20xbf16> loc(#loc230) + %375 = xten_nn.subgraph (%arg5 = %374: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_350", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : 
vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Concat_350", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_350", + OutputName = "Concat_350", + axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc231) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc231) + } -> tensor<1x128x12x20xbf16> loc(#loc231) + %376 = xten_nn.subgraph (%arg5 = %375: tensor<1x128x12x20xbf16>, %arg6 = %31: tensor<128x128x3x3xbf16>, %arg7 = %30: tensor<128xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_351", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_351", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<128x128x3x3xbf16>, %arg10 = %arg7: tensor<128xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_351", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[128, 128, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_351", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = 
"bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<128x128x3x3xbf16>, tensor<4xi32>) -> tensor<128x3x3x128xbf16> loc(#loc232) + %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc232) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_351", + PartOfOutputName = "Conv_351", + dilation = array, + pad = array, + stride = array} : (tensor<1x12x20x128xbf16>, tensor<128x3x3x128xbf16>, tensor<128xbf16>) -> tensor<1x12x20x128xbf16> loc(#loc232) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x128xbf16>, tensor<4xi32>) -> tensor<1x128x12x20xbf16> loc(#loc232) + xten_nn.output %467 : tensor<1x128x12x20xbf16> loc(#loc232) + } -> tensor<1x128x12x20xbf16> loc(#loc232) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc232) + } -> tensor<1x128x12x20xbf16> loc(#loc232) + %377 = xten_nn.subgraph (%arg5 = %376: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sigmoid_352", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_352", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x128x12x20xbf16>) attributes { + LayerName = "Sigmoid_352", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_352", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SigmoidTemplatedBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_352", OutputName = "Sigmoid_352"} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc233) + xten_nn.output %462 : tensor<1x128x12x20xbf16> loc(#loc233) + } -> tensor<1x128x12x20xbf16> loc(#loc233) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc233) + } -> tensor<1x128x12x20xbf16> loc(#loc233) + %378 = xten_nn.subgraph (%arg5 = %377: tensor<1x128x12x20xbf16>) 
attributes { + IfmOperands = [0 : index], + LayerName = "Split_353_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Split_353_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 128 : ui32, + config.dim_h = 12 : ui32, + config.dim_w = 20 : ui32, + config.dtype = "bfloat16", + config.end = 128 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 64 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_353", + PartOfOutputName = "Split_353", + size = array<i64: 1, 64, 12, 20>, + start = array<i64: 0, 64, 0, 0>} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc234) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc234) + } -> tensor<1x64x12x20xbf16> loc(#loc234) + %379 = xten_nn.subgraph (%arg5 = %27: tensor<1x64x12x20xbf16>, %arg6 = %378: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [1 : index], + LayerName = "Sub_359", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Sub_359", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Sub_359", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Sub_359", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SubBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", 
+ config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_359", OutputName = "Sub_359"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc5) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc5) + } -> tensor<1x64x12x20xbf16> loc(#loc5) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc5) + } -> tensor<1x64x12x20xbf16> loc(#loc5) + %380 = xten_nn.subgraph (%arg5 = %379: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_360", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_360", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Mul_360", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_360", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_360", + OutputName = "Mul_360", + shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc240) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc240) + } -> tensor<1x64x12x20xbf16> loc(#loc240) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc240) + } -> tensor<1x64x12x20xbf16> loc(#loc240) + %381 = xten_nn.subgraph (%arg5 = %377: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_353_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = 
"Split_353_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 128 : ui32, + config.dim_h = 12 : ui32, + config.dim_w = 20 : ui32, + config.dtype = "bfloat16", + config.end = 64 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_353", + PartOfOutputName = "Split_353", + size = array, + start = array} : (tensor<1x128x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc234) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc234) + } -> tensor<1x64x12x20xbf16> loc(#loc234) + %382 = xten_nn.subgraph (%arg5 = %381: tensor<1x64x12x20xbf16>, %arg6 = %arg4: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_354", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_354", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Mul_354", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_354", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_354", + OutputName = "Mul_354", + shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc235) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc235) + } -> 
tensor<1x64x12x20xbf16> loc(#loc235) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc235) + } -> tensor<1x64x12x20xbf16> loc(#loc235) + %383 = xten_nn.subgraph (%arg5 = %374: tensor<1x64x12x20xbf16>, %arg6 = %382: tensor<1x64x12x20xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_355", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Concat_355", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_355", + OutputName = "Concat_355", + axis = 1 : i32} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc236) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc236) + } -> tensor<1x128x12x20xbf16> loc(#loc236) + %384 = xten_nn.subgraph (%arg5 = %383: tensor<1x128x12x20xbf16>, %arg6 = %29: tensor<64x128x3x3xbf16>, %arg7 = %28: tensor<64xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_356", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_356", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x128x12x20xbf16>, %arg9 = %arg6: tensor<64x128x3x3xbf16>, %arg10 = %arg7: tensor<64xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_356", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[64, 128, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_356", + Reason = "MllibKernel", + Results = [ + { 
+ CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<64x128x3x3xbf16>, tensor<4xi32>) -> tensor<64x3x3x128xbf16> loc(#loc237) + %465 = tosa.transpose %arg8, %463 : (tensor<1x128x12x20xbf16>, tensor<4xi32>) -> tensor<1x12x20x128xbf16> loc(#loc237) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_356", + PartOfOutputName = "Conv_356", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x12x20x128xbf16>, tensor<64x3x3x128xbf16>, tensor<64xbf16>) -> tensor<1x12x20x64xbf16> loc(#loc237) + %467 = tosa.transpose %466, %462 : (tensor<1x12x20x64xbf16>, tensor<4xi32>) -> tensor<1x64x12x20xbf16> loc(#loc237) + xten_nn.output %467 : tensor<1x64x12x20xbf16> loc(#loc237) + } -> tensor<1x64x12x20xbf16> loc(#loc237) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc237) + } -> tensor<1x64x12x20xbf16> loc(#loc237) + %385 = xten_nn.subgraph (%arg5 = %384: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Tanh_357", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Tanh_357", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Tanh_357", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Tanh_357", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "TanhTemplatedBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + 
config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.tanh %arg6 {LayerName = "Tanh_357", OutputName = "Tanh_357"} : (tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc238) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc238) + } -> tensor<1x64x12x20xbf16> loc(#loc238) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc238) + } -> tensor<1x64x12x20xbf16> loc(#loc238) + %386 = xten_nn.subgraph (%arg5 = %378: tensor<1x64x12x20xbf16>, %arg6 = %385: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_361", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_361", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Mul_361", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Mul_361", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_361", + OutputName = "Mul_361", + shift = 0 : i8} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc239) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc239) + } -> tensor<1x64x12x20xbf16> loc(#loc239) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc239) + } -> tensor<1x64x12x20xbf16> loc(#loc239) + %387 = xten_nn.subgraph (%arg5 = %380: tensor<1x64x12x20xbf16>, %arg6 = %386: tensor<1x64x12x20xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Add_362", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_362", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x64x12x20xbf16>, %arg8 = %arg6: tensor<1x64x12x20xbf16>) attributes { + LayerName = "Add_362", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Add_362", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.add %arg7, %arg8 {LayerName = "Add_362", OutputName = "Add_362"} : (tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x64x12x20xbf16> loc(#loc241) + xten_nn.output %462 : tensor<1x64x12x20xbf16> loc(#loc241) + } -> tensor<1x64x12x20xbf16> loc(#loc241) + xten_nn.output %461 : tensor<1x64x12x20xbf16> loc(#loc241) + } -> tensor<1x64x12x20xbf16> loc(#loc241) + %388 = xten_nn.subgraph (%arg5 = %373: tensor<1x64x12x20xbf16>, %arg6 = %387: tensor<1x64x12x20xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_363", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 64, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Concat_363", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_363", + OutputName = "Concat_363", + axis = 1 : i32} : 
(tensor<1x64x12x20xbf16>, tensor<1x64x12x20xbf16>) -> tensor<1x128x12x20xbf16> loc(#loc242) + xten_nn.output %461 : tensor<1x128x12x20xbf16> loc(#loc242) + } -> tensor<1x128x12x20xbf16> loc(#loc242) + %389 = xten_nn.subgraph (%arg5 = %388: tensor<1x128x12x20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Resize_365", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 12, 20]> : vector<4xindex> + } + ], + OutputName = "Resize_365", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 24, 40]> : vector<4xindex> + } + ], + Specializes = "ResizeAdf", + With = { + config.co_trans_mode = 1 : ui32, + config.dim_0 = 1 : ui32, + config.dim_1 = 128 : ui32, + config.dim_2 = 12 : ui32, + config.dim_3 = 20 : ui32, + config.dtype = "bfloat16", + config.mode = 1 : ui32, + config.nearest_mode = 0 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.output_H = 24 : ui32, + config.output_W = 40 : ui32 + }} { + %461 = xten_nn.resize %arg5 { + LayerName = "Resize_365", + OutputName = "Resize_365", + coordinate_transformation_mode = 1 : i64, + mode = 1 : i64, + nearest_mode = 0 : i64, + scales = array<f32: 1.000000e+00, 1.000000e+00, 2.000000e+00, 2.000000e+00>} : (tensor<1x128x12x20xbf16>) -> tensor<1x128x24x40xbf16> loc(#loc243) + xten_nn.output %461 : tensor<1x128x24x40xbf16> loc(#loc243) + } -> tensor<1x128x24x40xbf16> loc(#loc243) + %390 = xten_nn.subgraph (%arg5 = %389: tensor<1x128x24x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Slice_371", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 24, 40]> : vector<4xindex> + } + ], + OutputName = "Slice_371", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "H", + config.dim_c = 128 : ui32, + config.dim_h = 24 : ui32, + config.dim_w = 40 : ui32, + config.dtype = "bfloat16", + config.end = 23 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + LayerName = "Slice_371", + OutputName = "Slice_371", + size = array<i64: 1, 128, 23, 40>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x128x24x40xbf16>) -> tensor<1x128x23x40xbf16> loc(#loc244) + xten_nn.output %461 : tensor<1x128x23x40xbf16> loc(#loc244) + } -> tensor<1x128x23x40xbf16> loc(#loc244) + %391 = xten_nn.subgraph (%arg5 = %166: tensor<1x3x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "AveragePool_346", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> 
: vector<4xindex> + } + ], + OutputName = "AveragePool_346", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + HWPaddingNotCounted = [[0, 0], [0, 0]], + LayerName = "AveragePool_346", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "AveragePool_346", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> + } + ], + Specializes = "AvgPool2dBf16", + With = { + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.dtype = "bfloat16", + config.ksize = 2 : ui8, + config.stride_log2 = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc12) + %464 = tosa.transpose %arg6, %463 : (tensor<1x3x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x3xbf16> loc(#loc12) + %465 = tosa.avg_pool2d %464 { + PartOfLayerName = "AveragePool_346", + PartOfOutputName = "AveragePool_346", + acc_type = f32, + kernel = array<i64: 2, 2>, + pad = array<i64: 0, 0, 0, 0>, + stride = array<i64: 2, 2>} : (tensor<1x180x320x3xbf16>) -> tensor<1x90x160x3xbf16> loc(#loc12) + %466 = tosa.transpose %465, %462 : (tensor<1x90x160x3xbf16>, tensor<4xi32>) -> tensor<1x3x90x160xbf16> loc(#loc12) + xten_nn.output %466 : tensor<1x3x90x160xbf16> loc(#loc12) + } -> tensor<1x3x90x160xbf16> loc(#loc12) + xten_nn.output %461 : tensor<1x3x90x160xbf16> loc(#loc12) + } -> tensor<1x3x90x160xbf16> loc(#loc12) + %392 = xten_nn.subgraph (%arg5 = %391: tensor<1x3x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "AveragePool_347", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> + } + ], + OutputName = "AveragePool_347", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x90x160xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[0, 0], [0, 0]], + HWPaddingNotCounted = [[0, 0], [0, 0]], + LayerName = "AveragePool_347", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = 
"data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> + } + ], + OutputName = "AveragePool_347", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> + } + ], + Specializes = "AvgPool2dBf16", + With = { + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.dtype = "bfloat16", + config.ksize = 2 : ui8, + config.stride_log2 = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc245) + %464 = tosa.transpose %arg6, %463 : (tensor<1x3x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x3xbf16> loc(#loc245) + %465 = tosa.avg_pool2d %464 { + PartOfLayerName = "AveragePool_347", + PartOfOutputName = "AveragePool_347", + acc_type = f32, + kernel = array, + pad = array, + stride = array} : (tensor<1x90x160x3xbf16>) -> tensor<1x45x80x3xbf16> loc(#loc245) + %466 = tosa.transpose %465, %462 : (tensor<1x45x80x3xbf16>, tensor<4xi32>) -> tensor<1x3x45x80xbf16> loc(#loc245) + xten_nn.output %466 : tensor<1x3x45x80xbf16> loc(#loc245) + } -> tensor<1x3x45x80xbf16> loc(#loc245) + xten_nn.output %461 : tensor<1x3x45x80xbf16> loc(#loc245) + } -> tensor<1x3x45x80xbf16> loc(#loc245) + %393 = xten_nn.subgraph (%arg5 = %392: tensor<1x3x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "AveragePool_348", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> + } + ], + OutputName = "AveragePool_348", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x45x80xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 1], [0, 0]], + HWPaddingNotCounted = [[0, 1], [0, 0]], + LayerName = "AveragePool_348", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> + } + ], + OutputName = "AveragePool_348", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> + } + ], + Specializes = "AvgPool2dBf16", + With = { + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.dtype = "bfloat16", + config.ksize = 2 : ui8, + config.stride_log2 = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 
1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc246) + %464 = tosa.transpose %arg6, %463 : (tensor<1x3x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x3xbf16> loc(#loc246) + %465 = tosa.avg_pool2d %464 { + PartOfLayerName = "AveragePool_348", + PartOfOutputName = "AveragePool_348", + acc_type = f32, + kernel = array<i64: 2, 2>, + pad = array<i64: 0, 1, 0, 0>, + stride = array<i64: 2, 2>} : (tensor<1x45x80x3xbf16>) -> tensor<1x23x40x3xbf16> loc(#loc246) + %466 = tosa.transpose %465, %462 : (tensor<1x23x40x3xbf16>, tensor<4xi32>) -> tensor<1x3x23x40xbf16> loc(#loc246) + xten_nn.output %466 : tensor<1x3x23x40xbf16> loc(#loc246) + } -> tensor<1x3x23x40xbf16> loc(#loc246) + xten_nn.output %461 : tensor<1x3x23x40xbf16> loc(#loc246) + } -> tensor<1x3x23x40xbf16> loc(#loc246) + %394 = xten_nn.subgraph (%arg5 = %390: tensor<1x128x23x40xbf16>, %arg6 = %217: tensor<1x40x23x40xbf16>, %arg7 = %393: tensor<1x3x23x40xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index, 2 : index], + LayerName = "Concat_372", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 128, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm3", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Concat_372", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6, %arg7 { + LayerName = "Concat_372", + OutputName = "Concat_372", + axis = 1 : i32} : (tensor<1x128x23x40xbf16>, tensor<1x40x23x40xbf16>, tensor<1x3x23x40xbf16>) -> tensor<1x171x23x40xbf16> loc(#loc247) + xten_nn.output %461 : tensor<1x171x23x40xbf16> loc(#loc247) + } -> tensor<1x171x23x40xbf16> loc(#loc247) + %395 = xten_nn.subgraph (%arg5 = %394: tensor<1x171x23x40xbf16>, %arg6 = %26: tensor<80x171x3x3xbf16>, %arg7 = %25: tensor<80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_373", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_374", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x171x23x40xbf16>, %arg9 = %arg6: tensor<80x171x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_373", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 171, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[80, 171, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_374", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<80x171x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x171xbf16> loc(#loc349) + %465 = tosa.transpose %arg8, %463 : (tensor<1x171x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x171xbf16> loc(#loc349) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_373", + PartOfOutputName = "Conv_373", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x23x40x171xbf16>, tensor<80x3x3x171xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc248) + %467 = tosa.clamp %466 { + LayerName = "Relu_374", + OutputName = "Relu_374", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x23x40x80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc249) + %468 = tosa.transpose %467, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc349) + xten_nn.output %468 : tensor<1x80x23x40xbf16> loc(#loc249) + } -> tensor<1x80x23x40xbf16> loc(#loc349) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc349) + } -> tensor<1x80x23x40xbf16> loc(#loc349) + %396 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_375_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + 
l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Split_375_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 80 : ui32, + config.dim_h = 23 : ui32, + config.dim_w = 40 : ui32, + config.dtype = "bfloat16", + config.end = 40 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_375", + PartOfOutputName = "Split_375", + size = array<i64: 1, 40, 23, 40>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc250) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) + } -> tensor<1x40x23x40xbf16> loc(#loc250) + %397 = xten_nn.subgraph (%arg5 = %395: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_375_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Split_375_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 80 : ui32, + config.dim_h = 23 : ui32, + config.dim_w = 40 : ui32, + config.dtype = "bfloat16", + config.end = 80 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 40 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_375", + PartOfOutputName = "Split_375", + size = array<i64: 1, 40, 23, 40>, + start = array<i64: 0, 40, 0, 0>} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc250) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc250) + } -> tensor<1x40x23x40xbf16> loc(#loc250) + %398 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_376", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + 
OutputName = "Concat_376", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_376", + OutputName = "Concat_376", + axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc251) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc251) + } -> tensor<1x80x23x40xbf16> loc(#loc251) + %399 = xten_nn.subgraph (%arg5 = %398: tensor<1x80x23x40xbf16>, %arg6 = %24: tensor<80x80x3x3xbf16>, %arg7 = %23: tensor<80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_377", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_377", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<80x80x3x3xbf16>, %arg10 = %arg7: tensor<80xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_377", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[80, 80, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_377", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 
1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<80x80x3x3xbf16>, tensor<4xi32>) -> tensor<80x3x3x80xbf16> loc(#loc252) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc252) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_377", + PartOfOutputName = "Conv_377", + dilation = array, + pad = array, + stride = array} : (tensor<1x23x40x80xbf16>, tensor<80x3x3x80xbf16>, tensor<80xbf16>) -> tensor<1x23x40x80xbf16> loc(#loc252) + %467 = tosa.transpose %466, %462 : (tensor<1x23x40x80xbf16>, tensor<4xi32>) -> tensor<1x80x23x40xbf16> loc(#loc252) + xten_nn.output %467 : tensor<1x80x23x40xbf16> loc(#loc252) + } -> tensor<1x80x23x40xbf16> loc(#loc252) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc252) + } -> tensor<1x80x23x40xbf16> loc(#loc252) + %400 = xten_nn.subgraph (%arg5 = %399: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sigmoid_378", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_378", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x80x23x40xbf16>) attributes { + LayerName = "Sigmoid_378", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_378", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SigmoidTemplatedBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_378", OutputName = "Sigmoid_378"} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc253) + xten_nn.output %462 : tensor<1x80x23x40xbf16> loc(#loc253) + } -> tensor<1x80x23x40xbf16> loc(#loc253) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc253) + } -> tensor<1x80x23x40xbf16> loc(#loc253) + %401 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_379_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 
80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Split_379_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 80 : ui32, + config.dim_h = 23 : ui32, + config.dim_w = 40 : ui32, + config.dtype = "bfloat16", + config.end = 80 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 40 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_379", + PartOfOutputName = "Split_379", + size = array, + start = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc254) + } -> tensor<1x40x23x40xbf16> loc(#loc254) + %402 = xten_nn.subgraph (%arg5 = %20: tensor<1x40x23x40xbf16>, %arg6 = %401: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [1 : index], + LayerName = "Sub_385", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Sub_385", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Sub_385", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Sub_385", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "SubBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_385", OutputName = "Sub_385"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc4) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc4) + } -> tensor<1x40x23x40xbf16> loc(#loc4) + 
xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc4) + } -> tensor<1x40x23x40xbf16> loc(#loc4) + %403 = xten_nn.subgraph (%arg5 = %402: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_386", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_386", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Mul_386", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_386", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_386", + OutputName = "Mul_386", + shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc260) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc260) + } -> tensor<1x40x23x40xbf16> loc(#loc260) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc260) + } -> tensor<1x40x23x40xbf16> loc(#loc260) + %404 = xten_nn.subgraph (%arg5 = %400: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_379_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Split_379_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : 
vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 80 : ui32, + config.dim_h = 23 : ui32, + config.dim_w = 40 : ui32, + config.dtype = "bfloat16", + config.end = 40 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_379", + PartOfOutputName = "Split_379", + size = array<i64: 1, 40, 23, 40>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x80x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc254) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc254) + } -> tensor<1x40x23x40xbf16> loc(#loc254) + %405 = xten_nn.subgraph (%arg5 = %404: tensor<1x40x23x40xbf16>, %arg6 = %arg3: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_380", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_380", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Mul_380", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_380", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_380", + OutputName = "Mul_380", + shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc255) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc255) + } -> tensor<1x40x23x40xbf16> loc(#loc255) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc255) + } -> tensor<1x40x23x40xbf16> loc(#loc255) + %406 = xten_nn.subgraph (%arg5 = %397: tensor<1x40x23x40xbf16>, %arg6 = %405: tensor<1x40x23x40xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_381", + Op = "Concat", + Operands = [ + {
CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Concat_381", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_381", + OutputName = "Concat_381", + axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc256) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc256) + } -> tensor<1x80x23x40xbf16> loc(#loc256) + %407 = xten_nn.subgraph (%arg5 = %406: tensor<1x80x23x40xbf16>, %arg6 = %22: tensor<40x80x3x3xbf16>, %arg7 = %21: tensor<40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_382", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_382", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x80x23x40xbf16>, %arg9 = %arg6: tensor<40x80x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_382", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 80, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_382", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, +
config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<40x80x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x80xbf16> loc(#loc257) + %465 = tosa.transpose %arg8, %463 : (tensor<1x80x23x40xbf16>, tensor<4xi32>) -> tensor<1x23x40x80xbf16> loc(#loc257) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_382", + PartOfOutputName = "Conv_382", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x23x40x80xbf16>, tensor<40x3x3x80xbf16>, tensor<40xbf16>) -> tensor<1x23x40x40xbf16> loc(#loc257) + %467 = tosa.transpose %466, %462 : (tensor<1x23x40x40xbf16>, tensor<4xi32>) -> tensor<1x40x23x40xbf16> loc(#loc257) + xten_nn.output %467 : tensor<1x40x23x40xbf16> loc(#loc257) + } -> tensor<1x40x23x40xbf16> loc(#loc257) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc257) + } -> tensor<1x40x23x40xbf16> loc(#loc257) + %408 = xten_nn.subgraph (%arg5 = %407: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Tanh_383", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Tanh_383", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Tanh_383", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Tanh_383", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "TanhTemplatedBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.tanh %arg6 {LayerName = "Tanh_383", OutputName = "Tanh_383"} : (tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc258) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc258) + } -> tensor<1x40x23x40xbf16> loc(#loc258) + xten_nn.output %461 :
tensor<1x40x23x40xbf16> loc(#loc258) + } -> tensor<1x40x23x40xbf16> loc(#loc258) + %409 = xten_nn.subgraph (%arg5 = %401: tensor<1x40x23x40xbf16>, %arg6 = %408: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_387", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_387", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Mul_387", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Mul_387", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_387", + OutputName = "Mul_387", + shift = 0 : i8} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc259) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc259) + } -> tensor<1x40x23x40xbf16> loc(#loc259) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc259) + } -> tensor<1x40x23x40xbf16> loc(#loc259) + %410 = xten_nn.subgraph (%arg5 = %403: tensor<1x40x23x40xbf16>, %arg6 = %409: tensor<1x40x23x40xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Add_388", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_388", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x40x23x40xbf16>, %arg8 = %arg6: tensor<1x40x23x40xbf16>) attributes { + LayerName = "Add_388", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Add_388", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.add %arg7, %arg8 {LayerName = "Add_388", OutputName = "Add_388"} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x40x23x40xbf16> loc(#loc261) + xten_nn.output %462 : tensor<1x40x23x40xbf16> loc(#loc261) + } -> tensor<1x40x23x40xbf16> loc(#loc261) + xten_nn.output %461 : tensor<1x40x23x40xbf16> loc(#loc261) + } -> tensor<1x40x23x40xbf16> loc(#loc261) + %411 = xten_nn.subgraph (%arg5 = %396: tensor<1x40x23x40xbf16>, %arg6 = %410: tensor<1x40x23x40xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_389", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Concat_389", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_389", + OutputName = "Concat_389", + axis = 1 : i32} : (tensor<1x40x23x40xbf16>, tensor<1x40x23x40xbf16>) -> tensor<1x80x23x40xbf16> loc(#loc262) + xten_nn.output %461 : tensor<1x80x23x40xbf16> loc(#loc262) + } -> tensor<1x80x23x40xbf16> loc(#loc262) + %412 = xten_nn.subgraph (%arg5 = %411: tensor<1x80x23x40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Resize_391", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 23, 40]> : vector<4xindex> + } + ], + OutputName = "Resize_391", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> + } + ], + Specializes = "ResizeAdf", + With = { + config.co_trans_mode = 1 : ui32, + config.dim_0 = 1 : ui32, + config.dim_1 = 80 : ui32, + config.dim_2 = 23 : ui32, + config.dim_3 = 40 : ui32, + config.dtype = "bfloat16", + config.mode = 1 : ui32, + config.nearest_mode = 0 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.output_H = 46 : ui32, + config.output_W = 80 : ui32 + }} { + %461 = xten_nn.resize %arg5 { + LayerName = "Resize_391", + OutputName = "Resize_391", + coordinate_transformation_mode = 1 : i64, + mode = 1 : i64, + nearest_mode = 0 : i64, + scales = array} : (tensor<1x80x23x40xbf16>) -> tensor<1x80x46x80xbf16> loc(#loc263) + xten_nn.output %461 : tensor<1x80x46x80xbf16> loc(#loc263) + } -> tensor<1x80x46x80xbf16> loc(#loc263) + %413 = xten_nn.subgraph (%arg5 = %412: tensor<1x80x46x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Slice_397", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 46, 80]> : vector<4xindex> + } + ], + OutputName = "Slice_397", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "H", + config.dim_c = 80 : ui32, + config.dim_h = 46 : ui32, + config.dim_w = 80 : ui32, + config.dtype = "bfloat16", + config.end = 45 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + LayerName = "Slice_397", + OutputName = "Slice_397", + size = array, + start = array} : (tensor<1x80x46x80xbf16>) -> tensor<1x80x45x80xbf16> loc(#loc264) + xten_nn.output %461 : tensor<1x80x45x80xbf16> loc(#loc264) + } -> tensor<1x80x45x80xbf16> loc(#loc264) + %414 = xten_nn.subgraph (%arg5 = %413: tensor<1x80x45x80xbf16>, %arg6 = %181: tensor<1x24x45x80xbf16>, %arg7 = %392: tensor<1x3x45x80xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index, 2 : index], + LayerName = "Concat_398", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 80, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + 
l3_tile_count = dense<[1, 24, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm3", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Concat_398", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6, %arg7 { + LayerName = "Concat_398", + OutputName = "Concat_398", + axis = 1 : i32} : (tensor<1x80x45x80xbf16>, tensor<1x24x45x80xbf16>, tensor<1x3x45x80xbf16>) -> tensor<1x107x45x80xbf16> loc(#loc265) + xten_nn.output %461 : tensor<1x107x45x80xbf16> loc(#loc265) + } -> tensor<1x107x45x80xbf16> loc(#loc265) + %415 = xten_nn.subgraph (%arg5 = %414: tensor<1x107x45x80xbf16>, %arg6 = %19: tensor<40x107x3x3xbf16>, %arg7 = %18: tensor<40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_399", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_400", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x107x45x80xbf16>, %arg9 = %arg6: tensor<40x107x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_399", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 107, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[40, 107, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_400", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", +
config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<40x107x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x107xbf16> loc(#loc350) + %465 = tosa.transpose %arg8, %463 : (tensor<1x107x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x107xbf16> loc(#loc350) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_399", + PartOfOutputName = "Conv_399", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x45x80x107xbf16>, tensor<40x3x3x107xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc266) + %467 = tosa.clamp %466 { + LayerName = "Relu_400", + OutputName = "Relu_400", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x45x80x40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc267) + %468 = tosa.transpose %467, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc350) + xten_nn.output %468 : tensor<1x40x45x80xbf16> loc(#loc267) + } -> tensor<1x40x45x80xbf16> loc(#loc350) + xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc350) + } -> tensor<1x40x45x80xbf16> loc(#loc350) + %416 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_401_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Split_401_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 40 : ui32, + config.dim_h = 45 : ui32, + config.dim_w = 80 : ui32, + config.dtype = "bfloat16", + config.end = 20 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_401", + PartOfOutputName = "Split_401", + size = array<i64: 1, 20, 45, 80>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) + } -> tensor<1x20x45x80xbf16> loc(#loc268) + %417 = xten_nn.subgraph (%arg5 = %415: tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_401_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8",
+ Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Split_401_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 40 : ui32, + config.dim_h = 45 : ui32, + config.dim_w = 80 : ui32, + config.dtype = "bfloat16", + config.end = 40 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 20 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_401", + PartOfOutputName = "Split_401", + size = array, + start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc268) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc268) + } -> tensor<1x20x45x80xbf16> loc(#loc268) + %418 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_402", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Concat_402", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "ConcatC8Adf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.in1_dim_c = 24 : ui32, + config.in1_dim_h = 45 : ui32, + config.in1_dim_w = 80 : ui32, + config.in2_dim_c = 24 : ui32, + config.in2_dim_h = 45 : ui32, + config.in2_dim_w = 80 : ui32, + config.num_eff_concat_input0_size = 20 : ui32, + config.num_eff_concat_input0_start = 0 : ui32, + config.num_eff_concat_input1_size = 20 : ui32, + config.num_eff_concat_input1_start = 0 : ui32 + }} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_402", + OutputName = "Concat_402", + axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc269) + xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc269) + } -> tensor<1x40x45x80xbf16> loc(#loc269) + %419 = xten_nn.subgraph (%arg5 = %418: tensor<1x40x45x80xbf16>, %arg6 = %17: tensor<40x40x3x3xbf16>, %arg7 = %16: tensor<40xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_403", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + 
L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_403", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<40x40x3x3xbf16>, %arg10 = %arg7: tensor<40xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_403", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[40, 40, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_403", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<40x40x3x3xbf16>, tensor<4xi32>) -> tensor<40x3x3x40xbf16> loc(#loc270) + %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc270) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_403", + PartOfOutputName = "Conv_403", + dilation = array, + pad = array, + stride = array} : (tensor<1x45x80x40xbf16>, tensor<40x3x3x40xbf16>, tensor<40xbf16>) -> tensor<1x45x80x40xbf16> loc(#loc270) + %467 = tosa.transpose %466, %462 : (tensor<1x45x80x40xbf16>, tensor<4xi32>) -> tensor<1x40x45x80xbf16> loc(#loc270) + xten_nn.output %467 : tensor<1x40x45x80xbf16> loc(#loc270) + } -> tensor<1x40x45x80xbf16> loc(#loc270) + xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc270) + } -> tensor<1x40x45x80xbf16> loc(#loc270) + %420 = xten_nn.subgraph (%arg5 = %419: 
tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sigmoid_404", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_404", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x40x45x80xbf16>) attributes { + LayerName = "Sigmoid_404", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_404", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SigmoidTemplatedBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_404", OutputName = "Sigmoid_404"} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc271) + xten_nn.output %462 : tensor<1x40x45x80xbf16> loc(#loc271) + } -> tensor<1x40x45x80xbf16> loc(#loc271) + xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc271) + } -> tensor<1x40x45x80xbf16> loc(#loc271) + %421 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_405_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Split_405_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 40 : ui32, + config.dim_h = 45 : ui32, + config.dim_w = 80 : ui32, + config.dtype = "bfloat16", + config.end = 40 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 20 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_405", + PartOfOutputName = "Split_405", + size = array, + start = array} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) + 
xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc272) + } -> tensor<1x20x45x80xbf16> loc(#loc272) + %422 = xten_nn.subgraph (%arg5 = %13: tensor<1x20x45x80xbf16>, %arg6 = %421: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [1 : index], + LayerName = "Sub_411", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Sub_411", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Sub_411", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Sub_411", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SubBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_411", OutputName = "Sub_411"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc3) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc3) + } -> tensor<1x20x45x80xbf16> loc(#loc3) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc3) + } -> tensor<1x20x45x80xbf16> loc(#loc3) + %423 = xten_nn.subgraph (%arg5 = %422: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_412", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_412", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 
4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Mul_412", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_412", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_412", + OutputName = "Mul_412", + shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc278) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc278) + } -> tensor<1x20x45x80xbf16> loc(#loc278) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc278) + } -> tensor<1x20x45x80xbf16> loc(#loc278) + %424 = xten_nn.subgraph (%arg5 = %420: tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_405_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Split_405_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 40 : ui32, + config.dim_h = 45 : ui32, + config.dim_w = 80 : ui32, + config.dtype = "bfloat16", + config.end = 20 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_405", + PartOfOutputName = "Split_405", + size = array<i64: 1, 20, 45, 80>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x40x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc272) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc272) + } -> tensor<1x20x45x80xbf16> loc(#loc272) + %425 = xten_nn.subgraph (%arg5 = %424: tensor<1x20x45x80xbf16>, %arg6 = %arg2: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_406", +
Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_406", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Mul_406", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_406", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_406", + OutputName = "Mul_406", + shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc273) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc273) + } -> tensor<1x20x45x80xbf16> loc(#loc273) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc273) + } -> tensor<1x20x45x80xbf16> loc(#loc273) + %426 = xten_nn.subgraph (%arg5 = %417: tensor<1x20x45x80xbf16>, %arg6 = %425: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_407", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Concat_407", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> 
: vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "ConcatC8Adf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.in1_dim_c = 24 : ui32, + config.in1_dim_h = 45 : ui32, + config.in1_dim_w = 80 : ui32, + config.in2_dim_c = 24 : ui32, + config.in2_dim_h = 45 : ui32, + config.in2_dim_w = 80 : ui32, + config.num_eff_concat_input0_size = 20 : ui32, + config.num_eff_concat_input0_start = 0 : ui32, + config.num_eff_concat_input1_size = 20 : ui32, + config.num_eff_concat_input1_start = 0 : ui32 + }} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_407", + OutputName = "Concat_407", + axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc274) + xten_nn.output %461 : tensor<1x40x45x80xbf16> loc(#loc274) + } -> tensor<1x40x45x80xbf16> loc(#loc274) + %427 = xten_nn.subgraph (%arg5 = %426: tensor<1x40x45x80xbf16>, %arg6 = %15: tensor<20x40x3x3xbf16>, %arg7 = %14: tensor<20xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_408", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_408", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x40x45x80xbf16>, %arg9 = %arg6: tensor<20x40x3x3xbf16>, %arg10 = %arg7: tensor<20xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_408", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[20, 40, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_408", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width
= 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<20x40x3x3xbf16>, tensor<4xi32>) -> tensor<20x3x3x40xbf16> loc(#loc275) + %465 = tosa.transpose %arg8, %463 : (tensor<1x40x45x80xbf16>, tensor<4xi32>) -> tensor<1x45x80x40xbf16> loc(#loc275) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_408", + PartOfOutputName = "Conv_408", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x45x80x40xbf16>, tensor<20x3x3x40xbf16>, tensor<20xbf16>) -> tensor<1x45x80x20xbf16> loc(#loc275) + %467 = tosa.transpose %466, %462 : (tensor<1x45x80x20xbf16>, tensor<4xi32>) -> tensor<1x20x45x80xbf16> loc(#loc275) + xten_nn.output %467 : tensor<1x20x45x80xbf16> loc(#loc275) + } -> tensor<1x20x45x80xbf16> loc(#loc275) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc275) + } -> tensor<1x20x45x80xbf16> loc(#loc275) + %428 = xten_nn.subgraph (%arg5 = %427: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Tanh_409", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Tanh_409", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Tanh_409", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Tanh_409", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "TanhTemplatedBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.tanh %arg6 {LayerName = "Tanh_409", OutputName = "Tanh_409"} : (tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc276) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc276) + } -> tensor<1x20x45x80xbf16> loc(#loc276) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc276) + } -> tensor<1x20x45x80xbf16> loc(#loc276) + %429 = xten_nn.subgraph (%arg5 = %421: tensor<1x20x45x80xbf16>, %arg6 = %428: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_413", + Operands = [ + {
+ CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_413", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Mul_413", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Mul_413", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_413", + OutputName = "Mul_413", + shift = 0 : i8} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc277) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc277) + } -> tensor<1x20x45x80xbf16> loc(#loc277) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc277) + } -> tensor<1x20x45x80xbf16> loc(#loc277) + %430 = xten_nn.subgraph (%arg5 = %423: tensor<1x20x45x80xbf16>, %arg6 = %429: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Add_414", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Add_414", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} 
{ + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x20x45x80xbf16>, %arg8 = %arg6: tensor<1x20x45x80xbf16>) attributes { + LayerName = "Add_414", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Add_414", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.add %arg7, %arg8 {LayerName = "Add_414", OutputName = "Add_414"} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x20x45x80xbf16> loc(#loc279) + xten_nn.output %462 : tensor<1x20x45x80xbf16> loc(#loc279) + } -> tensor<1x20x45x80xbf16> loc(#loc279) + xten_nn.output %461 : tensor<1x20x45x80xbf16> loc(#loc279) + } -> tensor<1x20x45x80xbf16> loc(#loc279) + %431 = xten_nn.subgraph (%arg5 = %416: tensor<1x20x45x80xbf16>, %arg6 = %430: tensor<1x20x45x80xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_415", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 20, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Concat_415", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + Specializes = "ConcatC8Adf", + With = { + config.aie_arch = "aie2p", + config.dtype = "bfloat16", + config.in1_dim_c = 24 : ui32, + config.in1_dim_h = 45 : ui32, + config.in1_dim_w = 80 : ui32, + config.in2_dim_c = 24 : ui32, + config.in2_dim_h = 45 : ui32, + config.in2_dim_w = 80 : ui32, + config.num_eff_concat_input0_size = 20 : ui32, + config.num_eff_concat_input0_start = 0 : ui32, + config.num_eff_concat_input1_size = 20 : ui32, + config.num_eff_concat_input1_start = 0 : ui32 + }} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_415", + OutputName = "Concat_415", + axis = 1 : i32} : (tensor<1x20x45x80xbf16>, tensor<1x20x45x80xbf16>) -> tensor<1x40x45x80xbf16> loc(#loc280) + xten_nn.output %461 : 
tensor<1x40x45x80xbf16> loc(#loc280) + } -> tensor<1x40x45x80xbf16> loc(#loc280) + %432 = xten_nn.subgraph (%arg5 = %431: tensor<1x40x45x80xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Resize_417", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 45, 80]> : vector<4xindex> + } + ], + OutputName = "Resize_417", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> + } + ], + Specializes = "ResizeAdf", + With = { + config.co_trans_mode = 1 : ui32, + config.dim_0 = 1 : ui32, + config.dim_1 = 40 : ui32, + config.dim_2 = 45 : ui32, + config.dim_3 = 80 : ui32, + config.dtype = "bfloat16", + config.mode = 1 : ui32, + config.nearest_mode = 0 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.output_H = 90 : ui32, + config.output_W = 160 : ui32 + }} { + %461 = xten_nn.resize %arg5 { + LayerName = "Resize_417", + OutputName = "Resize_417", + coordinate_transformation_mode = 1 : i64, + mode = 1 : i64, + nearest_mode = 0 : i64, + scales = array<f32: 1.000000e+00, 1.000000e+00, 2.000000e+00, 2.000000e+00>} : (tensor<1x40x45x80xbf16>) -> tensor<1x40x90x160xbf16> loc(#loc281) + xten_nn.output %461 : tensor<1x40x90x160xbf16> loc(#loc281) + } -> tensor<1x40x90x160xbf16> loc(#loc281) + %433 = xten_nn.subgraph (%arg5 = %432: tensor<1x40x90x160xbf16>, %arg6 = %175: tensor<1x16x90x160xbf16>, %arg7 = %391: tensor<1x3x90x160xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index, 2 : index], + LayerName = "Concat_418", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 40, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm3", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Concat_418", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6, %arg7 { + LayerName = "Concat_418", + OutputName = "Concat_418", + axis = 1 : i32} : (tensor<1x40x90x160xbf16>, tensor<1x16x90x160xbf16>, tensor<1x3x90x160xbf16>) -> tensor<1x59x90x160xbf16> loc(#loc282) + xten_nn.output %461 : tensor<1x59x90x160xbf16> loc(#loc282) + } -> tensor<1x59x90x160xbf16> loc(#loc282) + %434 = xten_nn.subgraph (%arg5 =
%433: tensor<1x59x90x160xbf16>, %arg6 = %12: tensor<32x59x3x3xbf16>, %arg7 = %11: tensor<32xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_419", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[32, 59, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_420", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x59x90x160xbf16>, %arg9 = %arg6: tensor<32x59x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_419", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 59, 90, 160]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[32, 59, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_420", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<32x59x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x59xbf16> loc(#loc351) + %465 = tosa.transpose %arg8, %463 : (tensor<1x59x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x59xbf16> loc(#loc351) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_419", + PartOfOutputName = "Conv_419", + dilation = array<i64: 1, 1>, + pad = array<i64: 1, 1, 1, 1>, + stride = array<i64: 1, 1>} : (tensor<1x90x160x59xbf16>, tensor<32x3x3x59xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc283) + %467 = tosa.clamp %466 { + LayerName = "Relu_420", +
OutputName = "Relu_420", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x90x160x32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc284) + %468 = tosa.transpose %467, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc351) + xten_nn.output %468 : tensor<1x32x90x160xbf16> loc(#loc284) + } -> tensor<1x32x90x160xbf16> loc(#loc351) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc351) + } -> tensor<1x32x90x160xbf16> loc(#loc351) + %435 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_421_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Split_421_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 32 : ui32, + config.dim_h = 90 : ui32, + config.dim_w = 160 : ui32, + config.dtype = "bfloat16", + config.end = 16 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_421", + PartOfOutputName = "Split_421", + size = array, + start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc285) + } -> tensor<1x16x90x160xbf16> loc(#loc285) + %436 = xten_nn.subgraph (%arg5 = %434: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_421_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Split_421_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 32 : ui32, + config.dim_h = 90 : ui32, + config.dim_w = 160 : ui32, + config.dtype = "bfloat16", + config.end = 32 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 16 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_421", + PartOfOutputName = "Split_421", + size = array, + start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc285) + xten_nn.output %461 : 
tensor<1x16x90x160xbf16> loc(#loc285) + } -> tensor<1x16x90x160xbf16> loc(#loc285) + %437 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_422", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Concat_422", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_422", + OutputName = "Concat_422", + axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc286) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc286) + } -> tensor<1x32x90x160xbf16> loc(#loc286) + %438 = xten_nn.subgraph (%arg5 = %437: tensor<1x32x90x160xbf16>, %arg6 = %10: tensor<32x32x3x3xbf16>, %arg7 = %9: tensor<32xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_423", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_423", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<32x32x3x3xbf16>, %arg10 = %arg7: tensor<32xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_423", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[32, 32, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_423", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN",
+ L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<32x32x3x3xbf16>, tensor<4xi32>) -> tensor<32x3x3x32xbf16> loc(#loc287) + %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc287) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_423", + PartOfOutputName = "Conv_423", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x32xbf16>, tensor<32x3x3x32xbf16>, tensor<32xbf16>) -> tensor<1x90x160x32xbf16> loc(#loc287) + %467 = tosa.transpose %466, %462 : (tensor<1x90x160x32xbf16>, tensor<4xi32>) -> tensor<1x32x90x160xbf16> loc(#loc287) + xten_nn.output %467 : tensor<1x32x90x160xbf16> loc(#loc287) + } -> tensor<1x32x90x160xbf16> loc(#loc287) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc287) + } -> tensor<1x32x90x160xbf16> loc(#loc287) + %439 = xten_nn.subgraph (%arg5 = %438: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Sigmoid_424", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_424", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x32x90x160xbf16>) attributes { + LayerName = "Sigmoid_424", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Sigmoid_424", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SigmoidTemplatedBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + 
config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.sigmoid %arg6 {LayerName = "Sigmoid_424", OutputName = "Sigmoid_424"} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc288) + xten_nn.output %462 : tensor<1x32x90x160xbf16> loc(#loc288) + } -> tensor<1x32x90x160xbf16> loc(#loc288) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc288) + } -> tensor<1x32x90x160xbf16> loc(#loc288) + %440 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_425_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Split_425_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 32 : ui32, + config.dim_h = 90 : ui32, + config.dim_w = 160 : ui32, + config.dtype = "bfloat16", + config.end = 32 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 16 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_425", + PartOfOutputName = "Split_425", + size = array, + start = array} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) + } -> tensor<1x16x90x160xbf16> loc(#loc289) + %441 = xten_nn.subgraph (%arg5 = %6: tensor<1x16x90x160xbf16>, %arg6 = %440: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [1 : index], + LayerName = "Sub_431", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Sub_431", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Sub_431", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + 
Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Sub_431", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SubBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.sub %arg7, %arg8 {LayerName = "Sub_431", OutputName = "Sub_431"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc2) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc2) + } -> tensor<1x16x90x160xbf16> loc(#loc2) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc2) + } -> tensor<1x16x90x160xbf16> loc(#loc2) + %442 = xten_nn.subgraph (%arg5 = %441: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_432", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_432", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Mul_432", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_432", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_432", + OutputName = "Mul_432", + shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc295) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc295) + } -> tensor<1x16x90x160xbf16> 
loc(#loc295) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc295) + } -> tensor<1x16x90x160xbf16> loc(#loc295) + %443 = xten_nn.subgraph (%arg5 = %439: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_425_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Split_425_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 32 : ui32, + config.dim_h = 90 : ui32, + config.dim_w = 160 : ui32, + config.dtype = "bfloat16", + config.end = 16 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_425", + PartOfOutputName = "Split_425", + size = array<i64: 1, 16, 90, 160>, + start = array<i64: 0, 0, 0, 0>} : (tensor<1x32x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc289) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc289) + } -> tensor<1x16x90x160xbf16> loc(#loc289) + %444 = xten_nn.subgraph (%arg5 = %443: tensor<1x16x90x160xbf16>, %arg6 = %arg1: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_426", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_426", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Mul_426", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_426", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end
= dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_426", + OutputName = "Mul_426", + shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc290) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc290) + } -> tensor<1x16x90x160xbf16> loc(#loc290) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc290) + } -> tensor<1x16x90x160xbf16> loc(#loc290) + %445 = xten_nn.subgraph (%arg5 = %436: tensor<1x16x90x160xbf16>, %arg6 = %444: tensor<1x16x90x160xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_427", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Concat_427", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_427", + OutputName = "Concat_427", + axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc291) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc291) + } -> tensor<1x32x90x160xbf16> loc(#loc291) + %446 = xten_nn.subgraph (%arg5 = %445: tensor<1x32x90x160xbf16>, %arg6 = %8: tensor<16x32x3x3xbf16>, %arg7 = %7: tensor<16xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_428", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_428", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x32x90x160xbf16>, %arg9 = %arg6: tensor<16x32x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { + Dilations = array<i64: 1, 1>, + HWPadding = [[1, 1], [1, 1]], +
LayerName = "Conv_428", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 32, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_428", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<16x32x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x32xbf16> loc(#loc292) + %465 = tosa.transpose %arg8, %463 : (tensor<1x32x90x160xbf16>, tensor<4xi32>) -> tensor<1x90x160x32xbf16> loc(#loc292) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_428", + PartOfOutputName = "Conv_428", + dilation = array, + pad = array, + stride = array} : (tensor<1x90x160x32xbf16>, tensor<16x3x3x32xbf16>, tensor<16xbf16>) -> tensor<1x90x160x16xbf16> loc(#loc292) + %467 = tosa.transpose %466, %462 : (tensor<1x90x160x16xbf16>, tensor<4xi32>) -> tensor<1x16x90x160xbf16> loc(#loc292) + xten_nn.output %467 : tensor<1x16x90x160xbf16> loc(#loc292) + } -> tensor<1x16x90x160xbf16> loc(#loc292) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc292) + } -> tensor<1x16x90x160xbf16> loc(#loc292) + %447 = xten_nn.subgraph (%arg5 = %446: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Tanh_429", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Tanh_429", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "single", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Tanh_429", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = 
"C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Tanh_429", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "TanhTemplatedBf16", + Traits = { + Elementwise = true, + Unary = true + }, + With = { + config.ENABLE_FP16_AS_BF16 = 0 : ui8, + config.aie_arch = "aie2p", + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.tanh %arg6 {LayerName = "Tanh_429", OutputName = "Tanh_429"} : (tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc293) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc293) + } -> tensor<1x16x90x160xbf16> loc(#loc293) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc293) + } -> tensor<1x16x90x160xbf16> loc(#loc293) + %448 = xten_nn.subgraph (%arg5 = %440: tensor<1x16x90x160xbf16>, %arg6 = %447: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Mul_433", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_433", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Mul_433", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Mul_433", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "MulBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.mul %arg7, %arg8 { + LayerName = "Mul_433", + OutputName = "Mul_433", + shift = 0 : i8} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc294) + xten_nn.output %462 : 
tensor<1x16x90x160xbf16> loc(#loc294) + } -> tensor<1x16x90x160xbf16> loc(#loc294) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc294) + } -> tensor<1x16x90x160xbf16> loc(#loc294) + %449 = xten_nn.subgraph (%arg5 = %442: tensor<1x16x90x160xbf16>, %arg6 = %448: tensor<1x16x90x160xbf16>) attributes { + IfmOperands = [0 : index, 1 : index], + LayerName = "Add_434", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_434", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x16x90x160xbf16>, %arg8 = %arg6: tensor<1x16x90x160xbf16>) attributes { + LayerName = "Add_434", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Add_434", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + Specializes = "AddBf16", + Traits = { + Binary = true, + Elementwise = true + }, + With = { + config.act = 0 : ui8, + config.act_type = "LINEAR", + config.aie_arch = "aie2p", + config.compiler = "chess", + config.dtype = "bfloat16", + config.num_kernel_iters = 0 : ui16 + }} { + %462 = tosa.add %arg7, %arg8 {LayerName = "Add_434", OutputName = "Add_434"} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x16x90x160xbf16> loc(#loc296) + xten_nn.output %462 : tensor<1x16x90x160xbf16> loc(#loc296) + } -> tensor<1x16x90x160xbf16> loc(#loc296) + xten_nn.output %461 : tensor<1x16x90x160xbf16> loc(#loc296) + } -> tensor<1x16x90x160xbf16> loc(#loc296) + %450 = xten_nn.subgraph (%arg5 = %435: tensor<1x16x90x160xbf16>, %arg6 = %449: tensor<1x16x90x160xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_435", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : 
vector<4xindex>, + l3_tile_count = dense<[1, 16, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Concat_435", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_435", + OutputName = "Concat_435", + axis = 1 : i32} : (tensor<1x16x90x160xbf16>, tensor<1x16x90x160xbf16>) -> tensor<1x32x90x160xbf16> loc(#loc297) + xten_nn.output %461 : tensor<1x32x90x160xbf16> loc(#loc297) + } -> tensor<1x32x90x160xbf16> loc(#loc297) + %451 = xten_nn.subgraph (%arg5 = %450: tensor<1x32x90x160xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Resize_437", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 90, 160]> : vector<4xindex> + } + ], + OutputName = "Resize_437", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> + } + ], + Specializes = "ResizeAdf", + With = { + config.co_trans_mode = 1 : ui32, + config.dim_0 = 1 : ui32, + config.dim_1 = 32 : ui32, + config.dim_2 = 90 : ui32, + config.dim_3 = 160 : ui32, + config.dtype = "bfloat16", + config.mode = 1 : ui32, + config.nearest_mode = 0 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.output_H = 180 : ui32, + config.output_W = 320 : ui32 + }} { + %461 = xten_nn.resize %arg5 { + LayerName = "Resize_437", + OutputName = "Resize_437", + coordinate_transformation_mode = 1 : i64, + mode = 1 : i64, + nearest_mode = 0 : i64, + scales = array<f32: 1.000000e+00, 1.000000e+00, 2.000000e+00, 2.000000e+00>} : (tensor<1x32x90x160xbf16>) -> tensor<1x32x180x320xbf16> loc(#loc298) + xten_nn.output %461 : tensor<1x32x180x320xbf16> loc(#loc298) + } -> tensor<1x32x180x320xbf16> loc(#loc298) + %452 = xten_nn.subgraph (%arg5 = %451: tensor<1x32x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes { + Axis = 1 : i32, + IfmOperands = [0 : index, 1 : index], + LayerName = "Concat_438", + Op = "Concat", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm1", + l3_extend_end = dense<0> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 32, 180, 320]> : vector<4xindex> + }, + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm2", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Concat_438", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "PseudoOp", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 35, 180, 320]>
: vector<4xindex> + } + ], + current_data_format = "NCHW", + data_format = "HCWN"} { + %461 = tosa.concat %arg5, %arg6 { + LayerName = "Concat_438", + OutputName = "Concat_438", + axis = 1 : i32} : (tensor<1x32x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x35x180x320xbf16> loc(#loc299) + xten_nn.output %461 : tensor<1x35x180x320xbf16> loc(#loc299) + } -> tensor<1x35x180x320xbf16> loc(#loc299) + %453 = xten_nn.subgraph (%arg5 = %452: tensor<1x35x180x320xbf16>, %arg6 = %5: tensor<16x35x3x3xbf16>, %arg7 = %4: tensor<16xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_439", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_440", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x35x180x320xbf16>, %arg9 = %arg6: tensor<16x35x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_439", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 35, 180, 320]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[16, 35, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_440", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<16x35x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x35xbf16> loc(#loc352) + 
%465 = tosa.transpose %arg8, %463 : (tensor<1x35x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x35xbf16> loc(#loc352) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_439", + PartOfOutputName = "Conv_439", + dilation = array, + pad = array, + stride = array} : (tensor<1x180x320x35xbf16>, tensor<16x3x3x35xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc300) + %467 = tosa.clamp %466 { + LayerName = "Relu_440", + OutputName = "Relu_440", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc301) + %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc352) + xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc301) + } -> tensor<1x16x180x320xbf16> loc(#loc352) + xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc352) + } -> tensor<1x16x180x320xbf16> loc(#loc352) + %454 = xten_nn.subgraph (%arg5 = %453: tensor<1x16x180x320xbf16>, %arg6 = %3: tensor<16x16x3x3xbf16>, %arg7 = %2: tensor<16xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_441", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Relu_442", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<16x16x3x3xbf16>, %arg10 = %arg7: tensor<16xbf16>) attributes { + Dilations = array, + HWPadding = [[1, 1], [1, 1]], + LayerName = "Conv_441", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[16, 16, 3, 3]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Relu_442", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true, + NonNegativeOut = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 1 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", 
+ config.ksize.height = 3 : ui8, + config.ksize.width = 3 : ui8, + config.lrelu_alpha = 0.000000e+00 : bf16, + config.lrelu_alpha_kernel = 0.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %464 = tosa.transpose %arg9, %463 : (tensor<16x16x3x3xbf16>, tensor<4xi32>) -> tensor<16x3x3x16xbf16> loc(#loc353) + %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc353) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_441", + PartOfOutputName = "Conv_441", + dilation = array, + pad = array, + stride = array} : (tensor<1x180x320x16xbf16>, tensor<16x3x3x16xbf16>, tensor<16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc302) + %467 = tosa.clamp %466 { + LayerName = "Relu_442", + OutputName = "Relu_442", + max_fp = 3.40282347E+38 : f32, + max_int = 2147483647 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x180x320x16xbf16>) -> tensor<1x180x320x16xbf16> loc(#loc303) + %468 = tosa.transpose %467, %462 : (tensor<1x180x320x16xbf16>, tensor<4xi32>) -> tensor<1x16x180x320xbf16> loc(#loc353) + xten_nn.output %468 : tensor<1x16x180x320xbf16> loc(#loc303) + } -> tensor<1x16x180x320xbf16> loc(#loc353) + xten_nn.output %461 : tensor<1x16x180x320xbf16> loc(#loc353) + } -> tensor<1x16x180x320xbf16> loc(#loc353) + %455 = xten_nn.subgraph (%arg5 = %454: tensor<1x16x180x320xbf16>, %arg6 = %1: tensor<4x16x1x1xbf16>, %arg7 = %0: tensor<4xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Conv_443", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + }, + { + UnknownDataFormat = true, + l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> + }, + { + UnknownDataFormat = true + } + ], + OutputName = "Conv_443", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg8 = %arg5: tensor<1x16x180x320xbf16>, %arg9 = %arg6: tensor<4x16x1x1xbf16>, %arg10 = %arg7: tensor<4xbf16>) attributes { + Dilations = array, + HWPadding = [[0, 0], [0, 0]], + LayerName = "Conv_443", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 16, 180, 320]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "wts_data", + UnknownDataFormat = true, + l3_extend_end = dense<[4, 0, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[4, 16, 1, 1]> : vector<4xindex> + }, + { + Port = "data_io.wts", + SubPort = "bias", + UnknownDataFormat = true + } + ], + OutputName = "Conv_443", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + 
l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> + } + ], + Specializes = "Conv2DBf16", + Traits = { + AllowDMAOptimization = true + }, + With = { + config.AIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16 = 1 : ui8, + config.act = 0 : ui8, + config.act_type = "RELU", + config.aie_arch = "aie2p", + config.batch_size = 1 : ui8, + config.compiler = "chess", + config.conv_type = [0 : ui8, 12 : ui8, 64 : ui8], + config.dtype_ifm = "bfloat16", + config.dtype_ofm = "bfloat16", + config.dtype_wts = "bfloat16", + config.ksize.height = 1 : ui8, + config.ksize.width = 1 : ui8, + config.lrelu_alpha = 1.000000e+00 : bf16, + config.lrelu_alpha_kernel = 1.000000e+00 : bf16, + config.stride_h = 1 : ui8, + config.stride_w = 1 : ui8 + }} { + %462 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc) + %463 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32> loc(#loc304) + %464 = tosa.reshape %arg9 {new_shape = array} : (tensor<4x16x1x1xbf16>) -> tensor<4x1x1x16xbf16> loc(#loc304) + %465 = tosa.transpose %arg8, %463 : (tensor<1x16x180x320xbf16>, tensor<4xi32>) -> tensor<1x180x320x16xbf16> loc(#loc304) + %466 = tosa.conv2d %465, %464, %arg10 { + PartOfLayerName = "Conv_443", + PartOfOutputName = "Conv_443", + dilation = array, + pad = array, + stride = array} : (tensor<1x180x320x16xbf16>, tensor<4x1x1x16xbf16>, tensor<4xbf16>) -> tensor<1x180x320x4xbf16> loc(#loc304) + %467 = tosa.transpose %466, %462 : (tensor<1x180x320x4xbf16>, tensor<4xi32>) -> tensor<1x4x180x320xbf16> loc(#loc304) + xten_nn.output %467 : tensor<1x4x180x320xbf16> loc(#loc304) + } -> tensor<1x4x180x320xbf16> loc(#loc304) + xten_nn.output %461 : tensor<1x4x180x320xbf16> loc(#loc304) + } -> tensor<1x4x180x320xbf16> loc(#loc304) + %456 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_444_Duplicated#1", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Split_444_Duplicated#1", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 8 : ui32, + config.dim_h = 180 : ui32, + config.dim_w = 320 : ui32, + config.dtype = "bfloat16", + config.end = 4 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 3 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_444", + PartOfOutputName = "Split_444", + size = array, + start = array} : (tensor<1x4x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc305) + xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc305) + } -> tensor<1x1x180x320xbf16> loc(#loc305) + %457 = xten_nn.subgraph (%arg5 = %456: tensor<1x1x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Clip_447", + Operands = [ + { 
+ CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Clip_447", + Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight", + Reason = "InCoreChain", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> + } + ], + memory_configuration = { + L1 = {layout = "strict"}, + L2 = {feature_maps_buffering = "double", layout = "flexible"} + }} { + %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x1x180x320xbf16>) attributes { + LayerName = "Clip_447", + Operands = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Clip_447", + Reason = "MllibKernel", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 7, 0, 0]> : vector<4xindex>, + l3_tile_count = dense<[1, 1, 180, 320]> : vector<4xindex> + } + ], + Specializes = "ClipBf16", + Traits = { + Elementwise = true, + NonNegativeOut = true, + Unary = true + }, + With = { + config.aie_arch = "aie2p", + config.clamp_max = 1.000000e+00 : bf16, + config.clamp_min = 0.000000e+00 : bf16, + config.compiler = "chess", + config.ifm_shift = 0 : si8, + config.num_kernel_iters = 0 : ui16, + config.ofm_shift = 0 : si8 + }} { + %462 = tosa.clamp %arg6 { + LayerName = "Clip_447", + OutputName = "Clip_447", + max_fp = 1.000000e+00 : f32, + max_int = 1 : i64, + min_fp = 0.000000e+00 : f32, + min_int = 0 : i64} : (tensor<1x1x180x320xbf16>) -> tensor<1x1x180x320xbf16> loc(#loc307) + xten_nn.output %462 : tensor<1x1x180x320xbf16> loc(#loc307) + } -> tensor<1x1x180x320xbf16> loc(#loc307) + xten_nn.output %461 : tensor<1x1x180x320xbf16> loc(#loc307) + } -> tensor<1x1x180x320xbf16> loc(#loc307) + %458 = xten_nn.subgraph (%arg5 = %455: tensor<1x4x180x320xbf16>) attributes { + IfmOperands = [0 : index], + LayerName = "Split_444_Duplicated#0", + Operands = [ + { + CurrentDataFormat = "NCHW", + External = false, + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ifm", + l3_extend_end = dense<[0, 4, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 4, 180, 320]> : vector<4xindex> + } + ], + OutputName = "Split_444_Duplicated#0", + Overlay = "1x1_1x1_unspecifiedConnectivity", + Reason = "TemplatedGraph", + Results = [ + { + CurrentDataFormat = "NCHW", + L3DataFormat = "HCWN", + L3Vectorization = "C:8", + Port = "data_io.ofm", + l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>, + l3_extend_start = dense<0> : vector<4xindex>, + l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex> + } + ], + Specializes = "SliceHCWC8Adf", + With = { + config.aie_arch = "aie2p", + config.axis_letter = "C", + config.dim_c = 8 : ui32, + config.dim_h = 180 : ui32, + config.dim_w = 320 : ui32, + config.dtype = "bfloat16", + config.end = 3 : ui32, + config.num_ifm_shim_ch = 2 : ui32, + config.num_ofm_shim_ch = 2 : ui32, + config.start = 0 : ui32, + config.step = 1 : ui32 + }} { + %461 = tosa.slice %arg5 { + PartOfLayerName = "Split_444", + PartOfOutputName = "Split_444", + size = array, + start = array} : 
+      xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc305)
+    } -> tensor<1x3x180x320xbf16> loc(#loc305)
+    %459 = xten_nn.subgraph (%arg5 = %458: tensor<1x3x180x320xbf16>, %arg6 = %166: tensor<1x3x180x320xbf16>) attributes {
+      IfmOperands = [0 : index, 1 : index],
+      LayerName = "Add_445",
+      Operands = [
+        {
+          CurrentDataFormat = "NCHW",
+          L3DataFormat = "HCWN",
+          L3Vectorization = "C:8",
+          l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+          l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+        },
+        {
+          CurrentDataFormat = "NCHW",
+          L3DataFormat = "HCWN",
+          L3Vectorization = "C:8",
+          l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+          l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+        }
+      ],
+      OutputName = "Add_445",
+      Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight",
+      Reason = "InCoreChain",
+      Results = [
+        {
+          CurrentDataFormat = "NCHW",
+          L3DataFormat = "HCWN",
+          L3Vectorization = "C:8",
+          l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+          l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+        }
+      ],
+      memory_configuration = {
+        L1 = {layout = "strict"},
+        L2 = {feature_maps_buffering = "double", layout = "flexible"}
+      }} {
+      %461 = xten_nn.subgraph (%arg7 = %arg5: tensor<1x3x180x320xbf16>, %arg8 = %arg6: tensor<1x3x180x320xbf16>) attributes {
+        LayerName = "Add_445",
+        Operands = [
+          {
+            CurrentDataFormat = "NCHW",
+            External = false,
+            L3DataFormat = "HCWN",
+            L3Vectorization = "C:8",
+            Port = "data_io.ifm1",
+            l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+            l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+          },
+          {
+            CurrentDataFormat = "NCHW",
+            External = false,
+            L3DataFormat = "HCWN",
+            L3Vectorization = "C:8",
+            Port = "data_io.ifm2",
+            l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+            l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+          }
+        ],
+        OutputName = "Add_445",
+        Reason = "MllibKernel",
+        Results = [
+          {
+            CurrentDataFormat = "NCHW",
+            L3DataFormat = "HCWN",
+            L3Vectorization = "C:8",
+            Port = "data_io.ofm",
+            l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+            l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+          }
+        ],
+        Specializes = "AddBf16",
+        Traits = {
+          Binary = true,
+          Elementwise = true
+        },
+        With = {
+          config.act = 0 : ui8,
+          config.act_type = "LINEAR",
+          config.aie_arch = "aie2p",
+          config.compiler = "chess",
+          config.dtype = "bfloat16",
+          config.num_kernel_iters = 0 : ui16
+        }} {
+        %462 = tosa.add %arg7, %arg8 {LayerName = "Add_445", OutputName = "Add_445"} : (tensor<1x3x180x320xbf16>, tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc11)
+        xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc11)
+      } -> tensor<1x3x180x320xbf16> loc(#loc11)
+      xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc11)
+    } -> tensor<1x3x180x320xbf16> loc(#loc11)
+    %460 = xten_nn.subgraph (%arg5 = %459: tensor<1x3x180x320xbf16>) attributes {
+      IfmOperands = [0 : index],
+      LayerName = "Clip_446",
+      Operands = [
+        {
+          CurrentDataFormat = "NCHW",
+          L3DataFormat = "HCWN",
+          L3Vectorization = "C:8",
+          l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+          l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+        }
+      ],
+      OutputName = "Clip_446",
+      Overlay = "4x4_1x4_vertBroadcastLeft_horizBroadcastRight",
+      Reason = "InCoreChain",
+      Results = [
+        {
+          CurrentDataFormat = "NCHW",
+          L3DataFormat = "HCWN",
+          L3Vectorization = "C:8",
+          l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+          l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+        }
+      ],
+      memory_configuration = {
+        L1 = {layout = "strict"},
+        L2 = {feature_maps_buffering = "double", layout = "flexible"}
+      }} {
+      %461 = xten_nn.subgraph (%arg6 = %arg5: tensor<1x3x180x320xbf16>) attributes {
+        LayerName = "Clip_446",
+        Operands = [
+          {
+            CurrentDataFormat = "NCHW",
+            L3DataFormat = "HCWN",
+            L3Vectorization = "C:8",
+            Port = "data_io.ifm",
+            l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+            l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+          }
+        ],
+        OutputName = "Clip_446",
+        Reason = "MllibKernel",
+        Results = [
+          {
+            CurrentDataFormat = "NCHW",
+            L3DataFormat = "HCWN",
+            L3Vectorization = "C:8",
+            Port = "data_io.ofm",
+            l3_extend_end = dense<[0, 5, 0, 0]> : vector<4xindex>,
+            l3_tile_count = dense<[1, 3, 180, 320]> : vector<4xindex>
+          }
+        ],
+        Specializes = "ClipBf16",
+        Traits = {
+          Elementwise = true,
+          NonNegativeOut = true,
+          Unary = true
+        },
+        With = {
+          config.aie_arch = "aie2p",
+          config.clamp_max = 1.000000e+00 : bf16,
+          config.clamp_min = 0.000000e+00 : bf16,
+          config.compiler = "chess",
+          config.ifm_shift = 0 : si8,
+          config.num_kernel_iters = 0 : ui16,
+          config.ofm_shift = 0 : si8
+        }} {
+        %462 = tosa.clamp %arg6 {
+          LayerName = "Clip_446",
+          OutputName = "Clip_446",
+          max_fp = 1.000000e+00 : f32,
+          max_int = 1 : i64,
+          min_fp = 0.000000e+00 : f32,
+          min_int = 0 : i64} : (tensor<1x3x180x320xbf16>) -> tensor<1x3x180x320xbf16> loc(#loc306)
+        xten_nn.output %462 : tensor<1x3x180x320xbf16> loc(#loc306)
+      } -> tensor<1x3x180x320xbf16> loc(#loc306)
+      xten_nn.output %461 : tensor<1x3x180x320xbf16> loc(#loc306)
+    } -> tensor<1x3x180x320xbf16> loc(#loc306)
+    return %449, %430, %410, %387, %460, %457 : tensor<1x16x90x160xbf16>, tensor<1x20x45x80xbf16>, tensor<1x40x23x40xbf16>, tensor<1x64x12x20xbf16>, tensor<1x3x180x320xbf16>, tensor<1x1x180x320xbf16> loc(#loc308)
+  } loc(#loc308)
+} loc(#loc)
+#loc1 = loc("Div_2")
+#loc2 = loc("Sub_431")
+#loc3 = loc("Sub_411")
+#loc4 = loc("Sub_385")
+#loc5 = loc("Sub_359")
+#loc6 = loc("Div_16")
+#loc7 = loc("Sub_14")
+#loc8 = loc("Initializer_398")
+#loc9 = loc("Slice_7")
+#loc10 = loc("CompilerGeneratedLoc")
+#loc11 = loc("Add_445")
+#loc12 = loc("AveragePool_346")
+#loc13 = loc("Conv_17")
+#loc14 = loc("Add_19")
+#loc15 = loc("Clip_22")
+#loc16 = loc("Div_24")
+#loc17 = loc("Mul_25")
+#loc18 = loc("Conv_26")
+#loc19 = loc("Relu_27")
+#loc20 = loc("Conv_28")
+#loc21 = loc("Add_29")
+#loc22 = loc("Conv_30")
+#loc23 = loc("Relu_31")
+#loc24 = loc("Conv_32")
+#loc25 = loc("Relu_33")
+#loc26 = loc("Conv_34")
+#loc27 = loc("Conv_35")
+#loc28 = loc("Relu_36")
+#loc29 = loc("Conv_37")
+#loc30 = loc("Relu_38")
+#loc31 = loc("Conv_39")
+#loc32 = loc("Add_40")
+#loc33 = loc("Conv_41")
+#loc34 = loc("Relu_42")
+#loc35 = loc("Conv_43")
+#loc36 = loc("Relu_44")
+#loc37 = loc("GlobalAveragePool_45")
+#loc38 = loc("Conv_46")
+#loc39 = loc("Relu_47")
+#loc40 = loc("Conv_48")
+#loc41 = loc("Add_50")
+#loc42 = loc("Clip_53")
+#loc43 = loc("Div_55")
+#loc44 = loc("Mul_56")
+#loc45 = loc("Conv_57")
+#loc46 = loc("Conv_58")
+#loc47 = loc("Relu_59")
+#loc48 = loc("Conv_60")
+#loc49 = loc("Relu_61")
+#loc50 = loc("GlobalAveragePool_62")
+#loc51 = loc("Conv_63")
+#loc52 = loc("Relu_64")
+#loc53 = loc("Conv_65")
+#loc54 = loc("Add_67")
+#loc55 = loc("Clip_70")
+#loc56 = loc("Div_72")
+#loc57 = loc("Mul_73")
+#loc58 = loc("Conv_74")
+#loc59 = loc("Add_75")
+#loc60 = loc("Conv_76")
+#loc61 = loc("Relu_77")
+#loc62 = loc("Conv_78")
+#loc63 = loc("Relu_79")
+#loc64 = loc("GlobalAveragePool_80")
+#loc65 = loc("Conv_81")
+#loc66 = loc("Relu_82")
+#loc67 = loc("Conv_83")
+#loc68 = loc("Add_85")
+#loc69 = loc("Clip_88")
+#loc70 = loc("Div_90")
+#loc71 = loc("Mul_91")
+#loc72 = loc("Conv_92")
+#loc73 = loc("Add_93")
+#loc74 = loc("Conv_94")
+#loc75 = loc("Add_96")
+#loc76 = loc("Clip_99")
+#loc77 = loc("Div_101")
+#loc78 = loc("Mul_102")
+#loc79 = loc("Conv_103")
+#loc80 = loc("Add_105")
+#loc81 = loc("Clip_108")
+#loc82 = loc("Div_110")
+#loc83 = loc("Mul_111")
+#loc84 = loc("Conv_112")
+#loc85 = loc("Conv_113")
+#loc86 = loc("Add_115")
+#loc87 = loc("Clip_118")
+#loc88 = loc("Div_120")
+#loc89 = loc("Mul_121")
+#loc90 = loc("Conv_122")
+#loc91 = loc("Add_124")
+#loc92 = loc("Clip_127")
+#loc93 = loc("Div_129")
+#loc94 = loc("Mul_130")
+#loc95 = loc("Conv_131")
+#loc96 = loc("Add_132")
+#loc97 = loc("Conv_133")
+#loc98 = loc("Add_135")
+#loc99 = loc("Clip_138")
+#loc100 = loc("Div_140")
+#loc101 = loc("Mul_141")
+#loc102 = loc("Conv_142")
+#loc103 = loc("Add_144")
+#loc104 = loc("Clip_147")
+#loc105 = loc("Div_149")
+#loc106 = loc("Mul_150")
+#loc107 = loc("Conv_151")
+#loc108 = loc("Add_152")
+#loc109 = loc("Conv_153")
+#loc110 = loc("Add_155")
+#loc111 = loc("Clip_158")
+#loc112 = loc("Div_160")
+#loc113 = loc("Mul_161")
+#loc114 = loc("Conv_162")
+#loc115 = loc("Add_164")
+#loc116 = loc("Clip_167")
+#loc117 = loc("Div_169")
+#loc118 = loc("Mul_170")
+#loc119 = loc("Conv_171")
+#loc120 = loc("Add_172")
+#loc121 = loc("Conv_173")
+#loc122 = loc("Add_175")
+#loc123 = loc("Clip_178")
+#loc124 = loc("Div_180")
+#loc125 = loc("Mul_181")
+#loc126 = loc("Conv_182")
+#loc127 = loc("Add_184")
+#loc128 = loc("Clip_187")
+#loc129 = loc("Div_189")
+#loc130 = loc("Mul_190")
+#loc131 = loc("GlobalAveragePool_191")
+#loc132 = loc("Conv_192")
+#loc133 = loc("Relu_193")
+#loc134 = loc("Conv_194")
+#loc135 = loc("Add_196")
+#loc136 = loc("Clip_199")
+#loc137 = loc("Div_201")
+#loc138 = loc("Mul_202")
+#loc139 = loc("Conv_203")
+#loc140 = loc("Conv_204")
+#loc141 = loc("Add_206")
+#loc142 = loc("Clip_209")
+#loc143 = loc("Div_211")
+#loc144 = loc("Mul_212")
+#loc145 = loc("Conv_213")
+#loc146 = loc("Add_215")
+#loc147 = loc("Clip_218")
+#loc148 = loc("Div_220")
+#loc149 = loc("Mul_221")
+#loc150 = loc("GlobalAveragePool_222")
+#loc151 = loc("Conv_223")
+#loc152 = loc("Relu_224")
+#loc153 = loc("Conv_225")
+#loc154 = loc("Add_227")
+#loc155 = loc("Clip_230")
+#loc156 = loc("Div_232")
+#loc157 = loc("Mul_233")
+#loc158 = loc("Conv_234")
+#loc159 = loc("Add_235")
+#loc160 = loc("Conv_236")
+#loc161 = loc("Add_238")
+#loc162 = loc("Clip_241")
+#loc163 = loc("Div_243")
+#loc164 = loc("Mul_244")
+#loc165 = loc("Conv_245")
+#loc166 = loc("Add_247")
+#loc167 = loc("Clip_250")
+#loc168 = loc("Div_252")
+#loc169 = loc("Mul_253")
+#loc170 = loc("GlobalAveragePool_254")
+#loc171 = loc("Conv_255")
+#loc172 = loc("Relu_256")
+#loc173 = loc("Conv_257")
+#loc174 = loc("Add_259")
+#loc175 = loc("Clip_262")
+#loc176 = loc("Div_264")
+#loc177 = loc("Mul_265")
+#loc178 = loc("Conv_266")
+#loc179 = loc("Conv_267")
+#loc180 = loc("Add_269")
+#loc181 = loc("Clip_272")
+#loc182 = loc("Div_274")
+#loc183 = loc("Mul_275")
+#loc184 = loc("Conv_276")
+#loc185 = loc("Add_278")
+#loc186 = loc("Clip_281")
+#loc187 = loc("Div_283")
+#loc188 = loc("Mul_284")
+#loc189 = loc("GlobalAveragePool_285")
+#loc190 = loc("Conv_286")
+#loc191 = loc("Relu_287")
+#loc192 = loc("Conv_288")
+#loc193 = loc("Add_290")
+#loc194 = loc("Clip_293")
+#loc195 = loc("Div_295")
+#loc196 = loc("Mul_296")
+#loc197 = loc("Conv_297")
+#loc198 = loc("Add_298")
+#loc199 = loc("Conv_299")
+#loc200 = loc("Add_301")
+#loc201 = loc("Clip_304")
+#loc202 = loc("Div_306")
+#loc203 = loc("Mul_307")
+#loc204 = loc("Conv_308")
+#loc205 = loc("Add_310")
+#loc206 = loc("Clip_313")
+#loc207 = loc("Div_315")
+#loc208 = loc("Mul_316")
+#loc209 = loc("GlobalAveragePool_317")
+#loc210 = loc("Conv_318")
+#loc211 = loc("Relu_319")
+#loc212 = loc("Conv_320")
+#loc213 = loc("Add_322")
+#loc214 = loc("Clip_325")
+#loc215 = loc("Div_327")
+#loc216 = loc("Mul_328")
+#loc217 = loc("Conv_329")
+#loc218 = loc("Add_330")
+#loc219 = loc("Conv_331")
+#loc220 = loc("Add_333")
+#loc221 = loc("Clip_336")
+#loc222 = loc("Div_338")
+#loc223 = loc("Mul_339")
+#loc224 = loc("GlobalAveragePool_342")
+#loc225 = loc("Conv_343")
+#loc226 = loc("Sigmoid_344")
+#loc227 = loc("Mul_345")
+#loc228 = loc("Conv_340")
+#loc229 = loc("Relu_341")
+#loc230 = loc("Split_349")
+#loc231 = loc("Concat_350")
+#loc232 = loc("Conv_351")
+#loc233 = loc("Sigmoid_352")
+#loc234 = loc("Split_353")
+#loc235 = loc("Mul_354")
+#loc236 = loc("Concat_355")
+#loc237 = loc("Conv_356")
+#loc238 = loc("Tanh_357")
+#loc239 = loc("Mul_361")
+#loc240 = loc("Mul_360")
+#loc241 = loc("Add_362")
+#loc242 = loc("Concat_363")
+#loc243 = loc("Resize_365")
+#loc244 = loc("Slice_371")
+#loc245 = loc("AveragePool_347")
+#loc246 = loc("AveragePool_348")
+#loc247 = loc("Concat_372")
+#loc248 = loc("Conv_373")
+#loc249 = loc("Relu_374")
+#loc250 = loc("Split_375")
+#loc251 = loc("Concat_376")
+#loc252 = loc("Conv_377")
+#loc253 = loc("Sigmoid_378")
+#loc254 = loc("Split_379")
+#loc255 = loc("Mul_380")
+#loc256 = loc("Concat_381")
+#loc257 = loc("Conv_382")
+#loc258 = loc("Tanh_383")
+#loc259 = loc("Mul_387")
+#loc260 = loc("Mul_386")
+#loc261 = loc("Add_388")
+#loc262 = loc("Concat_389")
+#loc263 = loc("Resize_391")
+#loc264 = loc("Slice_397")
+#loc265 = loc("Concat_398")
+#loc266 = loc("Conv_399")
+#loc267 = loc("Relu_400")
+#loc268 = loc("Split_401")
+#loc269 = loc("Concat_402")
+#loc270 = loc("Conv_403")
+#loc271 = loc("Sigmoid_404")
+#loc272 = loc("Split_405")
+#loc273 = loc("Mul_406")
+#loc274 = loc("Concat_407")
+#loc275 = loc("Conv_408")
+#loc276 = loc("Tanh_409")
+#loc277 = loc("Mul_413")
+#loc278 = loc("Mul_412")
+#loc279 = loc("Add_414")
+#loc280 = loc("Concat_415")
+#loc281 = loc("Resize_417")
+#loc282 = loc("Concat_418")
+#loc283 = loc("Conv_419")
+#loc284 = loc("Relu_420")
+#loc285 = loc("Split_421")
+#loc286 = loc("Concat_422")
+#loc287 = loc("Conv_423")
+#loc288 = loc("Sigmoid_424")
+#loc289 = loc("Split_425")
+#loc290 = loc("Mul_426")
+#loc291 = loc("Concat_427")
+#loc292 = loc("Conv_428")
+#loc293 = loc("Tanh_429")
+#loc294 = loc("Mul_433")
+#loc295 = loc("Mul_432")
+#loc296 = loc("Add_434")
+#loc297 = loc("Concat_435")
+#loc298 = loc("Resize_437")
+#loc299 = loc("Concat_438")
+#loc300 = loc("Conv_439")
+#loc301 = loc("Relu_440")
+#loc302 = loc("Conv_441")
+#loc303 = loc("Relu_442")
+#loc304 = loc("Conv_443")
+#loc305 = loc("Split_444")
+#loc306 = loc("Clip_446")
+#loc307 = loc("Clip_447")
+#loc308 = loc(fused[#loc1, #loc2, #loc3, #loc4, #loc5, #loc6, #loc7, #loc8, #loc9, #loc10, #loc11, #loc12, #loc13, #loc14, #loc15, #loc16, #loc17, #loc18, #loc19, #loc20, #loc21, #loc22, #loc23, #loc24, #loc25, #loc26, #loc27, #loc28, #loc29, #loc30, #loc31, #loc32, #loc33, #loc34, #loc35, #loc36, #loc37, #loc38, #loc39, #loc40, #loc41, #loc42, #loc43, #loc44, #loc45, #loc46, #loc47, #loc48, #loc49, #loc50, #loc51, #loc52, #loc53, #loc54, #loc55, #loc56, #loc57, #loc58, #loc59, #loc60, #loc61, #loc62, #loc63, #loc64, #loc65, #loc66, #loc67, #loc68, #loc69, #loc70, #loc71, #loc72, #loc73, #loc74, #loc75, #loc76, #loc77, #loc78, #loc79, #loc80, #loc81, #loc82, #loc83, #loc84, #loc85, #loc86, #loc87, #loc88, #loc89, #loc90, #loc91, #loc92, #loc93, #loc94, #loc95, #loc96, #loc97, #loc98, #loc99, #loc100, #loc101, #loc102, #loc103, #loc104, #loc105, #loc106, #loc107, #loc108, #loc109, #loc110, #loc111, #loc112, #loc113, #loc114, #loc115, #loc116, #loc117, #loc118, #loc119, #loc120, #loc121, #loc122, #loc123, #loc124, #loc125, #loc126, #loc127, #loc128, #loc129, #loc130, #loc131, #loc132, #loc133, #loc134, #loc135, #loc136, #loc137, #loc138, #loc139, #loc140, #loc141, #loc142, #loc143, #loc144, #loc145, #loc146, #loc147, #loc148, #loc149, #loc150, #loc151, #loc152, #loc153, #loc154, #loc155, #loc156, #loc157, #loc158, #loc159, #loc160, #loc161, #loc162, #loc163, #loc164, #loc165, #loc166, #loc167, #loc168, #loc169, #loc170, #loc171, #loc172, #loc173, #loc174, #loc175, #loc176, #loc177, #loc178, #loc179, #loc180, #loc181, #loc182, #loc183, #loc184, #loc185, #loc186, #loc187, #loc188, #loc189, #loc190, #loc191, #loc192, #loc193, #loc194, #loc195, #loc196, #loc197, #loc198, #loc199, #loc200, #loc201, #loc202, #loc203, #loc204, #loc205, #loc206, #loc207, #loc208, #loc209, #loc210, #loc211, #loc212, #loc213, #loc214, #loc215, #loc216, #loc217, #loc218, #loc219, #loc220, #loc221, #loc222, #loc223, #loc224, #loc225, #loc226, #loc227, #loc228, #loc229, #loc230, #loc231, #loc232, #loc233, #loc234, #loc235, #loc236, #loc237, #loc238, #loc239, #loc240, #loc241, #loc242, #loc243, #loc244, #loc245, #loc246, #loc247, #loc248, #loc249, #loc250, #loc251, #loc252, #loc253, #loc254, #loc255, #loc256, #loc257, #loc258, #loc259, #loc260, #loc261, #loc262, #loc263, #loc264, #loc265, #loc266, #loc267, #loc268, #loc269, #loc270, #loc271, #loc272, #loc273, #loc274, #loc275, #loc276, #loc277, #loc278, #loc279, #loc280, #loc281, #loc282, #loc283, #loc284, #loc285, #loc286, #loc287, #loc288, #loc289, #loc290, #loc291, #loc292, #loc293, #loc294, #loc295, #loc296, #loc297, #loc298, #loc299, #loc300, #loc301, #loc302, #loc303, #loc304, #loc305, #loc306, #loc307])
+#loc309 = loc(fused[#loc7, #loc8])
+#loc310 = loc(fused[#loc11, #loc9, #loc12])
+#loc311 = loc(fused[#loc9, #loc12, #loc11])
+#loc312 = loc(fused[#loc18, #loc19])
+#loc313 = loc(fused[#loc20, #loc21])
+#loc314 = loc(fused[#loc22, #loc23])
+#loc315 = loc(fused[#loc24, #loc25])
+#loc316 = loc(fused[#loc27, #loc28])
+#loc317 = loc(fused[#loc29, #loc30])
+#loc318 = loc(fused[#loc31, #loc32])
+#loc319 = loc(fused[#loc33, #loc34])
+#loc320 = loc(fused[#loc35, #loc36])
+#loc321 = loc(fused[#loc38, #loc39])
+#loc322 = loc(fused[#loc46, #loc47])
+#loc323 = loc(fused[#loc48, #loc49])
+#loc324 = loc(fused[#loc51, #loc52])
+#loc325 = loc(fused[#loc58, #loc59])
+#loc326 = loc(fused[#loc60, #loc61])
+#loc327 = loc(fused[#loc62, #loc63])
+#loc328 = loc(fused[#loc65, #loc66])
+#loc329 = loc(fused[#loc72, #loc73])
+#loc330 = loc(fused[#loc95, #loc96])
+#loc331 = loc(fused[#loc107, #loc108])
+#loc332 = loc(fused[#loc119, #loc120])
+#loc333 = loc(fused[#loc130, #loc131])
+#loc334 = loc(fused[#loc132, #loc133])
+#loc335 = loc(fused[#loc149, #loc150])
+#loc336 = loc(fused[#loc151, #loc152])
+#loc337 = loc(fused[#loc158, #loc159])
+#loc338 = loc(fused[#loc169, #loc170])
+#loc339 = loc(fused[#loc171, #loc172])
+#loc340 = loc(fused[#loc188, #loc189])
+#loc341 = loc(fused[#loc190, #loc191])
+#loc342 = loc(fused[#loc197, #loc198])
+#loc343 = loc(fused[#loc208, #loc209])
+#loc344 = loc(fused[#loc210, #loc211])
+#loc345 = loc(fused[#loc217, #loc218])
+#loc346 = loc(fused[#loc223, #loc224])
+#loc347 = loc(fused[#loc228, #loc229, #loc227])
+#loc348 = loc(fused[#loc228, #loc229])
+#loc349 = loc(fused[#loc248, #loc249])
+#loc350 = loc(fused[#loc266, #loc267])
+#loc351 = loc(fused[#loc283, #loc284])
+#loc352 = loc(fused[#loc300, #loc301])
+#loc353 = loc(fused[#loc302, #loc303])