diff --git a/.gitignore b/.gitignore index bfe453b..d72f22a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *AutoencoderKL-fp16.bin *FrozenCLIPEmbedder-fp16.bin *UNetModel-fp16.bin -*UNetModel-MHA-fp16.bin \ No newline at end of file +*UNetModel-MHA-fp16.bin +*AutoencoderKL-encoder-512-512-fp16.bin \ No newline at end of file diff --git a/x86/exe/assets/AutoencoderKL-encoder-512-512-fp16.param b/x86/exe/assets/AutoencoderKL-encoder-512-512-fp16.param new file mode 100644 index 0000000..f831bd7 --- /dev/null +++ b/x86/exe/assets/AutoencoderKL-encoder-512-512-fp16.param @@ -0,0 +1,108 @@ +7767517 +106 118 +Input in0 0 1 in0 +Convolution conv_3 1 1 in0 1 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=3456 +Split splitncnn_0 1 2 1 2 3 +GroupNorm gn_31 1 1 3 4 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_54 1 1 4 5 +Convolution conv_4 1 1 5 6 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +GroupNorm gn_32 1 1 6 7 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_55 1 1 7 8 +Convolution conv_5 1 1 8 9 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +BinaryOp add_0 2 1 2 9 10 0=0 +Split splitncnn_1 1 2 10 11 12 +GroupNorm gn_33 1 1 12 13 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_56 1 1 13 14 +Convolution conv_6 1 1 14 15 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +GroupNorm gn_34 1 1 15 16 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_57 1 1 16 17 +Convolution conv_7 1 1 17 18 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +BinaryOp add_1 2 1 11 18 19 0=0 +Padding pad_0 1 1 19 20 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_8 1 1 20 21 0=128 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=147456 +Split splitncnn_2 1 2 21 22 23 +GroupNorm gn_35 1 1 23 24 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_58 1 1 24 25 +Convolution conv_9 1 1 25 26 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=294912 +GroupNorm gn_36 1 1 26 27 0=32 1=256 2=1.000000e-06 3=1 +Convolution conv_11 1 1 22 28 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 +Swish silu_59 1 1 27 29 +Convolution conv_10 1 1 29 30 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +BinaryOp add_2 2 1 28 30 31 0=0 +Split splitncnn_3 1 2 31 32 33 +GroupNorm gn_37 1 1 33 34 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_60 1 1 34 35 +Convolution conv_12 1 1 35 36 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +GroupNorm gn_38 1 1 36 37 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_61 1 1 37 38 +Convolution conv_13 1 1 38 39 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +BinaryOp add_3 2 1 32 39 40 0=0 +Padding pad_1 1 1 40 41 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_14 1 1 41 42 0=256 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=589824 +Split splitncnn_4 1 2 42 43 44 +GroupNorm gn_39 1 1 44 45 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_62 1 1 45 46 +Convolution conv_15 1 1 46 47 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1179648 +GroupNorm gn_40 1 1 47 48 0=32 1=512 2=1.000000e-06 3=1 +Convolution conv_17 1 1 43 49 0=512 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=131072 +Swish silu_63 1 1 48 50 +Convolution conv_16 1 1 50 51 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_4 2 1 49 51 52 0=0 +Split splitncnn_5 1 2 52 53 54 +GroupNorm gn_41 1 1 54 55 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_64 1 1 55 56 +Convolution conv_18 1 1 56 57 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_42 1 1 57 58 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_65 1 1 58 59 +Convolution conv_19 1 1 59 60 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_5 2 1 53 60 61 0=0 +Padding pad_2 1 1 61 62 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_20 1 1 62 63 0=512 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=2359296 +Split splitncnn_6 1 2 63 64 65 +GroupNorm gn_43 1 1 65 66 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_66 1 1 66 67 +Convolution conv_21 1 1 67 68 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_44 1 1 68 69 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_67 1 1 69 70 +Convolution conv_22 1 1 70 71 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_6 2 1 64 71 72 0=0 +Split splitncnn_7 1 2 72 73 74 +GroupNorm gn_45 1 1 74 75 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_68 1 1 75 76 +Convolution conv_23 1 1 76 77 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_46 1 1 77 78 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_69 1 1 78 79 +Convolution conv_24 1 1 79 80 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_7 2 1 73 80 81 0=0 +Split splitncnn_8 1 2 81 82 83 +GroupNorm gn_47 1 1 83 84 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_70 1 1 84 85 +Convolution conv_25 1 1 85 86 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_48 1 1 86 87 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_71 1 1 87 88 +Convolution conv_26 1 1 88 89 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_8 2 1 82 89 90 0=0 +Split splitncnn_9 1 2 90 91 92 +GroupNorm gn_49 1 1 92 93 0=32 1=512 2=1.000000e-06 3=1 +Reshape view_76 1 1 93 94 0=4096 1=512 +Permute transpose_78 1 1 94 95 0=1 +MultiHeadAttention attention_53 1 1 95 96 0=512 1=1 2=262144 3=512 4=512 +Permute transpose_79 1 1 96 97 0=1 +Reshape reshape_75 1 1 97 98 0=64 1=64 2=512 +BinaryOp add_9 2 1 98 91 99 0=0 +Split splitncnn_10 1 2 99 100 101 +GroupNorm gn_50 1 1 101 102 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_72 1 1 102 103 +Convolution conv_27 1 1 103 104 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_51 1 1 104 105 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_73 1 1 105 106 +Convolution conv_28 1 1 106 107 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_10 2 1 100 107 108 0=0 +GroupNorm gn_52 1 1 108 109 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_74 1 1 109 110 +Convolution conv_29 1 1 110 111 0=8 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 +Convolution conv_30 1 1 111 112 0=8 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=64 +Slice chunk_0 1 2 112 out0 114 -23300=2,-233,-233 1=0 +Clip clamp_77 1 1 114 115 0=-3.000000e+01 1=2.000000e+01 +BinaryOp mul_11 1 1 115 116 0=2 1=1 2=5.000000e-01 +UnaryOp exp_12 1 1 116 out1 0=7 diff --git a/x86/exe/assets/tmp-AutoencoderKL-384-256-fp16.param b/x86/exe/assets/tmp-AutoencoderKL-384-256-fp16.param deleted file mode 100644 index 333657b..0000000 --- a/x86/exe/assets/tmp-AutoencoderKL-384-256-fp16.param +++ /dev/null @@ -1,146 +0,0 @@ -7767517 -144 161 -Input input.1 0 1 input.1 -Convolution Conv_4 1 1 input.1 input 0=4 1=1 5=1 6=16 -Convolution Conv_5 1 1 input input.4 0=512 1=3 4=1 5=1 6=18432 -Split splitncnn_0 1 2 input.4 input.4_splitncnn_0 input.4_splitncnn_1 -GroupNorm Add_14 1 1 input.4_splitncnn_1 h 0=32 1=512 2=1.000000e-06 -Swish Mul_16 1 1 h input.8 -Convolution Conv_17 1 1 input.8 input.12 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_26 1 1 input.12 h.3 0=32 1=512 2=1.000000e-06 -Swish Mul_28 1 1 h.3 input.16 -Convolution Conv_29 1 1 input.16 onnx::Add_282 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_30 2 1 input.4_splitncnn_0 onnx::Add_282 input.20 -Split splitncnn_1 1 2 input.20 input.20_splitncnn_0 input.20_splitncnn_1 -GroupNorm Add_39 1 1 input.20_splitncnn_1 input.24 0=32 1=512 2=1.000000e-06 -Split splitncnn_2 1 3 input.24 input.24_splitncnn_0 input.24_splitncnn_1 input.24_splitncnn_2 -Convolution Conv_40 1 1 input.24_splitncnn_2 onnx::Reshape_297 0=512 1=1 5=1 6=262144 -Convolution Conv_41 1 1 input.24_splitncnn_1 onnx::Reshape_298 0=512 1=1 5=1 6=262144 -Convolution Conv_42 1 1 input.24_splitncnn_0 onnx::Reshape_299 0=512 1=1 5=1 6=262144 -Reshape Reshape_43 1 1 onnx::Reshape_297 onnx::Transpose_312 0=1536 1=512 -Permute Transpose_44 1 1 onnx::Transpose_312 onnx::MatMul_313 0=1 -Reshape Reshape_45 1 1 onnx::Reshape_298 onnx::MatMul_322 0=1536 1=512 -Gemm MatMul_46 2 1 onnx::MatMul_313 onnx::MatMul_322 onnx::Mul_323 -BinaryOp Mul_48 1 1 onnx::Mul_323 w2 0=2 1=1 2=4.419417e-02 -Softmax Softmax_49 1 1 w2 onnx::Cast_326 0=1 1=1 -Reshape Reshape_51 1 1 onnx::Reshape_299 onnx::MatMul_336 0=1536 1=512 -Permute Transpose_52 1 1 onnx::Cast_326 onnx::MatMul_337 0=1 -Gemm MatMul_53 2 1 onnx::MatMul_336 onnx::MatMul_337 onnx::Reshape_338 -Reshape Reshape_54 1 1 onnx::Reshape_338 input.28 0=32 1=48 2=512 -Convolution Conv_55 1 1 input.28 onnx::Add_349 0=512 1=1 5=1 6=262144 -BinaryOp Add_56 2 1 onnx::Add_349 input.20_splitncnn_0 onnx::Reshape_350 -Split splitncnn_3 1 2 onnx::Reshape_350 onnx::Reshape_350_splitncnn_0 onnx::Reshape_350_splitncnn_1 -GroupNorm Add_65 1 1 onnx::Reshape_350_splitncnn_1 h.7 0=32 1=512 2=1.000000e-06 -Swish Mul_67 1 1 h.7 input.32 -Convolution Conv_68 1 1 input.32 input.36 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_77 1 1 input.36 h.11 0=32 1=512 2=1.000000e-06 -Swish Mul_79 1 1 h.11 input.40 -Convolution Conv_80 1 1 input.40 onnx::Add_382 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_81 2 1 onnx::Reshape_350_splitncnn_0 onnx::Add_382 input.44 -Split splitncnn_4 1 2 input.44 input.44_splitncnn_0 input.44_splitncnn_1 -GroupNorm Add_90 1 1 input.44_splitncnn_1 h.15 0=32 1=512 2=1.000000e-06 -Swish Mul_92 1 1 h.15 input.48 -Convolution Conv_93 1 1 input.48 input.52 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_102 1 1 input.52 h.19 0=32 1=512 2=1.000000e-06 -Swish Mul_104 1 1 h.19 input.56 -Convolution Conv_105 1 1 input.56 onnx::Add_415 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_106 2 1 input.44_splitncnn_0 onnx::Add_415 input.60 -Split splitncnn_5 1 2 input.60 input.60_splitncnn_0 input.60_splitncnn_1 -GroupNorm Add_115 1 1 input.60_splitncnn_1 h.23 0=32 1=512 2=1.000000e-06 -Swish Mul_117 1 1 h.23 input.64 -Convolution Conv_118 1 1 input.64 input.68 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_127 1 1 input.68 h.27 0=32 1=512 2=1.000000e-06 -Swish Mul_129 1 1 h.27 input.72 -Convolution Conv_130 1 1 input.72 onnx::Add_448 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_131 2 1 input.60_splitncnn_0 onnx::Add_448 input.76 -Split splitncnn_6 1 2 input.76 input.76_splitncnn_0 input.76_splitncnn_1 -GroupNorm Add_140 1 1 input.76_splitncnn_1 h.31 0=32 1=512 2=1.000000e-06 -Swish Mul_142 1 1 h.31 input.80 -Convolution Conv_143 1 1 input.80 input.84 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_152 1 1 input.84 h.35 0=32 1=512 2=1.000000e-06 -Swish Mul_154 1 1 h.35 input.88 -Convolution Conv_155 1 1 input.88 onnx::Add_481 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_156 2 1 input.76_splitncnn_0 onnx::Add_481 x -Interp Resize_157 1 1 x input.92 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution Conv_158 1 1 input.92 input.96 0=512 1=3 4=1 5=1 6=2359296 -Split splitncnn_7 1 2 input.96 input.96_splitncnn_0 input.96_splitncnn_1 -GroupNorm Add_167 1 1 input.96_splitncnn_1 h.39 0=32 1=512 2=1.000000e-06 -Swish Mul_169 1 1 h.39 input.100 -Convolution Conv_170 1 1 input.100 input.104 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_179 1 1 input.104 h.43 0=32 1=512 2=1.000000e-06 -Swish Mul_181 1 1 h.43 input.108 -Convolution Conv_182 1 1 input.108 onnx::Add_520 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_183 2 1 input.96_splitncnn_0 onnx::Add_520 input.112 -Split splitncnn_8 1 2 input.112 input.112_splitncnn_0 input.112_splitncnn_1 -GroupNorm Add_192 1 1 input.112_splitncnn_1 h.47 0=32 1=512 2=1.000000e-06 -Swish Mul_194 1 1 h.47 input.116 -Convolution Conv_195 1 1 input.116 input.120 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_204 1 1 input.120 h.51 0=32 1=512 2=1.000000e-06 -Swish Mul_206 1 1 h.51 input.124 -Convolution Conv_207 1 1 input.124 onnx::Add_553 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_208 2 1 input.112_splitncnn_0 onnx::Add_553 input.128 -Split splitncnn_9 1 2 input.128 input.128_splitncnn_0 input.128_splitncnn_1 -GroupNorm Add_217 1 1 input.128_splitncnn_1 h.55 0=32 1=512 2=1.000000e-06 -Swish Mul_219 1 1 h.55 input.132 -Convolution Conv_220 1 1 input.132 input.136 0=512 1=3 4=1 5=1 6=2359296 -GroupNorm Add_229 1 1 input.136 h.59 0=32 1=512 2=1.000000e-06 -Swish Mul_231 1 1 h.59 input.140 -Convolution Conv_232 1 1 input.140 onnx::Add_586 0=512 1=3 4=1 5=1 6=2359296 -BinaryOp Add_233 2 1 input.128_splitncnn_0 onnx::Add_586 x.3 -Interp Resize_234 1 1 x.3 input.144 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution Conv_235 1 1 input.144 input.148 0=512 1=3 4=1 5=1 6=2359296 -Split splitncnn_10 1 2 input.148 input.148_splitncnn_0 input.148_splitncnn_1 -GroupNorm Add_244 1 1 input.148_splitncnn_1 h.63 0=32 1=512 2=1.000000e-06 -Swish Mul_246 1 1 h.63 input.152 -Convolution Conv_247 1 1 input.152 input.156 0=256 1=3 4=1 5=1 6=1179648 -GroupNorm Add_256 1 1 input.156 h.67 0=32 1=256 2=1.000000e-06 -Swish Mul_258 1 1 h.67 input.160 -Convolution Conv_259 1 1 input.160 onnx::Add_625 0=256 1=3 4=1 5=1 6=589824 -Convolution Conv_260 1 1 input.148_splitncnn_0 onnx::Add_626 0=256 1=1 5=1 6=131072 -BinaryOp Add_261 2 1 onnx::Add_626 onnx::Add_625 input.164 -Split splitncnn_11 1 2 input.164 input.164_splitncnn_0 input.164_splitncnn_1 -GroupNorm Add_270 1 1 input.164_splitncnn_1 h.71 0=32 1=256 2=1.000000e-06 -Swish Mul_272 1 1 h.71 input.168 -Convolution Conv_273 1 1 input.168 input.172 0=256 1=3 4=1 5=1 6=589824 -GroupNorm Add_282 1 1 input.172 h.75 0=32 1=256 2=1.000000e-06 -Swish Mul_284 1 1 h.75 input.176 -Convolution Conv_285 1 1 input.176 onnx::Add_659 0=256 1=3 4=1 5=1 6=589824 -BinaryOp Add_286 2 1 input.164_splitncnn_0 onnx::Add_659 input.180 -Split splitncnn_12 1 2 input.180 input.180_splitncnn_0 input.180_splitncnn_1 -GroupNorm Add_295 1 1 input.180_splitncnn_1 h.79 0=32 1=256 2=1.000000e-06 -Swish Mul_297 1 1 h.79 input.184 -Convolution Conv_298 1 1 input.184 input.188 0=256 1=3 4=1 5=1 6=589824 -GroupNorm Add_307 1 1 input.188 h.83 0=32 1=256 2=1.000000e-06 -Swish Mul_309 1 1 h.83 input.192 -Convolution Conv_310 1 1 input.192 onnx::Add_692 0=256 1=3 4=1 5=1 6=589824 -BinaryOp Add_311 2 1 input.180_splitncnn_0 onnx::Add_692 x.7 -Interp Resize_312 1 1 x.7 input.196 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution Conv_313 1 1 input.196 input.200 0=256 1=3 4=1 5=1 6=589824 -Split splitncnn_13 1 2 input.200 input.200_splitncnn_0 input.200_splitncnn_1 -GroupNorm Add_322 1 1 input.200_splitncnn_1 h.87 0=32 1=256 2=1.000000e-06 -Swish Mul_324 1 1 h.87 input.204 -Convolution Conv_325 1 1 input.204 input.208 0=128 1=3 4=1 5=1 6=294912 -GroupNorm Add_334 1 1 input.208 h.91 0=32 1=128 2=1.000000e-06 -Swish Mul_336 1 1 h.91 input.212 -Convolution Conv_337 1 1 input.212 onnx::Add_731 0=128 1=3 4=1 5=1 6=147456 -Convolution Conv_338 1 1 input.200_splitncnn_0 onnx::Add_732 0=128 1=1 5=1 6=32768 -BinaryOp Add_339 2 1 onnx::Add_732 onnx::Add_731 input.216 -Split splitncnn_14 1 2 input.216 input.216_splitncnn_0 input.216_splitncnn_1 -GroupNorm Add_348 1 1 input.216_splitncnn_1 h.95 0=32 1=128 2=1.000000e-06 -Swish Mul_350 1 1 h.95 input.220 -Convolution Conv_351 1 1 input.220 input.224 0=128 1=3 4=1 5=1 6=147456 -GroupNorm Add_360 1 1 input.224 h.99 0=32 1=128 2=1.000000e-06 -Swish Mul_362 1 1 h.99 input.228 -Convolution Conv_363 1 1 input.228 onnx::Add_765 0=128 1=3 4=1 5=1 6=147456 -BinaryOp Add_364 2 1 input.216_splitncnn_0 onnx::Add_765 input.232 -Split splitncnn_15 1 2 input.232 input.232_splitncnn_0 input.232_splitncnn_1 -GroupNorm Add_373 1 1 input.232_splitncnn_1 h.103 0=32 1=128 2=1.000000e-06 -Swish Mul_375 1 1 h.103 input.236 -Convolution Conv_376 1 1 input.236 input.240 0=128 1=3 4=1 5=1 6=147456 -GroupNorm Add_385 1 1 input.240 h.107 0=32 1=128 2=1.000000e-06 -Swish Mul_387 1 1 h.107 input.244 -Convolution Conv_388 1 1 input.244 onnx::Add_798 0=128 1=3 4=1 5=1 6=147456 -BinaryOp Add_389 2 1 input.232_splitncnn_0 onnx::Add_798 input.248 -GroupNorm Add_398 1 1 input.248 h.111 0=32 1=128 2=1.000000e-06 -Swish Mul_400 1 1 h.111 input.252 -Convolution Conv_401 1 1 input.252 815 0=3 1=3 4=1 5=1 6=3456 diff --git a/x86/exe/assets/tmp-UNetModel-384-256-MHA-fp16.param b/x86/exe/assets/tmp-UNetModel-384-256-MHA-fp16.param deleted file mode 100644 index ff59e15..0000000 --- a/x86/exe/assets/tmp-UNetModel-384-256-MHA-fp16.param +++ /dev/null @@ -1,746 +0,0 @@ -7767517 -744 944 -Input in0 0 1 in0 -Input in1 0 1 in1 -Input in2 0 1 in2 -Input c_in 0 1 c_in -Input c_out 0 1 c_out -Split input_splitncnn 1 2 in0 in0_1 in0_2 -BinaryOp pre_mul 2 1 in0_1 c_in c_in_in0 0=2 -Split splitncnn_0 1 32 in2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 -Tile repeat_tile 1 1 in1 36 -MemoryData pnnx_fold_v_8.1 0 1 35 0=160 -BinaryOp mul_0 2 1 36 35 37 0=2 -Split splitncnn_1 1 2 37 38 39 -UnaryOp cos_1 1 1 38 40 0=10 -UnaryOp sin_2 1 1 39 41 0=9 -Concat cat_0 2 1 40 41 42 -InnerProduct linear_258 1 1 42 43 0=1280 1=1 2=409600 -Convolution conv_96 1 1 c_in_in0 44 0=320 1=3 4=1 5=1 6=11520 -Split splitncnn_2 1 3 44 45 46 47 -GroupNorm gn_16 1 1 47 48 0=32 1=320 2=1.000000e-05 -Swish silu_442 1 1 43 49 -InnerProduct linear_259 1 1 49 50 0=1280 1=1 2=1638400 -Swish silu_444 1 1 50 51 -Split splitncnn_3 1 22 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 -InnerProduct linear_260 1 1 73 74 0=320 1=1 2=409600 -Swish silu_443 1 1 48 75 -Convolution conv_97 1 1 75 76 0=320 1=3 4=1 5=1 6=921600 -ExpandDims unsqueeze_1034 1 1 74 77 -23303=1,1 -ExpandDims unsqueeze_1035 1 1 77 78 -23303=1,2 -BinaryOp add_3 2 1 76 78 79 -GroupNorm gn_17 1 1 79 80 0=32 1=320 2=1.000000e-05 -Swish silu_445 1 1 80 81 -Convolution conv_98 1 1 81 82 0=320 1=3 4=1 5=1 6=921600 -BinaryOp add_4 2 1 46 82 83 -Split splitncnn_4 1 2 83 84 85 -GroupNorm gn_194 1 1 85 86 0=32 1=320 2=1.000000e-06 -Convolution conv_99 1 1 86 87 0=320 1=1 5=1 6=102400 -Permute permute_841 1 1 87 88 0=3 -Reshape reshape_489 1 1 88 89 0=320 1=1536 -Split splitncnn_5 1 2 89 90 91 -LayerNorm ln_210 1 1 91 92 0=320 1=1.000000e-05 -Split splitncnn_6 1 3 92 93 94 95 -MultiHeadAttention linear_264 3 1 95 94 93 115 0=320 1=8 2=102400 3=320 4=320 -BinaryOp add_6 2 1 115 90 116 -Split splitncnn_7 1 2 116 117 118 -LayerNorm ln_211 1 1 118 119 0=320 1=1.000000e-05 -MultiHeadAttention linear_268 3 1 119 34 33 139 0=320 1=8 2=102400 3=768 4=768 -BinaryOp add_8 2 1 139 117 140 -Split splitncnn_8 1 2 140 141 142 -LayerNorm ln_212 1 1 142 143 0=320 1=1.000000e-05 -InnerProduct linear_269 1 1 143 144 0=2560 1=1 2=819200 -Slice split_0 1 2 144 145 146 -23300=2,1280,-233 1=1 -GELU gelu_0 1 1 146 147 0=1 -BinaryOp mul_9 2 1 145 147 148 0=2 -InnerProduct linear_270 1 1 148 149 0=320 1=1 2=409600 -BinaryOp add_10 2 1 149 141 150 -Reshape reshape_506 1 1 150 151 0=320 1=32 2=48 -Permute permute_852 1 1 151 152 0=4 -Convolution conv_100 1 1 152 153 0=320 1=1 5=1 6=102400 -BinaryOp add_11 2 1 153 84 154 -Split splitncnn_9 1 3 154 155 156 157 -GroupNorm gn_18 1 1 157 158 0=32 1=320 2=1.000000e-05 -InnerProduct linear_271 1 1 72 159 0=320 1=1 2=409600 -Swish silu_446 1 1 158 160 -Convolution conv_101 1 1 160 161 0=320 1=3 4=1 5=1 6=921600 -ExpandDims unsqueeze_1036 1 1 159 162 -23303=1,1 -ExpandDims unsqueeze_1037 1 1 162 163 -23303=1,2 -BinaryOp add_12 2 1 161 163 164 -GroupNorm gn_19 1 1 164 165 0=32 1=320 2=1.000000e-05 -Swish silu_447 1 1 165 166 -Convolution conv_102 1 1 166 167 0=320 1=3 4=1 5=1 6=921600 -BinaryOp add_13 2 1 156 167 168 -Split splitncnn_10 1 2 168 169 170 -GroupNorm gn_195 1 1 170 171 0=32 1=320 2=1.000000e-06 -Convolution conv_103 1 1 171 172 0=320 1=1 5=1 6=102400 -Permute permute_853 1 1 172 173 0=3 -Reshape reshape_507 1 1 173 174 0=320 1=1536 -Split splitncnn_11 1 2 174 175 176 -LayerNorm ln_213 1 1 176 177 0=320 1=1.000000e-05 -Split splitncnn_12 1 3 177 178 179 180 -MultiHeadAttention linear_275 3 1 180 179 178 200 0=320 1=8 2=102400 3=320 4=320 -BinaryOp add_15 2 1 200 175 201 -Split splitncnn_13 1 2 201 202 203 -LayerNorm ln_214 1 1 203 204 0=320 1=1.000000e-05 -MultiHeadAttention linear_279 3 1 204 32 31 224 0=320 1=8 2=102400 3=768 4=768 -BinaryOp add_17 2 1 224 202 225 -Split splitncnn_14 1 2 225 226 227 -LayerNorm ln_215 1 1 227 228 0=320 1=1.000000e-05 -InnerProduct linear_280 1 1 228 229 0=2560 1=1 2=819200 -Slice split_1 1 2 229 230 231 -23300=2,1280,-233 1=1 -GELU gelu_1 1 1 231 232 0=1 -BinaryOp mul_18 2 1 230 232 233 0=2 -InnerProduct linear_281 1 1 233 234 0=320 1=1 2=409600 -BinaryOp add_19 2 1 234 226 235 -Reshape reshape_524 1 1 235 236 0=320 1=32 2=48 -Permute permute_864 1 1 236 237 0=4 -Convolution conv_104 1 1 237 238 0=320 1=1 5=1 6=102400 -BinaryOp add_20 2 1 238 169 239 -Split splitncnn_15 1 2 239 240 241 -Convolution conv_105 1 1 241 242 0=320 1=3 3=2 4=1 5=1 6=921600 -Split splitncnn_16 1 3 242 243 244 245 -GroupNorm gn_20 1 1 244 246 0=32 1=320 2=1.000000e-05 -InnerProduct linear_282 1 1 71 247 0=640 1=1 2=819200 -Swish silu_448 1 1 246 248 -Convolution conv_106 1 1 248 249 0=640 1=3 4=1 5=1 6=1843200 -ExpandDims unsqueeze_1038 1 1 247 250 -23303=1,1 -ExpandDims unsqueeze_1039 1 1 250 251 -23303=1,2 -BinaryOp add_21 2 1 249 251 252 -GroupNorm gn_21 1 1 252 253 0=32 1=640 2=1.000000e-05 -Convolution conv_108 1 1 245 254 0=640 1=1 5=1 6=204800 -Swish silu_449 1 1 253 255 -Convolution conv_107 1 1 255 256 0=640 1=3 4=1 5=1 6=3686400 -BinaryOp add_22 2 1 254 256 257 -Split splitncnn_17 1 2 257 258 259 -GroupNorm gn_196 1 1 259 260 0=32 1=640 2=1.000000e-06 -Convolution conv_109 1 1 260 261 0=640 1=1 5=1 6=409600 -Permute permute_865 1 1 261 262 0=3 -Reshape reshape_525 1 1 262 263 0=640 1=384 -Split splitncnn_18 1 2 263 264 265 -LayerNorm ln_216 1 1 265 266 0=640 1=1.000000e-05 -Split splitncnn_19 1 3 266 267 268 269 -MultiHeadAttention linear_286 3 1 269 268 267 289 0=640 1=8 2=409600 3=640 4=640 -BinaryOp add_24 2 1 289 264 290 -Split splitncnn_20 1 2 290 291 292 -LayerNorm ln_217 1 1 292 293 0=640 1=1.000000e-05 -MultiHeadAttention linear_290 3 1 293 30 29 313 0=640 1=8 2=409600 3=768 4=768 -BinaryOp add_26 2 1 313 291 314 -Split splitncnn_21 1 2 314 315 316 -LayerNorm ln_218 1 1 316 317 0=640 1=1.000000e-05 -InnerProduct linear_291 1 1 317 318 0=5120 1=1 2=3276800 -Slice split_2 1 2 318 319 320 -23300=2,2560,-233 1=1 -GELU gelu_2 1 1 320 321 0=1 -BinaryOp mul_27 2 1 319 321 322 0=2 -InnerProduct linear_292 1 1 322 323 0=640 1=1 2=1638400 -BinaryOp add_28 2 1 323 315 324 -Reshape reshape_542 1 1 324 325 0=640 1=16 2=24 -Permute permute_876 1 1 325 326 0=4 -Convolution conv_110 1 1 326 327 0=640 1=1 5=1 6=409600 -BinaryOp add_29 2 1 327 258 328 -Split splitncnn_22 1 3 328 329 330 331 -GroupNorm gn_22 1 1 331 332 0=32 1=640 2=1.000000e-05 -InnerProduct linear_293 1 1 70 333 0=640 1=1 2=819200 -Swish silu_450 1 1 332 334 -Convolution conv_111 1 1 334 335 0=640 1=3 4=1 5=1 6=3686400 -ExpandDims unsqueeze_1040 1 1 333 336 -23303=1,1 -ExpandDims unsqueeze_1041 1 1 336 337 -23303=1,2 -BinaryOp add_30 2 1 335 337 338 -GroupNorm gn_23 1 1 338 339 0=32 1=640 2=1.000000e-05 -Swish silu_451 1 1 339 340 -Convolution conv_112 1 1 340 341 0=640 1=3 4=1 5=1 6=3686400 -BinaryOp add_31 2 1 330 341 342 -Split splitncnn_23 1 2 342 343 344 -GroupNorm gn_197 1 1 344 345 0=32 1=640 2=1.000000e-06 -Convolution conv_113 1 1 345 346 0=640 1=1 5=1 6=409600 -Permute permute_877 1 1 346 347 0=3 -Reshape reshape_543 1 1 347 348 0=640 1=384 -Split splitncnn_24 1 2 348 349 350 -LayerNorm ln_219 1 1 350 351 0=640 1=1.000000e-05 -Split splitncnn_25 1 3 351 352 353 354 -MultiHeadAttention linear_297 3 1 354 353 352 374 0=640 1=8 2=409600 3=640 4=640 -BinaryOp add_33 2 1 374 349 375 -Split splitncnn_26 1 2 375 376 377 -LayerNorm ln_220 1 1 377 378 0=640 1=1.000000e-05 -MultiHeadAttention linear_301 3 1 378 28 27 398 0=640 1=8 2=409600 3=768 4=768 -BinaryOp add_35 2 1 398 376 399 -Split splitncnn_27 1 2 399 400 401 -LayerNorm ln_221 1 1 401 402 0=640 1=1.000000e-05 -InnerProduct linear_302 1 1 402 403 0=5120 1=1 2=3276800 -Slice split_3 1 2 403 404 405 -23300=2,2560,-233 1=1 -GELU gelu_3 1 1 405 406 0=1 -BinaryOp mul_36 2 1 404 406 407 0=2 -InnerProduct linear_303 1 1 407 408 0=640 1=1 2=1638400 -BinaryOp add_37 2 1 408 400 409 -Reshape reshape_560 1 1 409 410 0=640 1=16 2=24 -Permute permute_888 1 1 410 411 0=4 -Convolution conv_114 1 1 411 412 0=640 1=1 5=1 6=409600 -BinaryOp add_38 2 1 412 343 413 -Split splitncnn_28 1 2 413 414 415 -Convolution conv_115 1 1 415 416 0=640 1=3 3=2 4=1 5=1 6=3686400 -Split splitncnn_29 1 3 416 417 418 419 -GroupNorm gn_24 1 1 418 420 0=32 1=640 2=1.000000e-05 -InnerProduct linear_304 1 1 69 421 0=1280 1=1 2=1638400 -Swish silu_452 1 1 420 422 -Convolution conv_116 1 1 422 423 0=1280 1=3 4=1 5=1 6=7372800 -ExpandDims unsqueeze_1042 1 1 421 424 -23303=1,1 -ExpandDims unsqueeze_1043 1 1 424 425 -23303=1,2 -BinaryOp add_39 2 1 423 425 426 -GroupNorm gn_25 1 1 426 427 0=32 1=1280 2=1.000000e-05 -Convolution conv_118 1 1 419 428 0=1280 1=1 5=1 6=819200 -Swish silu_453 1 1 427 429 -Convolution conv_117 1 1 429 430 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_40 2 1 428 430 431 -Split splitncnn_30 1 2 431 432 433 -GroupNorm gn_198 1 1 433 434 0=32 1=1280 2=1.000000e-06 -Convolution conv_119 1 1 434 435 0=1280 1=1 5=1 6=1638400 -Permute permute_889 1 1 435 436 0=3 -Reshape reshape_561 1 1 436 437 0=1280 1=96 -Split splitncnn_31 1 2 437 438 439 -LayerNorm ln_222 1 1 439 440 0=1280 1=1.000000e-05 -Split splitncnn_32 1 3 440 441 442 443 -MultiHeadAttention linear_308 3 1 443 442 441 463 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_42 2 1 463 438 464 -Split splitncnn_33 1 2 464 465 466 -LayerNorm ln_223 1 1 466 467 0=1280 1=1.000000e-05 -MultiHeadAttention linear_312 3 1 467 26 25 487 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_44 2 1 487 465 488 -Split splitncnn_34 1 2 488 489 490 -LayerNorm ln_224 1 1 490 491 0=1280 1=1.000000e-05 -InnerProduct linear_313 1 1 491 492 0=10240 1=1 2=13107200 -Slice split_4 1 2 492 493 494 -23300=2,5120,-233 1=1 -GELU gelu_4 1 1 494 495 0=1 -BinaryOp mul_45 2 1 493 495 496 0=2 -InnerProduct linear_314 1 1 496 497 0=1280 1=1 2=6553600 -BinaryOp add_46 2 1 497 489 498 -Reshape reshape_578 1 1 498 499 0=1280 1=8 2=12 -Permute permute_900 1 1 499 500 0=4 -Convolution conv_120 1 1 500 501 0=1280 1=1 5=1 6=1638400 -BinaryOp add_47 2 1 501 432 502 -Split splitncnn_35 1 3 502 503 504 505 -GroupNorm gn_26 1 1 505 506 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_315 1 1 68 507 0=1280 1=1 2=1638400 -Swish silu_454 1 1 506 508 -Convolution conv_121 1 1 508 509 0=1280 1=3 4=1 5=1 6=14745600 -ExpandDims unsqueeze_1044 1 1 507 510 -23303=1,1 -ExpandDims unsqueeze_1045 1 1 510 511 -23303=1,2 -BinaryOp add_48 2 1 509 511 512 -GroupNorm gn_27 1 1 512 513 0=32 1=1280 2=1.000000e-05 -Swish silu_455 1 1 513 514 -Convolution conv_122 1 1 514 515 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_49 2 1 504 515 516 -Split splitncnn_36 1 2 516 517 518 -GroupNorm gn_199 1 1 518 519 0=32 1=1280 2=1.000000e-06 -Convolution conv_123 1 1 519 520 0=1280 1=1 5=1 6=1638400 -Permute permute_901 1 1 520 521 0=3 -Reshape reshape_579 1 1 521 522 0=1280 1=96 -Split splitncnn_37 1 2 522 523 524 -LayerNorm ln_225 1 1 524 525 0=1280 1=1.000000e-05 -Split splitncnn_38 1 3 525 526 527 528 -MultiHeadAttention linear_319 3 1 528 527 526 548 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_51 2 1 548 523 549 -Split splitncnn_39 1 2 549 550 551 -LayerNorm ln_226 1 1 551 552 0=1280 1=1.000000e-05 -MultiHeadAttention linear_323 3 1 552 24 23 572 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_53 2 1 572 550 573 -Split splitncnn_40 1 2 573 574 575 -LayerNorm ln_227 1 1 575 576 0=1280 1=1.000000e-05 -InnerProduct linear_324 1 1 576 577 0=10240 1=1 2=13107200 -Slice split_5 1 2 577 578 579 -23300=2,5120,-233 1=1 -GELU gelu_5 1 1 579 580 0=1 -BinaryOp mul_54 2 1 578 580 581 0=2 -InnerProduct linear_325 1 1 581 582 0=1280 1=1 2=6553600 -BinaryOp add_55 2 1 582 574 583 -Reshape reshape_596 1 1 583 584 0=1280 1=8 2=12 -Permute permute_912 1 1 584 585 0=4 -Convolution conv_124 1 1 585 586 0=1280 1=1 5=1 6=1638400 -BinaryOp add_56 2 1 586 517 587 -Split splitncnn_41 1 2 587 588 589 -Convolution conv_125 1 1 589 590 0=1280 1=3 3=2 4=1 5=1 6=14745600 -Split splitncnn_42 1 3 590 591 592 593 -GroupNorm gn_28 1 1 593 594 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_326 1 1 67 595 0=1280 1=1 2=1638400 -Swish silu_456 1 1 594 596 -Convolution conv_126 1 1 596 597 0=1280 1=3 4=1 5=1 6=14745600 -ExpandDims unsqueeze_1046 1 1 595 598 -23303=1,1 -ExpandDims unsqueeze_1047 1 1 598 599 -23303=1,2 -BinaryOp add_57 2 1 597 599 600 -GroupNorm gn_29 1 1 600 601 0=32 1=1280 2=1.000000e-05 -Swish silu_457 1 1 601 602 -Convolution conv_127 1 1 602 603 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_58 2 1 592 603 604 -Split splitncnn_43 1 3 604 605 606 607 -GroupNorm gn_30 1 1 607 608 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_327 1 1 66 609 0=1280 1=1 2=1638400 -Swish silu_458 1 1 608 610 -Convolution conv_128 1 1 610 611 0=1280 1=3 4=1 5=1 6=14745600 -ExpandDims unsqueeze_1048 1 1 609 612 -23303=1,1 -ExpandDims unsqueeze_1049 1 1 612 613 -23303=1,2 -BinaryOp add_59 2 1 611 613 614 -GroupNorm gn_31 1 1 614 615 0=32 1=1280 2=1.000000e-05 -Swish silu_459 1 1 615 616 -Convolution conv_129 1 1 616 617 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_60 2 1 606 617 618 -Split splitncnn_44 1 3 618 619 620 621 -GroupNorm gn_32 1 1 621 622 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_328 1 1 65 623 0=1280 1=1 2=1638400 -Swish silu_460 1 1 622 624 -Convolution conv_130 1 1 624 625 0=1280 1=3 4=1 5=1 6=14745600 -ExpandDims unsqueeze_1050 1 1 623 626 -23303=1,1 -ExpandDims unsqueeze_1051 1 1 626 627 -23303=1,2 -BinaryOp add_61 2 1 625 627 628 -GroupNorm gn_33 1 1 628 629 0=32 1=1280 2=1.000000e-05 -Swish silu_461 1 1 629 630 -Convolution conv_131 1 1 630 631 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_62 2 1 620 631 632 -Split splitncnn_45 1 2 632 633 634 -GroupNorm gn_200 1 1 634 635 0=32 1=1280 2=1.000000e-06 -Convolution conv_132 1 1 635 636 0=1280 1=1 5=1 6=1638400 -Permute permute_913 1 1 636 637 0=3 -Reshape reshape_597 1 1 637 638 0=1280 1=24 -Split splitncnn_46 1 2 638 639 640 -LayerNorm ln_228 1 1 640 641 0=1280 1=1.000000e-05 -Split splitncnn_47 1 3 641 642 643 644 -MultiHeadAttention linear_332 3 1 644 643 642 664 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_64 2 1 664 639 665 -Split splitncnn_48 1 2 665 666 667 -LayerNorm ln_229 1 1 667 668 0=1280 1=1.000000e-05 -MultiHeadAttention linear_336 3 1 668 22 21 688 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_66 2 1 688 666 689 -Split splitncnn_49 1 2 689 690 691 -LayerNorm ln_230 1 1 691 692 0=1280 1=1.000000e-05 -InnerProduct linear_337 1 1 692 693 0=10240 1=1 2=13107200 -Slice split_6 1 2 693 694 695 -23300=2,5120,-233 1=1 -GELU gelu_6 1 1 695 696 0=1 -BinaryOp mul_67 2 1 694 696 697 0=2 -InnerProduct linear_338 1 1 697 698 0=1280 1=1 2=6553600 -BinaryOp add_68 2 1 698 690 699 -Reshape reshape_614 1 1 699 700 0=1280 1=4 2=6 -Permute permute_924 1 1 700 701 0=4 -Convolution conv_133 1 1 701 702 0=1280 1=1 5=1 6=1638400 -BinaryOp add_69 2 1 702 633 703 -Split splitncnn_50 1 2 703 704 705 -GroupNorm gn_34 1 1 705 706 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_339 1 1 64 707 0=1280 1=1 2=1638400 -Swish silu_462 1 1 706 708 -Convolution conv_134 1 1 708 709 0=1280 1=3 4=1 5=1 6=14745600 -ExpandDims unsqueeze_1052 1 1 707 710 -23303=1,1 -ExpandDims unsqueeze_1053 1 1 710 711 -23303=1,2 -BinaryOp add_70 2 1 709 711 712 -GroupNorm gn_35 1 1 712 713 0=32 1=1280 2=1.000000e-05 -Swish silu_463 1 1 713 714 -Convolution conv_135 1 1 714 715 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_71 2 1 704 715 716 -Concat cat_1 2 1 716 619 717 -Split splitncnn_51 1 2 717 718 719 -GroupNorm gn_36 1 1 718 720 0=32 1=2560 2=1.000000e-05 -InnerProduct linear_340 1 1 63 721 0=1280 1=1 2=1638400 -Swish silu_464 1 1 720 722 -Convolution conv_136 1 1 722 723 0=1280 1=3 4=1 5=1 6=29491200 -ExpandDims unsqueeze_1054 1 1 721 724 -23303=1,1 -ExpandDims unsqueeze_1055 1 1 724 725 -23303=1,2 -BinaryOp add_72 2 1 723 725 726 -GroupNorm gn_37 1 1 726 727 0=32 1=1280 2=1.000000e-05 -Convolution conv_138 1 1 719 728 0=1280 1=1 5=1 6=3276800 -Swish silu_465 1 1 727 729 -Convolution conv_137 1 1 729 730 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_73 2 1 728 730 731 -Concat cat_2 2 1 731 605 732 -Split splitncnn_52 1 2 732 733 734 -GroupNorm gn_38 1 1 733 735 0=32 1=2560 2=1.000000e-05 -InnerProduct linear_341 1 1 62 736 0=1280 1=1 2=1638400 -Swish silu_466 1 1 735 737 -Convolution conv_139 1 1 737 738 0=1280 1=3 4=1 5=1 6=29491200 -ExpandDims unsqueeze_1056 1 1 736 739 -23303=1,1 -ExpandDims unsqueeze_1057 1 1 739 740 -23303=1,2 -BinaryOp add_74 2 1 738 740 741 -GroupNorm gn_39 1 1 741 742 0=32 1=1280 2=1.000000e-05 -Convolution conv_141 1 1 734 743 0=1280 1=1 5=1 6=3276800 -Swish silu_467 1 1 742 744 -Convolution conv_140 1 1 744 745 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_75 2 1 743 745 746 -Concat cat_3 2 1 746 591 747 -Split splitncnn_53 1 2 747 748 749 -GroupNorm gn_40 1 1 748 750 0=32 1=2560 2=1.000000e-05 -InnerProduct linear_342 1 1 61 751 0=1280 1=1 2=1638400 -Swish silu_468 1 1 750 752 -Convolution conv_142 1 1 752 753 0=1280 1=3 4=1 5=1 6=29491200 -ExpandDims unsqueeze_1058 1 1 751 754 -23303=1,1 -ExpandDims unsqueeze_1059 1 1 754 755 -23303=1,2 -BinaryOp add_76 2 1 753 755 756 -GroupNorm gn_41 1 1 756 757 0=32 1=1280 2=1.000000e-05 -Convolution conv_144 1 1 749 758 0=1280 1=1 5=1 6=3276800 -Swish silu_469 1 1 757 759 -Convolution conv_143 1 1 759 760 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_77 2 1 758 760 761 -Interp upsample_nearest_93 1 1 761 762 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution conv_145 1 1 762 763 0=1280 1=3 4=1 5=1 6=14745600 -Concat cat_4 2 1 763 588 764 -Split splitncnn_54 1 2 764 765 766 -GroupNorm gn_42 1 1 765 767 0=32 1=2560 2=1.000000e-05 -InnerProduct linear_343 1 1 60 768 0=1280 1=1 2=1638400 -Swish silu_470 1 1 767 769 -Convolution conv_146 1 1 769 770 0=1280 1=3 4=1 5=1 6=29491200 -ExpandDims unsqueeze_1060 1 1 768 771 -23303=1,1 -ExpandDims unsqueeze_1061 1 1 771 772 -23303=1,2 -BinaryOp add_78 2 1 770 772 773 -GroupNorm gn_43 1 1 773 774 0=32 1=1280 2=1.000000e-05 -Convolution conv_148 1 1 766 775 0=1280 1=1 5=1 6=3276800 -Swish silu_471 1 1 774 776 -Convolution conv_147 1 1 776 777 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_79 2 1 775 777 778 -Split splitncnn_55 1 2 778 779 780 -GroupNorm gn_201 1 1 780 781 0=32 1=1280 2=1.000000e-06 -Convolution conv_149 1 1 781 782 0=1280 1=1 5=1 6=1638400 -Permute permute_925 1 1 782 783 0=3 -Reshape reshape_615 1 1 783 784 0=1280 1=96 -Split splitncnn_56 1 2 784 785 786 -LayerNorm ln_231 1 1 786 787 0=1280 1=1.000000e-05 -Split splitncnn_57 1 3 787 788 789 790 -MultiHeadAttention linear_347 3 1 790 789 788 810 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_81 2 1 810 785 811 -Split splitncnn_58 1 2 811 812 813 -LayerNorm ln_232 1 1 813 814 0=1280 1=1.000000e-05 -MultiHeadAttention linear_351 3 1 814 20 19 834 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_83 2 1 834 812 835 -Split splitncnn_59 1 2 835 836 837 -LayerNorm ln_233 1 1 837 838 0=1280 1=1.000000e-05 -InnerProduct linear_352 1 1 838 839 0=10240 1=1 2=13107200 -Slice split_7 1 2 839 840 841 -23300=2,5120,-233 1=1 -GELU gelu_7 1 1 841 842 0=1 -BinaryOp mul_84 2 1 840 842 843 0=2 -InnerProduct linear_353 1 1 843 844 0=1280 1=1 2=6553600 -BinaryOp add_85 2 1 844 836 845 -Reshape reshape_632 1 1 845 846 0=1280 1=8 2=12 -Permute permute_936 1 1 846 847 0=4 -Convolution conv_150 1 1 847 848 0=1280 1=1 5=1 6=1638400 -BinaryOp add_86 2 1 848 779 849 -Concat cat_5 2 1 849 503 850 -Split splitncnn_60 1 2 850 851 852 -GroupNorm gn_44 1 1 851 853 0=32 1=2560 2=1.000000e-05 -InnerProduct linear_354 1 1 59 854 0=1280 1=1 2=1638400 -Swish silu_472 1 1 853 855 -Convolution conv_151 1 1 855 856 0=1280 1=3 4=1 5=1 6=29491200 -ExpandDims unsqueeze_1062 1 1 854 857 -23303=1,1 -ExpandDims unsqueeze_1063 1 1 857 858 -23303=1,2 -BinaryOp add_87 2 1 856 858 859 -GroupNorm gn_45 1 1 859 860 0=32 1=1280 2=1.000000e-05 -Convolution conv_153 1 1 852 861 0=1280 1=1 5=1 6=3276800 -Swish silu_473 1 1 860 862 -Convolution conv_152 1 1 862 863 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_88 2 1 861 863 864 -Split splitncnn_61 1 2 864 865 866 -GroupNorm gn_202 1 1 866 867 0=32 1=1280 2=1.000000e-06 -Convolution conv_154 1 1 867 868 0=1280 1=1 5=1 6=1638400 -Permute permute_937 1 1 868 869 0=3 -Reshape reshape_633 1 1 869 870 0=1280 1=96 -Split splitncnn_62 1 2 870 871 872 -LayerNorm ln_234 1 1 872 873 0=1280 1=1.000000e-05 -Split splitncnn_63 1 3 873 874 875 876 -MultiHeadAttention linear_358 3 1 876 875 874 896 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_90 2 1 896 871 897 -Split splitncnn_64 1 2 897 898 899 -LayerNorm ln_235 1 1 899 900 0=1280 1=1.000000e-05 -MultiHeadAttention linear_362 3 1 900 18 17 920 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_92 2 1 920 898 921 -Split splitncnn_65 1 2 921 922 923 -LayerNorm ln_236 1 1 923 924 0=1280 1=1.000000e-05 -InnerProduct linear_363 1 1 924 925 0=10240 1=1 2=13107200 -Slice split_8 1 2 925 926 927 -23300=2,5120,-233 1=1 -GELU gelu_8 1 1 927 928 0=1 -BinaryOp mul_93 2 1 926 928 929 0=2 -InnerProduct linear_364 1 1 929 930 0=1280 1=1 2=6553600 -BinaryOp add_94 2 1 930 922 931 -Reshape reshape_650 1 1 931 932 0=1280 1=8 2=12 -Permute permute_948 1 1 932 933 0=4 -Convolution conv_155 1 1 933 934 0=1280 1=1 5=1 6=1638400 -BinaryOp add_95 2 1 934 865 935 -Concat cat_6 2 1 935 417 936 -Split splitncnn_66 1 2 936 937 938 -GroupNorm gn_46 1 1 937 939 0=32 1=1920 2=1.000000e-05 -InnerProduct linear_365 1 1 58 940 0=1280 1=1 2=1638400 -Swish silu_474 1 1 939 941 -Convolution conv_156 1 1 941 942 0=1280 1=3 4=1 5=1 6=22118400 -ExpandDims unsqueeze_1064 1 1 940 943 -23303=1,1 -ExpandDims unsqueeze_1065 1 1 943 944 -23303=1,2 -BinaryOp add_96 2 1 942 944 945 -GroupNorm gn_47 1 1 945 946 0=32 1=1280 2=1.000000e-05 -Convolution conv_158 1 1 938 947 0=1280 1=1 5=1 6=2457600 -Swish silu_475 1 1 946 948 -Convolution conv_157 1 1 948 949 0=1280 1=3 4=1 5=1 6=14745600 -BinaryOp add_97 2 1 947 949 950 -Split splitncnn_67 1 2 950 951 952 -GroupNorm gn_203 1 1 952 953 0=32 1=1280 2=1.000000e-06 -Convolution conv_159 1 1 953 954 0=1280 1=1 5=1 6=1638400 -Permute permute_949 1 1 954 955 0=3 -Reshape reshape_651 1 1 955 956 0=1280 1=96 -Split splitncnn_68 1 2 956 957 958 -LayerNorm ln_237 1 1 958 959 0=1280 1=1.000000e-05 -Split splitncnn_69 1 3 959 960 961 962 -MultiHeadAttention linear_369 3 1 962 961 960 982 0=1280 1=8 2=1638400 3=1280 4=1280 -BinaryOp add_99 2 1 982 957 983 -Split splitncnn_70 1 2 983 984 985 -LayerNorm ln_238 1 1 985 986 0=1280 1=1.000000e-05 -MultiHeadAttention linear_373 3 1 986 16 15 1006 0=1280 1=8 2=1638400 3=768 4=768 -BinaryOp add_101 2 1 1006 984 1007 -Split splitncnn_71 1 2 1007 1008 1009 -LayerNorm ln_239 1 1 1009 1010 0=1280 1=1.000000e-05 -InnerProduct linear_374 1 1 1010 1011 0=10240 1=1 2=13107200 -Slice split_9 1 2 1011 1012 1013 -23300=2,5120,-233 1=1 -GELU gelu_9 1 1 1013 1014 0=1 -BinaryOp mul_102 2 1 1012 1014 1015 0=2 -InnerProduct linear_375 1 1 1015 1016 0=1280 1=1 2=6553600 -BinaryOp add_103 2 1 1016 1008 1017 -Reshape reshape_668 1 1 1017 1018 0=1280 1=8 2=12 -Permute permute_960 1 1 1018 1019 0=4 -Convolution conv_160 1 1 1019 1020 0=1280 1=1 5=1 6=1638400 -BinaryOp add_104 2 1 1020 951 1021 -Interp upsample_nearest_94 1 1 1021 1022 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution conv_161 1 1 1022 1023 0=1280 1=3 4=1 5=1 6=14745600 -Concat cat_7 2 1 1023 414 1024 -Split splitncnn_72 1 2 1024 1025 1026 -GroupNorm gn_48 1 1 1025 1027 0=32 1=1920 2=1.000000e-05 -InnerProduct linear_376 1 1 57 1028 0=640 1=1 2=819200 -Swish silu_476 1 1 1027 1029 -Convolution conv_162 1 1 1029 1030 0=640 1=3 4=1 5=1 6=11059200 -ExpandDims unsqueeze_1066 1 1 1028 1031 -23303=1,1 -ExpandDims unsqueeze_1067 1 1 1031 1032 -23303=1,2 -BinaryOp add_105 2 1 1030 1032 1033 -GroupNorm gn_49 1 1 1033 1034 0=32 1=640 2=1.000000e-05 -Convolution conv_164 1 1 1026 1035 0=640 1=1 5=1 6=1228800 -Swish silu_477 1 1 1034 1036 -Convolution conv_163 1 1 1036 1037 0=640 1=3 4=1 5=1 6=3686400 -BinaryOp add_106 2 1 1035 1037 1038 -Split splitncnn_73 1 2 1038 1039 1040 -GroupNorm gn_204 1 1 1040 1041 0=32 1=640 2=1.000000e-06 -Convolution conv_165 1 1 1041 1042 0=640 1=1 5=1 6=409600 -Permute permute_961 1 1 1042 1043 0=3 -Reshape reshape_669 1 1 1043 1044 0=640 1=384 -Split splitncnn_74 1 2 1044 1045 1046 -LayerNorm ln_240 1 1 1046 1047 0=640 1=1.000000e-05 -Split splitncnn_75 1 3 1047 1048 1049 1050 -MultiHeadAttention linear_380 3 1 1050 1049 1048 1070 0=640 1=8 2=409600 3=640 4=640 -BinaryOp add_108 2 1 1070 1045 1071 -Split splitncnn_76 1 2 1071 1072 1073 -LayerNorm ln_241 1 1 1073 1074 0=640 1=1.000000e-05 -MultiHeadAttention linear_384 3 1 1074 14 13 1094 0=640 1=8 2=409600 3=768 4=768 -BinaryOp add_110 2 1 1094 1072 1095 -Split splitncnn_77 1 2 1095 1096 1097 -LayerNorm ln_242 1 1 1097 1098 0=640 1=1.000000e-05 -InnerProduct linear_385 1 1 1098 1099 0=5120 1=1 2=3276800 -Slice split_10 1 2 1099 1100 1101 -23300=2,2560,-233 1=1 -GELU gelu_10 1 1 1101 1102 0=1 -BinaryOp mul_111 2 1 1100 1102 1103 0=2 -InnerProduct linear_386 1 1 1103 1104 0=640 1=1 2=1638400 -BinaryOp add_112 2 1 1104 1096 1105 -Reshape reshape_686 1 1 1105 1106 0=640 1=16 2=24 -Permute permute_972 1 1 1106 1107 0=4 -Convolution conv_166 1 1 1107 1108 0=640 1=1 5=1 6=409600 -BinaryOp add_113 2 1 1108 1039 1109 -Concat cat_8 2 1 1109 329 1110 -Split splitncnn_78 1 2 1110 1111 1112 -GroupNorm gn_50 1 1 1111 1113 0=32 1=1280 2=1.000000e-05 -InnerProduct linear_387 1 1 56 1114 0=640 1=1 2=819200 -Swish silu_478 1 1 1113 1115 -Convolution conv_167 1 1 1115 1116 0=640 1=3 4=1 5=1 6=7372800 -ExpandDims unsqueeze_1068 1 1 1114 1117 -23303=1,1 -ExpandDims unsqueeze_1069 1 1 1117 1118 -23303=1,2 -BinaryOp add_114 2 1 1116 1118 1119 -GroupNorm gn_51 1 1 1119 1120 0=32 1=640 2=1.000000e-05 -Convolution conv_169 1 1 1112 1121 0=640 1=1 5=1 6=819200 -Swish silu_479 1 1 1120 1122 -Convolution conv_168 1 1 1122 1123 0=640 1=3 4=1 5=1 6=3686400 -BinaryOp add_115 2 1 1121 1123 1124 -Split splitncnn_79 1 2 1124 1125 1126 -GroupNorm gn_205 1 1 1126 1127 0=32 1=640 2=1.000000e-06 -Convolution conv_170 1 1 1127 1128 0=640 1=1 5=1 6=409600 -Permute permute_973 1 1 1128 1129 0=3 -Reshape reshape_687 1 1 1129 1130 0=640 1=384 -Split splitncnn_80 1 2 1130 1131 1132 -LayerNorm ln_243 1 1 1132 1133 0=640 1=1.000000e-05 -Split splitncnn_81 1 3 1133 1134 1135 1136 -MultiHeadAttention linear_391 3 1 1136 1135 1134 1156 0=640 1=8 2=409600 3=640 4=640 -BinaryOp add_117 2 1 1156 1131 1157 -Split splitncnn_82 1 2 1157 1158 1159 -LayerNorm ln_244 1 1 1159 1160 0=640 1=1.000000e-05 -MultiHeadAttention linear_395 3 1 1160 12 11 1180 0=640 1=8 2=409600 3=768 4=768 -BinaryOp add_119 2 1 1180 1158 1181 -Split splitncnn_83 1 2 1181 1182 1183 -LayerNorm ln_245 1 1 1183 1184 0=640 1=1.000000e-05 -InnerProduct linear_396 1 1 1184 1185 0=5120 1=1 2=3276800 -Slice split_11 1 2 1185 1186 1187 -23300=2,2560,-233 1=1 -GELU gelu_11 1 1 1187 1188 0=1 -BinaryOp mul_120 2 1 1186 1188 1189 0=2 -InnerProduct linear_397 1 1 1189 1190 0=640 1=1 2=1638400 -BinaryOp add_121 2 1 1190 1182 1191 -Reshape reshape_704 1 1 1191 1192 0=640 1=16 2=24 -Permute permute_984 1 1 1192 1193 0=4 -Convolution conv_171 1 1 1193 1194 0=640 1=1 5=1 6=409600 -BinaryOp add_122 2 1 1194 1125 1195 -Concat cat_9 2 1 1195 243 1196 -Split splitncnn_84 1 2 1196 1197 1198 -GroupNorm gn_52 1 1 1197 1199 0=32 1=960 2=1.000000e-05 -InnerProduct linear_398 1 1 55 1200 0=640 1=1 2=819200 -Swish silu_480 1 1 1199 1201 -Convolution conv_172 1 1 1201 1202 0=640 1=3 4=1 5=1 6=5529600 -ExpandDims unsqueeze_1070 1 1 1200 1203 -23303=1,1 -ExpandDims unsqueeze_1071 1 1 1203 1204 -23303=1,2 -BinaryOp add_123 2 1 1202 1204 1205 -GroupNorm gn_53 1 1 1205 1206 0=32 1=640 2=1.000000e-05 -Convolution conv_174 1 1 1198 1207 0=640 1=1 5=1 6=614400 -Swish silu_481 1 1 1206 1208 -Convolution conv_173 1 1 1208 1209 0=640 1=3 4=1 5=1 6=3686400 -BinaryOp add_124 2 1 1207 1209 1210 -Split splitncnn_85 1 2 1210 1211 1212 -GroupNorm gn_206 1 1 1212 1213 0=32 1=640 2=1.000000e-06 -Convolution conv_175 1 1 1213 1214 0=640 1=1 5=1 6=409600 -Permute permute_985 1 1 1214 1215 0=3 -Reshape reshape_705 1 1 1215 1216 0=640 1=384 -Split splitncnn_86 1 2 1216 1217 1218 -LayerNorm ln_246 1 1 1218 1219 0=640 1=1.000000e-05 -Split splitncnn_87 1 3 1219 1220 1221 1222 -MultiHeadAttention linear_402 3 1 1222 1221 1220 1242 0=640 1=8 2=409600 3=640 4=640 -BinaryOp add_126 2 1 1242 1217 1243 -Split splitncnn_88 1 2 1243 1244 1245 -LayerNorm ln_247 1 1 1245 1246 0=640 1=1.000000e-05 -MultiHeadAttention linear_406 3 1 1246 10 9 1266 0=640 1=8 2=409600 3=768 4=768 -BinaryOp add_128 2 1 1266 1244 1267 -Split splitncnn_89 1 2 1267 1268 1269 -LayerNorm ln_248 1 1 1269 1270 0=640 1=1.000000e-05 -InnerProduct linear_407 1 1 1270 1271 0=5120 1=1 2=3276800 -Slice split_12 1 2 1271 1272 1273 -23300=2,2560,-233 1=1 -GELU gelu_12 1 1 1273 1274 0=1 -BinaryOp mul_129 2 1 1272 1274 1275 0=2 -InnerProduct linear_408 1 1 1275 1276 0=640 1=1 2=1638400 -BinaryOp add_130 2 1 1276 1268 1277 -Reshape reshape_722 1 1 1277 1278 0=640 1=16 2=24 -Permute permute_996 1 1 1278 1279 0=4 -Convolution conv_176 1 1 1279 1280 0=640 1=1 5=1 6=409600 -BinaryOp add_131 2 1 1280 1211 1281 -Interp upsample_nearest_95 1 1 1281 1282 0=1 1=2.000000e+00 2=2.000000e+00 -Convolution conv_177 1 1 1282 1283 0=640 1=3 4=1 5=1 6=3686400 -Concat cat_10 2 1 1283 240 1284 -Split splitncnn_90 1 2 1284 1285 1286 -GroupNorm gn_54 1 1 1285 1287 0=32 1=960 2=1.000000e-05 -InnerProduct linear_409 1 1 54 1288 0=320 1=1 2=409600 -Swish silu_482 1 1 1287 1289 -Convolution conv_178 1 1 1289 1290 0=320 1=3 4=1 5=1 6=2764800 -ExpandDims unsqueeze_1072 1 1 1288 1291 -23303=1,1 -ExpandDims unsqueeze_1073 1 1 1291 1292 -23303=1,2 -BinaryOp add_132 2 1 1290 1292 1293 -GroupNorm gn_55 1 1 1293 1294 0=32 1=320 2=1.000000e-05 -Convolution conv_180 1 1 1286 1295 0=320 1=1 5=1 6=307200 -Swish silu_483 1 1 1294 1296 -Convolution conv_179 1 1 1296 1297 0=320 1=3 4=1 5=1 6=921600 -BinaryOp add_133 2 1 1295 1297 1298 -Split splitncnn_91 1 2 1298 1299 1300 -GroupNorm gn_207 1 1 1300 1301 0=32 1=320 2=1.000000e-06 -Convolution conv_181 1 1 1301 1302 0=320 1=1 5=1 6=102400 -Permute permute_997 1 1 1302 1303 0=3 -Reshape reshape_723 1 1 1303 1304 0=320 1=1536 -Split splitncnn_92 1 2 1304 1305 1306 -LayerNorm ln_249 1 1 1306 1307 0=320 1=1.000000e-05 -Split splitncnn_93 1 3 1307 1308 1309 1310 -MultiHeadAttention linear_413 3 1 1310 1309 1308 1330 0=320 1=8 2=102400 3=320 4=320 -BinaryOp add_135 2 1 1330 1305 1331 -Split splitncnn_94 1 2 1331 1332 1333 -LayerNorm ln_250 1 1 1333 1334 0=320 1=1.000000e-05 -MultiHeadAttention linear_417 3 1 1334 8 7 1354 0=320 1=8 2=102400 3=768 4=768 -BinaryOp add_137 2 1 1354 1332 1355 -Split splitncnn_95 1 2 1355 1356 1357 -LayerNorm ln_251 1 1 1357 1358 0=320 1=1.000000e-05 -InnerProduct linear_418 1 1 1358 1359 0=2560 1=1 2=819200 -Slice split_13 1 2 1359 1360 1361 -23300=2,1280,-233 1=1 -GELU gelu_13 1 1 1361 1362 0=1 -BinaryOp mul_138 2 1 1360 1362 1363 0=2 -InnerProduct linear_419 1 1 1363 1364 0=320 1=1 2=409600 -BinaryOp add_139 2 1 1364 1356 1365 -Reshape reshape_740 1 1 1365 1366 0=320 1=32 2=48 -Permute permute_1008 1 1 1366 1367 0=4 -Convolution conv_182 1 1 1367 1368 0=320 1=1 5=1 6=102400 -BinaryOp add_140 2 1 1368 1299 1369 -Concat cat_11 2 1 1369 155 1370 -Split splitncnn_96 1 2 1370 1371 1372 -GroupNorm gn_56 1 1 1371 1373 0=32 1=640 2=1.000000e-05 -InnerProduct linear_420 1 1 53 1374 0=320 1=1 2=409600 -Swish silu_484 1 1 1373 1375 -Convolution conv_183 1 1 1375 1376 0=320 1=3 4=1 5=1 6=1843200 -ExpandDims unsqueeze_1074 1 1 1374 1377 -23303=1,1 -ExpandDims unsqueeze_1075 1 1 1377 1378 -23303=1,2 -BinaryOp add_141 2 1 1376 1378 1379 -GroupNorm gn_57 1 1 1379 1380 0=32 1=320 2=1.000000e-05 -Convolution conv_185 1 1 1372 1381 0=320 1=1 5=1 6=204800 -Swish silu_485 1 1 1380 1382 -Convolution conv_184 1 1 1382 1383 0=320 1=3 4=1 5=1 6=921600 -BinaryOp add_142 2 1 1381 1383 1384 -Split splitncnn_97 1 2 1384 1385 1386 -GroupNorm gn_208 1 1 1386 1387 0=32 1=320 2=1.000000e-06 -Convolution conv_186 1 1 1387 1388 0=320 1=1 5=1 6=102400 -Permute permute_1009 1 1 1388 1389 0=3 -Reshape reshape_741 1 1 1389 1390 0=320 1=1536 -Split splitncnn_98 1 2 1390 1391 1392 -LayerNorm ln_252 1 1 1392 1393 0=320 1=1.000000e-05 -Split splitncnn_99 1 3 1393 1394 1395 1396 -MultiHeadAttention linear_424 3 1 1396 1395 1394 1416 0=320 1=8 2=102400 3=320 4=320 -BinaryOp add_144 2 1 1416 1391 1417 -Split splitncnn_100 1 2 1417 1418 1419 -LayerNorm ln_253 1 1 1419 1420 0=320 1=1.000000e-05 -MultiHeadAttention linear_428 3 1 1420 6 5 1440 0=320 1=8 2=102400 3=768 4=768 -BinaryOp add_146 2 1 1440 1418 1441 -Split splitncnn_101 1 2 1441 1442 1443 -LayerNorm ln_254 1 1 1443 1444 0=320 1=1.000000e-05 -InnerProduct linear_429 1 1 1444 1445 0=2560 1=1 2=819200 -Slice split_14 1 2 1445 1446 1447 -23300=2,1280,-233 1=1 -GELU gelu_14 1 1 1447 1448 0=1 -BinaryOp mul_147 2 1 1446 1448 1449 0=2 -InnerProduct linear_430 1 1 1449 1450 0=320 1=1 2=409600 -BinaryOp add_148 2 1 1450 1442 1451 -Reshape reshape_758 1 1 1451 1452 0=320 1=32 2=48 -Permute permute_1020 1 1 1452 1453 0=4 -Convolution conv_187 1 1 1453 1454 0=320 1=1 5=1 6=102400 -BinaryOp add_149 2 1 1454 1385 1455 -Concat cat_12 2 1 1455 45 1456 -Split splitncnn_102 1 2 1456 1457 1458 -GroupNorm gn_58 1 1 1457 1459 0=32 1=640 2=1.000000e-05 -InnerProduct linear_431 1 1 52 1460 0=320 1=1 2=409600 -Swish silu_486 1 1 1459 1461 -Convolution conv_188 1 1 1461 1462 0=320 1=3 4=1 5=1 6=1843200 -ExpandDims unsqueeze_1076 1 1 1460 1463 -23303=1,1 -ExpandDims unsqueeze_1077 1 1 1463 1464 -23303=1,2 -BinaryOp add_150 2 1 1462 1464 1465 -GroupNorm gn_59 1 1 1465 1466 0=32 1=320 2=1.000000e-05 -Convolution conv_190 1 1 1458 1467 0=320 1=1 5=1 6=204800 -Swish silu_487 1 1 1466 1468 -Convolution conv_189 1 1 1468 1469 0=320 1=3 4=1 5=1 6=921600 -BinaryOp add_151 2 1 1467 1469 1470 -Split splitncnn_103 1 2 1470 1471 1472 -GroupNorm gn_209 1 1 1472 1473 0=32 1=320 2=1.000000e-06 -Convolution conv_191 1 1 1473 1474 0=320 1=1 5=1 6=102400 -Permute permute_1021 1 1 1474 1475 0=3 -Reshape reshape_759 1 1 1475 1476 0=320 1=1536 -Split splitncnn_104 1 2 1476 1477 1478 -LayerNorm ln_255 1 1 1478 1479 0=320 1=1.000000e-05 -Split splitncnn_105 1 3 1479 1480 1481 1482 -MultiHeadAttention linear_435 3 1 1482 1481 1480 1502 0=320 1=8 2=102400 3=320 4=320 -BinaryOp add_153 2 1 1502 1477 1503 -Split splitncnn_106 1 2 1503 1504 1505 -LayerNorm ln_256 1 1 1505 1506 0=320 1=1.000000e-05 -MultiHeadAttention linear_439 3 1 1506 4 3 1526 0=320 1=8 2=102400 3=768 4=768 -BinaryOp add_155 2 1 1526 1504 1527 -Split splitncnn_107 1 2 1527 1528 1529 -LayerNorm ln_257 1 1 1529 1530 0=320 1=1.000000e-05 -InnerProduct linear_440 1 1 1530 1531 0=2560 1=1 2=819200 -Slice split_15 1 2 1531 1532 1533 -23300=2,1280,-233 1=1 -GELU gelu_15 1 1 1533 1534 0=1 -BinaryOp mul_156 2 1 1532 1534 1535 0=2 -InnerProduct linear_441 1 1 1535 1536 0=320 1=1 2=409600 -BinaryOp add_157 2 1 1536 1528 1537 -Reshape reshape_776 1 1 1537 1538 0=320 1=32 2=48 -Permute permute_1032 1 1 1538 1539 0=4 -Convolution conv_192 1 1 1539 1540 0=320 1=1 5=1 6=102400 -BinaryOp add_158 2 1 1540 1471 1541 -GroupNorm gn_60 1 1 1541 1542 0=32 1=320 2=1.000000e-05 -Swish silu_488 1 1 1542 1543 -Convolution conv_193 1 1 1543 out0 0=4 1=3 4=1 5=1 6=11520 -BinaryOp post_mul 2 1 out0 c_out c_out_out0 0=2 -BinaryOp out_add 2 1 c_out_out0 in0_2 outout diff --git a/x86/exe/init.jpg b/x86/exe/init.jpg new file mode 100644 index 0000000..f0900ee Binary files /dev/null and b/x86/exe/init.jpg differ diff --git a/x86/exe/magic.txt b/x86/exe/magic.txt index ffe3977..8996217 100644 --- a/x86/exe/magic.txt +++ b/x86/exe/magic.txt @@ -1,7 +1,8 @@ 256 256 1 -15 +50 42 +init.jpg floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful ((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text \ No newline at end of file diff --git a/x86/exe/stable-diffusion.exe b/x86/exe/stable-diffusion.exe index ab0e7b0..f370283 100644 Binary files a/x86/exe/stable-diffusion.exe and b/x86/exe/stable-diffusion.exe differ diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/assets/AutoencoderKL-encoder-512-512-fp16.param b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/assets/AutoencoderKL-encoder-512-512-fp16.param new file mode 100644 index 0000000..f831bd7 --- /dev/null +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/assets/AutoencoderKL-encoder-512-512-fp16.param @@ -0,0 +1,108 @@ +7767517 +106 118 +Input in0 0 1 in0 +Convolution conv_3 1 1 in0 1 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=3456 +Split splitncnn_0 1 2 1 2 3 +GroupNorm gn_31 1 1 3 4 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_54 1 1 4 5 +Convolution conv_4 1 1 5 6 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +GroupNorm gn_32 1 1 6 7 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_55 1 1 7 8 +Convolution conv_5 1 1 8 9 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +BinaryOp add_0 2 1 2 9 10 0=0 +Split splitncnn_1 1 2 10 11 12 +GroupNorm gn_33 1 1 12 13 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_56 1 1 13 14 +Convolution conv_6 1 1 14 15 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +GroupNorm gn_34 1 1 15 16 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_57 1 1 16 17 +Convolution conv_7 1 1 17 18 0=128 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=147456 +BinaryOp add_1 2 1 11 18 19 0=0 +Padding pad_0 1 1 19 20 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_8 1 1 20 21 0=128 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=147456 +Split splitncnn_2 1 2 21 22 23 +GroupNorm gn_35 1 1 23 24 0=32 1=128 2=1.000000e-06 3=1 +Swish silu_58 1 1 24 25 +Convolution conv_9 1 1 25 26 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=294912 +GroupNorm gn_36 1 1 26 27 0=32 1=256 2=1.000000e-06 3=1 +Convolution conv_11 1 1 22 28 0=256 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=32768 +Swish silu_59 1 1 27 29 +Convolution conv_10 1 1 29 30 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +BinaryOp add_2 2 1 28 30 31 0=0 +Split splitncnn_3 1 2 31 32 33 +GroupNorm gn_37 1 1 33 34 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_60 1 1 34 35 +Convolution conv_12 1 1 35 36 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +GroupNorm gn_38 1 1 36 37 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_61 1 1 37 38 +Convolution conv_13 1 1 38 39 0=256 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=589824 +BinaryOp add_3 2 1 32 39 40 0=0 +Padding pad_1 1 1 40 41 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_14 1 1 41 42 0=256 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=589824 +Split splitncnn_4 1 2 42 43 44 +GroupNorm gn_39 1 1 44 45 0=32 1=256 2=1.000000e-06 3=1 +Swish silu_62 1 1 45 46 +Convolution conv_15 1 1 46 47 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=1179648 +GroupNorm gn_40 1 1 47 48 0=32 1=512 2=1.000000e-06 3=1 +Convolution conv_17 1 1 43 49 0=512 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=131072 +Swish silu_63 1 1 48 50 +Convolution conv_16 1 1 50 51 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_4 2 1 49 51 52 0=0 +Split splitncnn_5 1 2 52 53 54 +GroupNorm gn_41 1 1 54 55 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_64 1 1 55 56 +Convolution conv_18 1 1 56 57 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_42 1 1 57 58 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_65 1 1 58 59 +Convolution conv_19 1 1 59 60 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_5 2 1 53 60 61 0=0 +Padding pad_2 1 1 61 62 0=0 1=1 2=0 3=1 4=0 5=0.000000e+00 6=0 +Convolution conv_20 1 1 62 63 0=512 1=3 11=3 12=1 13=2 14=0 2=1 3=2 4=0 5=1 6=2359296 +Split splitncnn_6 1 2 63 64 65 +GroupNorm gn_43 1 1 65 66 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_66 1 1 66 67 +Convolution conv_21 1 1 67 68 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_44 1 1 68 69 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_67 1 1 69 70 +Convolution conv_22 1 1 70 71 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_6 2 1 64 71 72 0=0 +Split splitncnn_7 1 2 72 73 74 +GroupNorm gn_45 1 1 74 75 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_68 1 1 75 76 +Convolution conv_23 1 1 76 77 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_46 1 1 77 78 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_69 1 1 78 79 +Convolution conv_24 1 1 79 80 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_7 2 1 73 80 81 0=0 +Split splitncnn_8 1 2 81 82 83 +GroupNorm gn_47 1 1 83 84 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_70 1 1 84 85 +Convolution conv_25 1 1 85 86 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_48 1 1 86 87 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_71 1 1 87 88 +Convolution conv_26 1 1 88 89 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_8 2 1 82 89 90 0=0 +Split splitncnn_9 1 2 90 91 92 +GroupNorm gn_49 1 1 92 93 0=32 1=512 2=1.000000e-06 3=1 +Reshape view_76 1 1 93 94 0=4096 1=512 +Permute transpose_78 1 1 94 95 0=1 +MultiHeadAttention attention_53 1 1 95 96 0=512 1=1 2=262144 3=512 4=512 +Permute transpose_79 1 1 96 97 0=1 +Reshape reshape_75 1 1 97 98 0=64 1=64 2=512 +BinaryOp add_9 2 1 98 91 99 0=0 +Split splitncnn_10 1 2 99 100 101 +GroupNorm gn_50 1 1 101 102 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_72 1 1 102 103 +Convolution conv_27 1 1 103 104 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +GroupNorm gn_51 1 1 104 105 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_73 1 1 105 106 +Convolution conv_28 1 1 106 107 0=512 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=2359296 +BinaryOp add_10 2 1 100 107 108 0=0 +GroupNorm gn_52 1 1 108 109 0=32 1=512 2=1.000000e-06 3=1 +Swish silu_74 1 1 109 110 +Convolution conv_29 1 1 110 111 0=8 1=3 11=3 12=1 13=1 14=1 2=1 3=1 4=1 5=1 6=36864 +Convolution conv_30 1 1 111 112 0=8 1=1 11=1 12=1 13=1 14=0 2=1 3=1 4=0 5=1 6=64 +Slice chunk_0 1 2 112 out0 114 -23300=2,-233,-233 1=0 +Clip clamp_77 1 1 114 115 0=-3.000000e+01 1=2.000000e+01 +BinaryOp mul_11 1 1 115 116 0=2 1=1 2=5.000000e-01 +UnaryOp exp_12 1 1 116 out1 0=7 diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/decoder_slover.h b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/decoder_slover.h index 67c20bb..10fb796 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/decoder_slover.h +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/decoder_slover.h @@ -23,7 +23,7 @@ class DecodeSlover private: void generate_param(int height, int width); - const float factor[4] = { 5.48998f, 5.48998f, 5.48998f, 5.48998f }; + const float factor[4] = { 1.0 / 0.18215f, 1.0 / 0.18215f, 1.0 / 0.18215f, 1.0 / 0.18215f }; const float _mean_[3] = { -1.0f, -1.0f, -1.0f }; const float _norm_[3] = { 127.5f, 127.5f, 127.5f }; diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.cpp b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.cpp index 8c7a179..1d0b679 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.cpp +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.cpp @@ -235,7 +235,7 @@ ncnn::Mat DiffusionSlover::CFGDenoiser_CompVisDenoiser(ncnn::Mat& input, float s float* u_ptr = denoised_uncond.channel(c); float* c_ptr = denoised_cond.channel(c); for (int hw = 0; hw < h_size * w_size; hw++) { - (*u_ptr) = (*u_ptr) + 7 * ((*c_ptr) - (*u_ptr)); + (*u_ptr) = (*u_ptr) + guidance_scale * ((*c_ptr) - (*u_ptr)); u_ptr++; c_ptr++; } @@ -244,10 +244,8 @@ ncnn::Mat DiffusionSlover::CFGDenoiser_CompVisDenoiser(ncnn::Mat& input, float s return denoised_uncond; } -ncnn::Mat DiffusionSlover::sampler(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc) +ncnn::Mat DiffusionSlover::sampler_txt2img(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc) { - ncnn::Mat x_mat = randn_4(seed % 1000); - // t_to_sigma vector sigma(step); float delta = 0.0 - 999.0 / (step - 1); @@ -260,18 +258,20 @@ ncnn::Mat DiffusionSlover::sampler(int seed, int step, ncnn::Mat& c, ncnn::Mat& } sigma.push_back(0.f); + // init + ncnn::Mat x_mat = randn_4(seed % 1000); float _norm_[4] = { sigma[0], sigma[0], sigma[0], sigma[0] }; x_mat.substract_mean_normalize(0, _norm_); // euler ancestral { for (int i = 0; i < sigma.size() - 1; i++) { - cout << "step:" << i << "\t\t"; + printf("step:%2d/%d\t", i + 1, sigma.size() - 1); double t1 = ncnn::get_current_time(); ncnn::Mat denoised = CFGDenoiser_CompVisDenoiser(x_mat, sigma[i], c, uc); double t2 = ncnn::get_current_time(); - cout << t2 - t1 << "ms" << endl; + printf("%.2lfms\n", t2 - t1); float sigma_up = min(sigma[i + 1], sqrt(sigma[i + 1] * sigma[i + 1] * (sigma[i] * sigma[i] - sigma[i + 1] * sigma[i + 1]) / (sigma[i] * sigma[i]))); float sigma_down = sqrt(sigma[i + 1] * sigma[i + 1] - sigma_up * sigma_up); @@ -346,3 +346,90 @@ ncnn::Mat DiffusionSlover::sampler(int seed, int step, ncnn::Mat& c, ncnn::Mat& fuck_x.clone_from(x_mat); return fuck_x; } + +ncnn::Mat DiffusionSlover::sampler_img2img(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc, vector& init) +{ + // t_to_sigma + vector sigma(step); + float delta = 0.0 - 999.0 / (step - 1); + for (int i = 0; i < step; i++) { + float t = 999.0 + i * delta; + int low_idx = floor(t); + int high_idx = ceil(t); + float w = t - low_idx; + sigma[i] = exp((1 - w) * log_sigmas[low_idx] + w * log_sigmas[high_idx]); + } + sigma.push_back(0.f); + + // init + ncnn::Mat x_mat(w_size, h_size, 4); + + // finish the rest of decoder + { + ncnn::Mat noise_mat = randn_4(seed % 1000); + for (int c = 0; c < 4; c++) { + float* x_ptr = x_mat.channel(c); + float* noise_ptr = noise_mat.channel(c); + float* mean_ptr = init[0].channel(c); + float* std_ptr = init[1].channel(c); + for (int hw = 0; hw < h_size * w_size; hw++) { + *x_ptr = *mean_ptr + *std_ptr * *noise_ptr; + x_ptr++; + noise_ptr++; + mean_ptr++; + std_ptr++; + } + } + x_mat.substract_mean_normalize(0, factor); + } + + // reset scheduling + int new_step = step * strength; + { + float _sigma_ = sigma[step - new_step]; + ncnn::Mat noise_mat = randn_4(seed % 1000); + for (int c = 0; c < 4; c++) { + float* x_ptr = x_mat.channel(c); + float* noise_ptr = noise_mat.channel(c); + for (int hw = 0; hw < h_size * w_size; hw++) { + *x_ptr = *x_ptr + *noise_ptr * _sigma_; + x_ptr++; + noise_ptr++; + } + } + } + vector sub_sigma(sigma.begin() + step - new_step, sigma.end()); + + // euler ancestral + { + for (int i = 0; i < sub_sigma.size() - 1; i++) { + printf("step:%2d/%d\t", i+1, sub_sigma.size()-1); + + double t1 = ncnn::get_current_time(); + ncnn::Mat denoised = CFGDenoiser_CompVisDenoiser(x_mat, sub_sigma[i], c, uc); + double t2 = ncnn::get_current_time(); + printf("%.2lfms\n", t2 - t1); + + float sigma_up = min(sub_sigma[i + 1], sqrt(sub_sigma[i + 1] * sub_sigma[i + 1] * (sub_sigma[i] * sub_sigma[i] - sub_sigma[i + 1] * sub_sigma[i + 1]) / (sub_sigma[i] * sub_sigma[i]))); + float sigma_down = sqrt(sub_sigma[i + 1] * sub_sigma[i + 1] - sigma_up * sigma_up); + + srand(time(NULL) + i); + ncnn::Mat randn = randn_4(rand() % 1000); + for (int c = 0; c < 4; c++) { + float* x_ptr = x_mat.channel(c); + float* d_ptr = denoised.channel(c); + float* r_ptr = randn.channel(c); + for (int hw = 0; hw < h_size * w_size; hw++) { + *x_ptr = *x_ptr + ((*x_ptr - *d_ptr) / sub_sigma[i]) * (sigma_down - sub_sigma[i]) + *r_ptr * sigma_up; + x_ptr++; + d_ptr++; + r_ptr++; + } + } + } + } + + ncnn::Mat fuck_x; + fuck_x.clone_from(x_mat); + return fuck_x; +} diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.h b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.h index 2e24be7..49a409a 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.h +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/diffusion_slover.h @@ -19,7 +19,8 @@ class DiffusionSlover public: DiffusionSlover(int h, int w, int mode); - ncnn::Mat sampler(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc); + ncnn::Mat sampler_txt2img(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc); + ncnn::Mat sampler_img2img(int seed, int step, ncnn::Mat& c, ncnn::Mat& uc, vector& init); private: void generate_param(int height, int width); @@ -29,6 +30,10 @@ class DiffusionSlover private: float log_sigmas[1000] = { 0 }; + const float guidance_scale = 7.5; + const float strength = 0.75; + + const float factor[4] = { 0.18215f, 0.18215f, 0.18215f, 0.18215f }; ncnn::Net net; diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.cpp b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.cpp new file mode 100644 index 0000000..88b8d5f --- /dev/null +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.cpp @@ -0,0 +1,71 @@ +#include "encoder_slover.h" + +EncodeSlover::EncodeSlover(int h, int w) +{ + net.opt.use_vulkan_compute = false; + net.opt.use_winograd_convolution = false; + net.opt.use_sgemm_convolution = false; + net.opt.use_fp16_packed = false; + net.opt.use_fp16_storage = false; + net.opt.use_fp16_arithmetic = false; + net.opt.use_bf16_storage = true; + net.opt.use_packing_layout = true; + + if (h == 512 && w == 512) + net.load_param("assets/AutoencoderKL-encoder-512-512-fp16.param"); + else + { + generate_param(h, w); + net.load_param(("assets/tmp-AutoencoderKL-encoder-" + to_string(h) + "-" + to_string(w) + "-fp16.param").c_str()); + } + net.load_model("assets/AutoencoderKL-encoder-512-512-fp16.bin"); + + h_size = h; + w_size = w; +} + +void EncodeSlover::generate_param(int height, int width) +{ + string line; + ifstream encoder_file("assets/AutoencoderKL-encoder-512-512-fp16.param"); + ofstream encoder_file_new("assets/tmp-AutoencoderKL-encoder-" + std::to_string(height) + "-" + std::to_string(width) + "-fp16.param"); + + int cnt = 0; + while (getline(encoder_file, line)) + { + if (line.substr(0, 7) == "Reshape") + { + switch (cnt) + { + case 0: line = line.substr(0, line.size() - 12) + "0=" + to_string(width * height / 8 / 8) + " 1=512"; break; + case 1: line = line.substr(0, line.size() - 15) + "0=" + to_string(width / 8) + " 1=" + std::to_string(height / 8) + " 2=512"; break; + default: break; + } + + cnt++; + } + encoder_file_new << line << endl; + } + encoder_file_new.close(); + encoder_file.close(); +} + +std::vector EncodeSlover::encode(cv::Mat& bgr_image) +{ + std::vector mean_std(2); + { + int ih = bgr_image.rows, iw = bgr_image.cols; + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr_image.data, ncnn::Mat::PIXEL_BGR2RGB, iw, ih, w_size, h_size); + in.substract_mean_normalize(_mean_, _norm_); + + { + ncnn::Extractor ex = net.create_extractor(); + ex.set_light_mode(true); + ex.input("in0", in); + ex.extract("out0", mean_std[0]); + ex.extract("out1", mean_std[1]); + } + } + + return mean_std; +} \ No newline at end of file diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.h b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.h new file mode 100644 index 0000000..2fae44b --- /dev/null +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/encoder_slover.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +class EncodeSlover +{ +public: + EncodeSlover(int h, int w); + + std::vector encode(cv::Mat& image); + +private: + void generate_param(int height, int width); + + const float _mean_[3] = { 127.5f, 127.5f, 127.5f }; + const float _norm_[3] = { 1.0 / 127.5f, 1.0 / 127.5f, 1.0 / 127.5f }; + + ncnn::Net net; + + int h_size, w_size; +}; \ No newline at end of file diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/init.jpg b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/init.jpg new file mode 100644 index 0000000..f0900ee Binary files /dev/null and b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/init.jpg differ diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/magic.txt b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/magic.txt index ffe3977..8996217 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/magic.txt +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/magic.txt @@ -1,7 +1,8 @@ 256 256 1 -15 +50 42 +init.jpg floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful ((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text \ No newline at end of file diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.cpp b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.cpp index 4bc7fcc..6258b32 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.cpp +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.cpp @@ -7,18 +7,22 @@ #include #include #include -#include "prompt_slover.h" -#include "decoder_slover.h" -#include "diffusion_slover.h" #include #include #include #include "getmem.h" + +#include "prompt_slover.h" +#include "encoder_slover.h" +#include "diffusion_slover.h" +#include "decoder_slover.h" + using namespace std; int main() { int height, width, mode, step, seed; + string init_image; string positive_prompt, negative_prompt; // default setting @@ -27,6 +31,7 @@ int main() mode = 0; step = 15; seed = 42; + init_image = ""; positive_prompt = "floating hair, portrait, ((loli)), ((one girl)), cute face, hidden hands, asymmetrical bangs, beautiful detailed eyes, eye shadow, hair ornament, ribbons, bowties, buttons, pleated skirt, (((masterpiece))), ((best quality)), colorful"; negative_prompt = "((part of the head)), ((((mutated hands and fingers)))), deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, Octane renderer, lowres, bad anatomy, bad hands, text"; @@ -39,7 +44,7 @@ int main() else { string content = ""; int i = 0; - for (i = 0; i < 7; i++) { + for (i = 0; i < 8; i++) { if (getline(magic, content)) { switch (i) { @@ -48,8 +53,9 @@ int main() case 2:mode = stoi(content); case 3:step = stoi(content); case 4:seed = stoi(content); - case 5:positive_prompt = content; - case 6:negative_prompt = content; + case 5:init_image = content; + case 6:positive_prompt = content; + case 7:negative_prompt = content; default:break; } } @@ -57,7 +63,7 @@ int main() break; } } - if (i != 7) { + if (i != 8) { cout << "magic.txt has wrong format, please fix it" << endl; return 0; } @@ -69,32 +75,45 @@ int main() magic.close(); // stable diffusion - cout << "----------------[init]--------------------"; + cout << "----------------[ init ]----------------"; PromptSlover prompt_slover; DiffusionSlover diffusion_slover(height, width, mode); DecodeSlover decode_slover(height, width); + EncodeSlover encode_slover(height, width); printf(" %.2lfG / %.2lfG\n", getCurrentRSS() / 1024.0 / 1024.0 / 1024.0, getPeakRSS() / 1024.0 / 1024.0 / 1024.0); - cout << "----------------[prompt]------------------"; + cout << "----------------[ prompt ]----------------"; ncnn::Mat cond = prompt_slover.get_conditioning(positive_prompt); ncnn::Mat uncond = prompt_slover.get_conditioning(negative_prompt); printf(" %.2lfG / %.2lfG\n", getCurrentRSS() / 1024.0 / 1024.0 / 1024.0, getPeakRSS() / 1024.0 / 1024.0 / 1024.0); - cout << "----------------[diffusion]---------------" << endl; - ncnn::Mat sample = diffusion_slover.sampler(seed, step, cond, uncond); - cout << "----------------[diffusion]---------------"; + vector init_latents; + cv::Mat img = cv::imread(init_image); + if (!img.empty()) { + cout << "----------------[ encoder ]----------------"; + init_latents = encode_slover.encode(img); + printf(" %.2lfG / %.2lfG\n", getCurrentRSS() / 1024.0 / 1024.0 / 1024.0, getPeakRSS() / 1024.0 / 1024.0 / 1024.0); + } + + cout << "----------------[diffusion]----------------" << endl; + ncnn::Mat sample; + if (!img.empty()) { + sample = diffusion_slover.sampler_img2img(seed, step, cond, uncond, init_latents); + } + else { + sample = diffusion_slover.sampler_txt2img(seed, step, cond, uncond); + } + cout << "----------------[diffusion]----------------"; printf(" %.2lfG / %.2lfG\n", getCurrentRSS() / 1024.0 / 1024.0 / 1024.0, getPeakRSS() / 1024.0 / 1024.0 / 1024.0); - cout << "----------------[decode]------------------"; + cout << "----------------[ decoder ]----------------"; ncnn::Mat x_samples_ddim = decode_slover.decode(sample); printf(" %.2lfG / %.2lfG\n", getCurrentRSS() / 1024.0 / 1024.0 / 1024.0, getPeakRSS() / 1024.0 / 1024.0 / 1024.0); - cout << "----------------[save]--------------------" << endl; + cout << "----------------[ save ]----------------" << endl; cv::Mat image(height, width, CV_8UC3); x_samples_ddim.to_pixels(image.data, ncnn::Mat::PIXEL_RGB2BGR); cv::imwrite("result_" + to_string(step) + "_" + to_string(seed) + "_" + to_string(height) + "x" + to_string(width) + ".png", image); - cout << "----------------[close]-------------------" << endl; - return 0; } diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj index 1bc2040..d40fffd 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj @@ -148,12 +148,14 @@ + + diff --git a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj.filters b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj.filters index 94d8524..0ef9a52 100644 --- a/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj.filters +++ b/x86/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo/vs2019_opencv-mobile_ncnn-dll_demo.vcxproj.filters @@ -27,6 +27,9 @@ 源文件 + + 源文件 + @@ -41,5 +44,8 @@ 头文件 + + 头文件 + \ No newline at end of file