diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
index c0255375b388..6056c277b1f5 100644
--- a/docs/developer-guide/operators.md
+++ b/docs/developer-guide/operators.md
@@ -30,8 +30,10 @@
 * [Dropout](#dropout)
 * [Eltwise](#eltwise)
 * [ELU](#elu)
+* [Embed](#embed)
 * [Exp](#exp)
 * [Flatten](#flatten)
+* [Fold](#fold)
 * [GELU](#gelu)
 * [GLU](#glu)
 * [Gemm](#gemm)
@@ -84,6 +86,7 @@
 * [Threshold](#threshold)
 * [Tile](#tile)
 * [UnaryOp](#unaryop)
+* [Unfold](#unfold)
 
 # AbsVal
 ```
@@ -474,12 +477,15 @@ y = crop(x)
 | --------- | ------------- | ----- | --------- | ----------------- |
 | 0 | woffset | int | 0 | |
 | 1 | hoffset | int | 0 | |
-| 2 | coffset | int | 1 | |
-| 3 | outw | int | 1 | |
+| 13 | doffset | int | 0 | |
+| 2 | coffset | int | 0 | |
+| 3 | outw | int | 0 | |
 | 4 | outh | int | 0 | |
+| 14 | outd | int | 0 | |
 | 5 | outc | int | 0 | |
 | 6 | woffset2 | int | 0 | |
-| 7 | hoffset2 | int | 1 | |
+| 7 | hoffset2 | int | 0 | |
+| 15 | doffset2 | int | 0 | |
 | 8 | coffset2 | int | 0 | |
 | 9 | starts | array | [ ] | |
 | 10 | ends | array | [ ] | |
@@ -819,6 +825,23 @@ else y = x
 | param id | name | type | default | description |
 | --------- | ------------- | ----- | --------- | ----------------- |
 | 0 | alpha | float | 0.1f | |
+# Embed
+```
+y = embedding(x)
+```
+
+| param id | name | type | default | description |
+| --------- | ------------- | ----- | --------- | ----------------- |
+| 0 | num_output | int | 0 | |
+| 1 | input_dim | int | 0 | |
+| 2 | bias_term | int | 0 | |
+| 3 | weight_data_size | int | 0 | |
+
+| weight | type | shape |
+| ------------- | ----- | --------------------- |
+| weight_data | float | [weight_data_size] |
+| bias_data | float | [num_output] |
+
 # Exp
 ```
 if base == -1    y = exp(shift + x * scale)
@@ -839,6 +862,29 @@ Reshape blob to 1 dimension
 
 * one_blob_only
 
+# Fold
+```
+y = fold(x)
+```
+
+* one_blob_only
+
+| param id | name | type | default | description |
+| --------- | ------------- | ----- | --------- | ----------------- |
+| 0 | num_output | int | 0 | |
+| 1 | kernel_w | int | 0 | |
+| 2 | dilation_w | int | 1 | |
+| 3 | stride_w | int | 1 | |
+| 4 | pad_left | int | 0 | |
+| 11 | kernel_h | int | kernel_w | |
+| 12 | dilation_h | int | dilation_w | |
+| 13 | stride_h | int | stride_w | |
+| 14 | pad_top | int | pad_left | |
+| 15 | pad_right | int | pad_left | |
+| 16 | pad_bottom | int | pad_top | |
+| 20 | output_w | int | 0 | |
+| 21 | output_h | int | output_w | |
+
 # GELU
 ```
 if fast_gelu == 1    y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x)));
@@ -1187,6 +1233,7 @@ y = data
 | 1 | h | int | 0 | |
 | 11 | d | int | 0 | |
 | 2 | c | int | 0 | |
+| 21 | load_type | int | 1 | 1=fp32 |
 
 | weight | type | shape |
 | ------------- | ----- | --------------------- |
@@ -1537,6 +1584,7 @@ y = reduce_op(x * coeff)
 | 2 | coeff | float | 1.f | |
 | 3 | axes | array | [ ] | |
 | 4 | keepdims | int | 0 | |
+| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 |
 
 Operation type:
 - 0 = SUM
@@ -1829,3 +1877,24 @@ Operation type:
 - 17 = LOG10
 - 18 = ROUND
 - 19 = TRUNC
+
+# Unfold
+```
+y = unfold(x)
+```
+
+* one_blob_only
+
+| param id | name | type | default | description |
+| --------- | ------------- | ----- | --------- | ----------------- |
+| 0 | num_output | int | 0 | |
+| 1 | kernel_w | int | 0 | |
+| 2 | dilation_w | int | 1 | |
+| 3 | stride_w | int | 1 | |
+| 4 | pad_left | int | 0 | |
+| 11 | kernel_h | int | kernel_w | |
+| 12 | dilation_h | int | dilation_w | |
+| 13 | stride_h | int | stride_w | |
+| 14 | pad_top | int | pad_left | |
+| 15 | pad_right | int | pad_left | |
+| 16 | pad_bottom | int | pad_top | |
diff --git a/tools/modelwriter.h b/tools/modelwriter.h
index fd5105e612fe..64c3549d6882 100644
--- a/tools/modelwriter.h
+++ b/tools/modelwriter.h
@@ -32,6 +32,7 @@
 #include "layer/batchnorm.h"
 #include "layer/bias.h"
 #include "layer/binaryop.h"
+#include "layer/celu.h"
 #include "layer/clip.h"
 #include "layer/concat.h"
 #include "layer/convolution.h"
@@ -51,6 +52,7 @@
 #include "layer/deconvolutiondepthwise3d.h"
 #include "layer/deformableconv2d.h"
 #include "layer/detectionoutput.h"
+#include "layer/diag.h"
 #include "layer/dropout.h"
 #include "layer/eltwise.h"
 #include "layer/elu.h"
@@ -835,6 +837,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             fprintf_param_value(" 1=%d", with_scalar)
             fprintf_param_value(" 2=%e", b)
         }
+        else if (layer->type == "CELU")
+        {
+            ncnn::CELU* op = (ncnn::CELU*)layer;
+            ncnn::CELU* op_default = (ncnn::CELU*)layer_default;
+
+            fprintf_param_value(" 0=%e", alpha)
+        }
         else if (layer->type == "Clip")
         {
             ncnn::Clip* op = (ncnn::Clip*)layer;
@@ -888,18 +897,21 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             }
             fprintf_param_value(" 19=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
 
 #if NCNN_INT8
-            // write int8_scale data
-            if (op->int8_scale_term)
-            {
-                fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
-                fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
-                fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
-            }
+                // write int8_scale data
+                if (op->int8_scale_term)
+                {
+                    fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
+                    fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
+                    fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
+                }
 #endif // NCNN_INT8
+            }
 
             if (shape_ready)
             {
@@ -931,9 +943,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 19=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1040,32 +1056,35 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             }
             fprintf_param_value(" 19=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
 
 #if NCNN_INT8
-            // write int8_scale data
-            if (op->int8_scale_term == 1 || op->int8_scale_term == 101)
-            {
-                op->bottom_blob_int8_scales.w = 1;
-            }
-            if (op->int8_scale_term == 2 || op->int8_scale_term == 102)
-            {
-                op->weight_data_int8_scales.w = 1;
-                op->bottom_blob_int8_scales.w = 1;
-            }
-            if (op->int8_scale_term > 100)
-            {
-                op->top_blob_int8_scales.w = 1;
-            }
+                // write int8_scale data
+                if (op->int8_scale_term == 1 || op->int8_scale_term == 101)
+                {
+                    op->bottom_blob_int8_scales.w = 1;
+                }
+                if (op->int8_scale_term == 2 || op->int8_scale_term == 102)
+                {
+                    op->weight_data_int8_scales.w = 1;
+                    op->bottom_blob_int8_scales.w = 1;
+                }
+                if (op->int8_scale_term > 100)
+                {
+                    op->top_blob_int8_scales.w = 1;
+                }
 
-            if (op->int8_scale_term)
-            {
-                fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
-                fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
-                fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
-            }
+                if (op->int8_scale_term)
+                {
+                    fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
+                    fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
+                    fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
+                }
 #endif // NCNN_INT8
+            }
 
             if (shape_ready)
             {
@@ -1098,9 +1117,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 19=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1261,9 +1284,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 28=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1296,9 +1323,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 28=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1418,9 +1449,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 28=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1454,9 +1489,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             {
                 if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
             }
+            fprintf_param_value(" 28=%d", dynamic_weight)
 
-            fwrite_weight_tag_data(op->weight_data, bp);
-            fwrite_weight_data(op->bias_data, bp);
+            if (dynamic_weight == 0)
+            {
+                fwrite_weight_tag_data(op->weight_data, bp);
+                fwrite_weight_data(op->bias_data, bp);
+            }
 
             if (shape_ready)
             {
@@ -1597,6 +1636,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
             fprintf_param_value(" 7=%e", variances[2])
             fprintf_param_value(" 8=%e", variances[3])
         }
+        else if (layer->type == "Diag")
+        {
+            ncnn::Diag* op = (ncnn::Diag*)layer;
+            ncnn::Diag* op_default = (ncnn::Diag*)layer_default;
+
+            fprintf_param_value(" 0=%d", diagonal)
+        }
         else if (layer->type == "Dropout")
        {
             ncnn::Dropout* op = (ncnn::Dropout*)layer;