diff --git a/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml b/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
index d1275ad3..7ecb323c 100644
--- a/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
+++ b/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
@@ -106,7 +106,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml b/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
index 4388e299..94533438 100644
--- a/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
+++ b/configs/official/coco2017/yoshitomo-matsubara/rrpr2020/ghnd-custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
@@ -106,7 +106,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/at-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/at-resnet18_from_resnet34.yaml
index 418988a2..a1061a5a 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/at-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/at-resnet18_from_resnet34.yaml
@@ -95,7 +95,7 @@ train:
       milestones: [30, 60, 90]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/crd-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/crd-resnet18_from_resnet34.yaml
index bb8b3642..7ee7c995 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/crd-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/crd-resnet18_from_resnet34.yaml
@@ -121,7 +121,7 @@ train:
       milestones: [30, 60, 90]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml
index 89ed4f52..0c2c3791 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/cse_l2-resnet18_from_resnet34.yaml
@@ -95,7 +95,7 @@ train:
       milestones: [30, 60]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ft-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ft-resnet18_from_resnet34.yaml
index ee1a6657..9cd5bf85 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ft-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/ft-resnet18_from_resnet34.yaml
@@ -109,7 +109,7 @@ train:
         momentum: 0.9
         weight_decay: 0.0001
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -183,7 +183,7 @@ train:
         milestones: [30, 60, 90]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/kd-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/kd-resnet18_from_resnet34.yaml
index 2d65fd5e..0932ae8e 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/kd-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/kd-resnet18_from_resnet34.yaml
@@ -89,7 +89,7 @@ train:
       milestones: [30, 60, 90]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/pad_l2-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/pad_l2-resnet18_from_resnet34.yaml
index 13b47ea0..156ab716 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/pad_l2-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/pad_l2-resnet18_from_resnet34.yaml
@@ -104,7 +104,7 @@ train:
         milestones: [30]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/sskd-resnet18_from_resnet34.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/sskd-resnet18_from_resnet34.yaml
index bf55fd4b..d8199b7d 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/sskd-resnet18_from_resnet34.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/sskd-resnet18_from_resnet34.yaml
@@ -111,7 +111,7 @@ train:
         milestones: [10, 20]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -171,7 +171,7 @@ train:
         milestones: [30, 60, 90]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
diff --git a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/tfkd-resnet18_from_resnet18.yaml b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/tfkd-resnet18_from_resnet18.yaml
index bb3287a8..0fe87130 100644
--- a/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/tfkd-resnet18_from_resnet18.yaml
+++ b/configs/official/ilsvrc2012/yoshitomo-matsubara/rrpr2020/tfkd-resnet18_from_resnet18.yaml
@@ -89,7 +89,7 @@ train:
       milestones: [30, 60, 80]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k12_depth100-final_run.yaml b/configs/sample/cifar10/ce/densenet_bc_k12_depth100-final_run.yaml
index 7943c2ca..dba9ab61 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k12_depth100-final_run.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k12_depth100-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
index 6455e1bf..f56a1a77 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k24_depth250-final_run.yaml b/configs/sample/cifar10/ce/densenet_bc_k24_depth250-final_run.yaml
index e21d8e70..899b0a8c 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k24_depth250-final_run.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k24_depth250-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
index 45485b20..34dcc4f5 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k40_depth190-final_run.yaml b/configs/sample/cifar10/ce/densenet_bc_k40_depth190-final_run.yaml
index adf42395..1b0d92ed 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k40_depth190-final_run.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k40_depth190-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
index c8ba95cb..5a6411e4 100644
--- a/configs/sample/cifar10/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet110-final_run.yaml b/configs/sample/cifar10/ce/resnet110-final_run.yaml
index d1c30767..bf7447a5 100644
--- a/configs/sample/cifar10/ce/resnet110-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet110-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet110-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet110-hyperparameter_tuning.yaml
index 39ab443f..6fb16aed 100644
--- a/configs/sample/cifar10/ce/resnet110-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet110-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet1202-final_run.yaml b/configs/sample/cifar10/ce/resnet1202-final_run.yaml
index 46a4e6e2..4160d308 100644
--- a/configs/sample/cifar10/ce/resnet1202-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet1202-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet1202-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet1202-hyperparameter_tuning.yaml
index 7bfbe96d..cd3d01c6 100644
--- a/configs/sample/cifar10/ce/resnet1202-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet1202-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet20-final_run.yaml b/configs/sample/cifar10/ce/resnet20-final_run.yaml
index 5b978844..e2083a86 100644
--- a/configs/sample/cifar10/ce/resnet20-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet20-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet20-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet20-hyperparameter_tuning.yaml
index e09401d1..a4717b2a 100644
--- a/configs/sample/cifar10/ce/resnet20-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet20-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet32-final_run.yaml b/configs/sample/cifar10/ce/resnet32-final_run.yaml
index 0b6db83b..b223c637 100644
--- a/configs/sample/cifar10/ce/resnet32-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet32-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet32-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet32-hyperparameter_tuning.yaml
index 3d1772ac..088e59a6 100644
--- a/configs/sample/cifar10/ce/resnet32-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet32-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet44-final_run.yaml b/configs/sample/cifar10/ce/resnet44-final_run.yaml
index d425bde8..e45b7c1b 100644
--- a/configs/sample/cifar10/ce/resnet44-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet44-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet44-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet44-hyperparameter_tuning.yaml
index 57334acb..b1c7154e 100644
--- a/configs/sample/cifar10/ce/resnet44-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet44-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet56-final_run.yaml b/configs/sample/cifar10/ce/resnet56-final_run.yaml
index 474b9444..1451b40f 100644
--- a/configs/sample/cifar10/ce/resnet56-final_run.yaml
+++ b/configs/sample/cifar10/ce/resnet56-final_run.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/resnet56-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/resnet56-hyperparameter_tuning.yaml
index a10a9757..5dad576a 100644
--- a/configs/sample/cifar10/ce/resnet56-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/resnet56-hyperparameter_tuning.yaml
@@ -84,7 +84,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet16_8-final_run.yaml b/configs/sample/cifar10/ce/wide_resnet16_8-final_run.yaml
index 80f8e5e4..2076bc35 100644
--- a/configs/sample/cifar10/ce/wide_resnet16_8-final_run.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet16_8-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet16_8-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/wide_resnet16_8-hyperparameter_tuning.yaml
index cae0cfdf..4202ce6f 100644
--- a/configs/sample/cifar10/ce/wide_resnet16_8-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet16_8-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet28_10-final_run.yaml b/configs/sample/cifar10/ce/wide_resnet28_10-final_run.yaml
index 791ce7b5..cd48be0f 100644
--- a/configs/sample/cifar10/ce/wide_resnet28_10-final_run.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet28_10-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet28_10-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/wide_resnet28_10-hyperparameter_tuning.yaml
index 437ad081..ac6cb8e6 100644
--- a/configs/sample/cifar10/ce/wide_resnet28_10-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet28_10-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet40_4-final_run.yaml b/configs/sample/cifar10/ce/wide_resnet40_4-final_run.yaml
index a2f14c08..faf76ae9 100644
--- a/configs/sample/cifar10/ce/wide_resnet40_4-final_run.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet40_4-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/ce/wide_resnet40_4-hyperparameter_tuning.yaml b/configs/sample/cifar10/ce/wide_resnet40_4-hyperparameter_tuning.yaml
index 0bcb2449..a1c991ab 100644
--- a/configs/sample/cifar10/ce/wide_resnet40_4-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/ce/wide_resnet40_4-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-final_run.yaml b/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-final_run.yaml
index 2816ba98..32342ba7 100644
--- a/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-final_run.yaml
+++ b/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-final_run.yaml
@@ -97,7 +97,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-hyperparameter_tuning.yaml b/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-hyperparameter_tuning.yaml
index 3d2b5dae..bd0df2b0 100644
--- a/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/kd/resnet20_from_densenet_bc_k12_depth100-hyperparameter_tuning.yaml
@@ -97,7 +97,7 @@ train:
       milestones: [91, 136]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml b/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
index 0d0617c1..4c93dee4 100644
--- a/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
+++ b/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
@@ -101,7 +101,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml b/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
index 0c222b6e..9df8a512 100644
--- a/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar10/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
@@ -101,7 +101,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k12_depth100-final_run.yaml b/configs/sample/cifar100/ce/densenet_bc_k12_depth100-final_run.yaml
index dfc04682..1ed4ae79 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k12_depth100-final_run.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k12_depth100-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
index d139b523..fc297644 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k12_depth100-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k24_depth250-final_run.yaml b/configs/sample/cifar100/ce/densenet_bc_k24_depth250-final_run.yaml
index ed9673e7..893d81b3 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k24_depth250-final_run.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k24_depth250-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
index 912842b4..a0683ade 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k24_depth250-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k40_depth190-final_run.yaml b/configs/sample/cifar100/ce/densenet_bc_k40_depth190-final_run.yaml
index 6755ae58..8c81ff79 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k40_depth190-final_run.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k40_depth190-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
index 009e3009..872d2eb8 100644
--- a/configs/sample/cifar100/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/densenet_bc_k40_depth190-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet16_8-final_run.yaml b/configs/sample/cifar100/ce/wide_resnet16_8-final_run.yaml
index 9bfe230f..99b7192f 100644
--- a/configs/sample/cifar100/ce/wide_resnet16_8-final_run.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet16_8-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet16_8-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/wide_resnet16_8-hyperparameter_tuning.yaml
index 35c2895f..7088d15f 100644
--- a/configs/sample/cifar100/ce/wide_resnet16_8-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet16_8-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet28_10-final_run.yaml b/configs/sample/cifar100/ce/wide_resnet28_10-final_run.yaml
index 45fccbcb..98c09a46 100644
--- a/configs/sample/cifar100/ce/wide_resnet28_10-final_run.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet28_10-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet28_10-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/wide_resnet28_10-hyperparameter_tuning.yaml
index f07936a9..b77502d8 100644
--- a/configs/sample/cifar100/ce/wide_resnet28_10-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet28_10-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet40_4-final_run.yaml b/configs/sample/cifar100/ce/wide_resnet40_4-final_run.yaml
index e09b81f3..a72d6b5e 100644
--- a/configs/sample/cifar100/ce/wide_resnet40_4-final_run.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet40_4-final_run.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/ce/wide_resnet40_4-hyperparameter_tuning.yaml b/configs/sample/cifar100/ce/wide_resnet40_4-hyperparameter_tuning.yaml
index 66173855..2b18f071 100644
--- a/configs/sample/cifar100/ce/wide_resnet40_4-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/ce/wide_resnet40_4-hyperparameter_tuning.yaml
@@ -85,7 +85,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-final_run.yaml b/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-final_run.yaml
index 09afdb75..0b44408f 100644
--- a/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-final_run.yaml
+++ b/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-final_run.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-hyperparameter_tuning.yaml b/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-hyperparameter_tuning.yaml
index 28bec404..ed4806c9 100644
--- a/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/kd/densenet_bc_k12_depth100_from_wide_resnet28_10-hyperparameter_tuning.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [150, 225]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml b/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
index b99ec484..88fab2f2 100644
--- a/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
+++ b/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-final_run.yaml
@@ -101,7 +101,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml b/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
index e47934a7..4f49b3d4 100644
--- a/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
+++ b/configs/sample/cifar100/kd/wide_resnet40_1_from_wide_resnet40_4-hyperparameter_tuning.yaml
@@ -101,7 +101,7 @@ train:
       milestones: [60, 120, 160]
       gamma: 0.2
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/coco2017/multi_stage/ft/custom_fasterrcnn_resnet18_fpn_from_fasterrcnn_resnet50_fpn.yaml b/configs/sample/coco2017/multi_stage/ft/custom_fasterrcnn_resnet18_fpn_from_fasterrcnn_resnet50_fpn.yaml
index dfcab62f..cf42c333 100644
--- a/configs/sample/coco2017/multi_stage/ft/custom_fasterrcnn_resnet18_fpn_from_fasterrcnn_resnet50_fpn.yaml
+++ b/configs/sample/coco2017/multi_stage/ft/custom_fasterrcnn_resnet18_fpn_from_fasterrcnn_resnet50_fpn.yaml
@@ -108,7 +108,7 @@ train:
         momentum: 0.9
         weight_decay: 0.0005
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -178,7 +178,7 @@ train:
         milestones: [5, 10]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       func2extract_org_loss: 'extract_org_loss_dict'
       org_term:
         factor: 1.0
diff --git a/configs/sample/coco2017/multi_stage/ktaad/lraspp_mobilenet_v3_large_from_deeplabv3_resnet50.yaml b/configs/sample/coco2017/multi_stage/ktaad/lraspp_mobilenet_v3_large_from_deeplabv3_resnet50.yaml
index 158e8b45..8c9e6572 100644
--- a/configs/sample/coco2017/multi_stage/ktaad/lraspp_mobilenet_v3_large_from_deeplabv3_resnet50.yaml
+++ b/configs/sample/coco2017/multi_stage/ktaad/lraspp_mobilenet_v3_large_from_deeplabv3_resnet50.yaml
@@ -120,7 +120,7 @@ train:
         momentum: 0.9
         weight_decay: 0.0001
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -214,7 +214,7 @@ train:
         power: 0.9
       scheduling_step: 1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       func2extract_org_loss: 'extract_simple_org_loss_dict'
       org_term:
         criterion:
diff --git a/configs/sample/coco2017/single_stage/ce/deeplabv3_resnet50.yaml b/configs/sample/coco2017/single_stage/ce/deeplabv3_resnet50.yaml
index f2c56d7e..b3ba9860 100644
--- a/configs/sample/coco2017/single_stage/ce/deeplabv3_resnet50.yaml
+++ b/configs/sample/coco2017/single_stage/ce/deeplabv3_resnet50.yaml
@@ -102,7 +102,7 @@ train:
       power: 0.9
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_simple_org_loss_dict'
     org_term:
       criterion:
diff --git a/configs/sample/coco2017/single_stage/ghnd/custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml b/configs/sample/coco2017/single_stage/ghnd/custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
index fd69ba45..4dd4ae52 100644
--- a/configs/sample/coco2017/single_stage/ghnd/custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
+++ b/configs/sample/coco2017/single_stage/ghnd/custom_fasterrcnn_resnet50_fpn_from_fasterrcnn_resnet50_fpn.yaml
@@ -106,7 +106,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/coco2017/single_stage/ghnd/custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml b/configs/sample/coco2017/single_stage/ghnd/custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
index 83be9f07..2c677c28 100644
--- a/configs/sample/coco2017/single_stage/ghnd/custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
+++ b/configs/sample/coco2017/single_stage/ghnd/custom_maskrcnn_resnet50_fpn_from_maskrcnn_resnet50_fpn.yaml
@@ -106,7 +106,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/glue/cola/ce/bert_base_uncased.yaml b/configs/sample/glue/cola/ce/bert_base_uncased.yaml
index 253dc1a8..63235474 100644
--- a/configs/sample/glue/cola/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/cola/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/cola/ce/bert_large_uncased.yaml b/configs/sample/glue/cola/ce/bert_large_uncased.yaml
index 78ed4485..c9f06c81 100644
--- a/configs/sample/glue/cola/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/cola/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/cola/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/cola/kd/bert_base_uncased_from_bert_large_uncased.yaml
index c6493057..db435b3a 100644
--- a/configs/sample/glue/cola/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/cola/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/mnli/ce/bert_base_uncased.yaml b/configs/sample/glue/mnli/ce/bert_base_uncased.yaml
index d71e99aa..ca97603a 100644
--- a/configs/sample/glue/mnli/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/mnli/ce/bert_base_uncased.yaml
@@ -76,7 +76,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/mnli/ce/bert_large_uncased.yaml b/configs/sample/glue/mnli/ce/bert_large_uncased.yaml
index db4b265d..3256b87d 100644
--- a/configs/sample/glue/mnli/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/mnli/ce/bert_large_uncased.yaml
@@ -76,7 +76,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/mnli/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/mnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
index 3fa7a898..ef756ffe 100644
--- a/configs/sample/glue/mnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/mnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -95,7 +95,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/mrpc/ce/bert_base_uncased.yaml b/configs/sample/glue/mrpc/ce/bert_base_uncased.yaml
index 18411503..3c77449e 100644
--- a/configs/sample/glue/mrpc/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/mrpc/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/mrpc/ce/bert_large_uncased.yaml b/configs/sample/glue/mrpc/ce/bert_large_uncased.yaml
index 0e6666da..ed0fb8ce 100644
--- a/configs/sample/glue/mrpc/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/mrpc/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/mrpc/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/mrpc/kd/bert_base_uncased_from_bert_large_uncased.yaml
index 209f8ac3..11d2b951 100644
--- a/configs/sample/glue/mrpc/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/mrpc/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/qnli/ce/bert_base_uncased.yaml b/configs/sample/glue/qnli/ce/bert_base_uncased.yaml
index 1ea11ff1..21c80030 100644
--- a/configs/sample/glue/qnli/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/qnli/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/qnli/ce/bert_large_uncased.yaml b/configs/sample/glue/qnli/ce/bert_large_uncased.yaml
index d33979b3..4dcb3e90 100644
--- a/configs/sample/glue/qnli/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/qnli/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/qnli/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/qnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
index d455070d..5f9fe1ff 100644
--- a/configs/sample/glue/qnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/qnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/qqp/ce/bert_base_uncased.yaml b/configs/sample/glue/qqp/ce/bert_base_uncased.yaml
index 3221fb26..5ff75429 100644
--- a/configs/sample/glue/qqp/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/qqp/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/qqp/ce/bert_large_uncased.yaml b/configs/sample/glue/qqp/ce/bert_large_uncased.yaml
index a83b59df..5b3cd5a6 100644
--- a/configs/sample/glue/qqp/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/qqp/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/qqp/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/qqp/kd/bert_base_uncased_from_bert_large_uncased.yaml
index cebcec78..854448b3 100644
--- a/configs/sample/glue/qqp/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/qqp/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/rte/ce/bert_base_uncased.yaml b/configs/sample/glue/rte/ce/bert_base_uncased.yaml
index 09474b39..5f1dc7f6 100644
--- a/configs/sample/glue/rte/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/rte/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/rte/ce/bert_large_uncased.yaml b/configs/sample/glue/rte/ce/bert_large_uncased.yaml
index a0ba45b4..6a3d29ec 100644
--- a/configs/sample/glue/rte/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/rte/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/rte/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/rte/kd/bert_base_uncased_from_bert_large_uncased.yaml
index f98da790..d57d658b 100644
--- a/configs/sample/glue/rte/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/rte/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/sst2/ce/bert_base_uncased.yaml b/configs/sample/glue/sst2/ce/bert_base_uncased.yaml
index 756ce570..5cd3c8d3 100644
--- a/configs/sample/glue/sst2/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/sst2/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/sst2/ce/bert_large_uncased.yaml b/configs/sample/glue/sst2/ce/bert_large_uncased.yaml
index 18abf0e2..d1bc956d 100644
--- a/configs/sample/glue/sst2/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/sst2/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/sst2/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/sst2/kd/bert_base_uncased_from_bert_large_uncased.yaml
index f9712893..68bdf459 100644
--- a/configs/sample/glue/sst2/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/sst2/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/glue/stsb/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/stsb/kd/bert_base_uncased_from_bert_large_uncased.yaml
index 86194fc9..ad5f248e 100644
--- a/configs/sample/glue/stsb/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/stsb/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -91,7 +91,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/stsb/mse/bert_base_uncased.yaml b/configs/sample/glue/stsb/mse/bert_base_uncased.yaml
index f7e97c6c..446266d3 100644
--- a/configs/sample/glue/stsb/mse/bert_base_uncased.yaml
+++ b/configs/sample/glue/stsb/mse/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/stsb/mse/bert_large_uncased.yaml b/configs/sample/glue/stsb/mse/bert_large_uncased.yaml
index 93d98aba..d217a1ee 100644
--- a/configs/sample/glue/stsb/mse/bert_large_uncased.yaml
+++ b/configs/sample/glue/stsb/mse/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/wnli/ce/bert_base_uncased.yaml b/configs/sample/glue/wnli/ce/bert_base_uncased.yaml
index d653ceb6..2879c05c 100644
--- a/configs/sample/glue/wnli/ce/bert_base_uncased.yaml
+++ b/configs/sample/glue/wnli/ce/bert_base_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/wnli/ce/bert_large_uncased.yaml b/configs/sample/glue/wnli/ce/bert_large_uncased.yaml
index e86dc312..7a9acb9d 100644
--- a/configs/sample/glue/wnli/ce/bert_large_uncased.yaml
+++ b/configs/sample/glue/wnli/ce/bert_large_uncased.yaml
@@ -66,7 +66,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_transformers_loss'
     org_term:
       factor: 1.0
diff --git a/configs/sample/glue/wnli/kd/bert_base_uncased_from_bert_large_uncased.yaml b/configs/sample/glue/wnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
index 2e9ed23e..c95719c1 100644
--- a/configs/sample/glue/wnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
+++ b/configs/sample/glue/wnli/kd/bert_base_uncased_from_bert_large_uncased.yaml
@@ -85,7 +85,7 @@ train:
       num_training_steps:
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss4Transformer'
diff --git a/configs/sample/ilsvrc2012/multi_stage/dab/resnet18_from_resnet50.yaml b/configs/sample/ilsvrc2012/multi_stage/dab/resnet18_from_resnet50.yaml
index a6365308..1d38d53c 100644
--- a/configs/sample/ilsvrc2012/multi_stage/dab/resnet18_from_resnet50.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/dab/resnet18_from_resnet50.yaml
@@ -143,7 +143,7 @@ train:
         momentum: 0.9
         weight_decay: 0.0001
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -211,7 +211,7 @@ train:
         milestones: [5, 10]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/multi_stage/fitnet/resnet18_from_resnet152.yaml b/configs/sample/ilsvrc2012/multi_stage/fitnet/resnet18_from_resnet152.yaml
index 994d1e1b..d90acd31 100644
--- a/configs/sample/ilsvrc2012/multi_stage/fitnet/resnet18_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/fitnet/resnet18_from_resnet152.yaml
@@ -103,7 +103,7 @@ train:
         milestones: [3]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -146,7 +146,7 @@ train:
         milestones: [5, 10]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/multi_stage/fsp/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/multi_stage/fsp/resnet18_from_resnet34.yaml
index e123af01..0841d2da 100644
--- a/configs/sample/ilsvrc2012/multi_stage/fsp/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/fsp/resnet18_from_resnet34.yaml
@@ -94,7 +94,7 @@ train:
         milestones: [3]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -184,7 +184,7 @@ train:
         milestones: [5, 10]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/multi_stage/ft/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/multi_stage/ft/resnet18_from_resnet34.yaml
index 6ceb0d51..c73225db 100644
--- a/configs/sample/ilsvrc2012/multi_stage/ft/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/ft/resnet18_from_resnet34.yaml
@@ -109,7 +109,7 @@ train:
         momentum: 0.9
         weight_decay: 0.0005
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -177,7 +177,7 @@ train:
         milestones: [5, 10]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/multi_stage/pad/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/multi_stage/pad/resnet18_from_resnet34.yaml
index 45434d40..fc7e1295 100644
--- a/configs/sample/ilsvrc2012/multi_stage/pad/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/pad/resnet18_from_resnet34.yaml
@@ -96,7 +96,7 @@ train:
         milestones: [5]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
@@ -142,7 +142,7 @@ train:
     optimizer: *optimizer
     scheduler: *scheduler
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         criterion:
           type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/multi_stage/sskd/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/multi_stage/sskd/resnet18_from_resnet34.yaml
index bfcbcb33..87594ef1 100644
--- a/configs/sample/ilsvrc2012/multi_stage/sskd/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/multi_stage/sskd/resnet18_from_resnet34.yaml
@@ -111,7 +111,7 @@ train:
         milestones: [10, 20]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
@@ -171,7 +171,7 @@ train:
         milestones: [30, 60, 90]
         gamma: 0.1
     criterion:
-      type: 'GeneralizedCustomLoss'
+      type: 'WeightedSumLoss'
       org_term:
         factor: 0.0
       sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/at/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/single_stage/at/resnet18_from_resnet34.yaml
index 52b5c43a..ca349516 100644
--- a/configs/sample/ilsvrc2012/single_stage/at/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/at/resnet18_from_resnet34.yaml
@@ -95,7 +95,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/cckd/resnet18_from_resnet50.yaml b/configs/sample/ilsvrc2012/single_stage/cckd/resnet18_from_resnet50.yaml
index f9a64016..b2e73ae7 100644
--- a/configs/sample/ilsvrc2012/single_stage/cckd/resnet18_from_resnet50.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/cckd/resnet18_from_resnet50.yaml
@@ -116,7 +116,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/crd/resnet18_from_resnet50.yaml b/configs/sample/ilsvrc2012/single_stage/crd/resnet18_from_resnet50.yaml
index 6d356374..fff29795 100644
--- a/configs/sample/ilsvrc2012/single_stage/crd/resnet18_from_resnet50.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/crd/resnet18_from_resnet50.yaml
@@ -121,7 +121,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/cse_l2/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/single_stage/cse_l2/resnet18_from_resnet34.yaml
index 6d2b2db9..8029b2dd 100644
--- a/configs/sample/ilsvrc2012/single_stage/cse_l2/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/cse_l2/resnet18_from_resnet34.yaml
@@ -95,7 +95,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet169_from_densenet169.yaml b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet169_from_densenet169.yaml
index df91c7b0..2d5c7288 100644
--- a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet169_from_densenet169.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet169_from_densenet169.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet201_from_densenet201.yaml b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet201_from_densenet201.yaml
index 54837a50..e0bb0ac2 100644
--- a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet201_from_densenet201.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_densenet201_from_densenet201.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_inception_v3_from_inception_v3.yaml b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_inception_v3_from_inception_v3.yaml
index 4f19b608..e992770e 100644
--- a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_inception_v3_from_inception_v3.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_inception_v3_from_inception_v3.yaml
@@ -109,7 +109,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_resnet152_from_resnet152.yaml b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_resnet152_from_resnet152.yaml
index 8a3d900b..85260544 100644
--- a/configs/sample/ilsvrc2012/single_stage/ghnd/custom_resnet152_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/ghnd/custom_resnet152_from_resnet152.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet169_from_densenet169.yaml b/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet169_from_densenet169.yaml
index 6a245666..399b4fdf 100644
--- a/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet169_from_densenet169.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet169_from_densenet169.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet201_from_densenet201.yaml b/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet201_from_densenet201.yaml
index 28b63114..0928dbe9 100644
--- a/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet201_from_densenet201.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/hnd/custom_densenet201_from_densenet201.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/hnd/custom_inception_v3_from_inception_v3.yaml b/configs/sample/ilsvrc2012/single_stage/hnd/custom_inception_v3_from_inception_v3.yaml
index c04a1dcf..c68e1778 100644
--- a/configs/sample/ilsvrc2012/single_stage/hnd/custom_inception_v3_from_inception_v3.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/hnd/custom_inception_v3_from_inception_v3.yaml
@@ -109,7 +109,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/hnd/custom_resnet152_from_resnet152.yaml b/configs/sample/ilsvrc2012/single_stage/hnd/custom_resnet152_from_resnet152.yaml
index f37ac521..37ca401b 100644
--- a/configs/sample/ilsvrc2012/single_stage/hnd/custom_resnet152_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/hnd/custom_resnet152_from_resnet152.yaml
@@ -98,7 +98,7 @@ train:
       milestones: [5, 10, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/kd/alexnet_from_resnet152.yaml b/configs/sample/ilsvrc2012/single_stage/kd/alexnet_from_resnet152.yaml
index c4421036..c79524d6 100644
--- a/configs/sample/ilsvrc2012/single_stage/kd/alexnet_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/kd/alexnet_from_resnet152.yaml
@@ -89,7 +89,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/kd/resnet18_from_resnet152.yaml b/configs/sample/ilsvrc2012/single_stage/kd/resnet18_from_resnet152.yaml
index a0b5bd22..6008a5c8 100644
--- a/configs/sample/ilsvrc2012/single_stage/kd/resnet18_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/kd/resnet18_from_resnet152.yaml
@@ -89,7 +89,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/kr/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/single_stage/kr/resnet18_from_resnet34.yaml
index 0f8fe202..f7481dca 100644
--- a/configs/sample/ilsvrc2012/single_stage/kr/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/kr/resnet18_from_resnet34.yaml
@@ -130,7 +130,7 @@ train:
       milestones: [30, 60, 90]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/pkt/resnet18_from_resnet152.yaml b/configs/sample/ilsvrc2012/single_stage/pkt/resnet18_from_resnet152.yaml
index f88b5888..a2d90171 100644
--- a/configs/sample/ilsvrc2012/single_stage/pkt/resnet18_from_resnet152.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/pkt/resnet18_from_resnet152.yaml
@@ -93,7 +93,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       factor: 0.0
     sub_terms:
diff --git a/configs/sample/ilsvrc2012/single_stage/rkd/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/single_stage/rkd/resnet18_from_resnet34.yaml
index 633032bc..46270be2 100644
--- a/configs/sample/ilsvrc2012/single_stage/rkd/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/rkd/resnet18_from_resnet34.yaml
@@ -94,7 +94,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/spkd/resnet18_from_resnet34.yaml b/configs/sample/ilsvrc2012/single_stage/spkd/resnet18_from_resnet34.yaml
index 85019153..6bece45a 100644
--- a/configs/sample/ilsvrc2012/single_stage/spkd/resnet18_from_resnet34.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/spkd/resnet18_from_resnet34.yaml
@@ -95,7 +95,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'CrossEntropyLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/tfkd/resnet18_from_resnet18.yaml b/configs/sample/ilsvrc2012/single_stage/tfkd/resnet18_from_resnet18.yaml
index 2afccee3..b3fd7162 100644
--- a/configs/sample/ilsvrc2012/single_stage/tfkd/resnet18_from_resnet18.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/tfkd/resnet18_from_resnet18.yaml
@@ -89,7 +89,7 @@ train:
       milestones: [5, 15]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/ilsvrc2012/single_stage/vid/resnet18_from_resnet50.yaml b/configs/sample/ilsvrc2012/single_stage/vid/resnet18_from_resnet50.yaml
index b64bf6b5..c7029511 100644
--- a/configs/sample/ilsvrc2012/single_stage/vid/resnet18_from_resnet50.yaml
+++ b/configs/sample/ilsvrc2012/single_stage/vid/resnet18_from_resnet50.yaml
@@ -131,7 +131,7 @@ train:
       milestones: [5, 10]
       gamma: 0.1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     org_term:
       criterion:
         type: 'KDLoss'
diff --git a/configs/sample/pascal_voc2012/ce/deeplabv3_resnet101.yaml b/configs/sample/pascal_voc2012/ce/deeplabv3_resnet101.yaml
index 54e7f07e..df3c6090 100644
--- a/configs/sample/pascal_voc2012/ce/deeplabv3_resnet101.yaml
+++ b/configs/sample/pascal_voc2012/ce/deeplabv3_resnet101.yaml
@@ -115,7 +115,7 @@ train:
       power: 0.9
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_simple_org_loss_dict'
     org_term:
       criterion:
diff --git a/configs/sample/pascal_voc2012/ce/deeplabv3_resnet50.yaml b/configs/sample/pascal_voc2012/ce/deeplabv3_resnet50.yaml
index 2f5e0c3b..3f27159d 100644
--- a/configs/sample/pascal_voc2012/ce/deeplabv3_resnet50.yaml
+++ b/configs/sample/pascal_voc2012/ce/deeplabv3_resnet50.yaml
@@ -115,7 +115,7 @@ train:
       power: 0.9
     scheduling_step: 1
   criterion:
-    type: 'GeneralizedCustomLoss'
+    type: 'WeightedSumLoss'
     func2extract_org_loss: 'extract_simple_org_loss_dict'
     org_term:
       criterion:
diff --git a/tests/registry_test.py b/tests/registry_test.py
index 0b0896ea..81367e0f 100644
--- a/tests/registry_test.py
+++ b/tests/registry_test.py
@@ -7,10 +7,10 @@
     register_sample_loader_func, register_batch_sampler, register_transform, register_dataset_wrapper, \
     DATASET_DICT, COLLATE_FUNC_DICT, SAMPLE_LOADER_CLASS_DICT, SAMPLE_LOADER_FUNC_DICT, BATCH_SAMPLER_DICT, \
     TRANSFORM_DICT, DATASET_WRAPPER_DICT
-from torchdistill.losses.registry import register_high_level_loss, CUSTOM_LOSS_DICT, register_loss_wrapper, \
+from torchdistill.losses.registry import register_high_level_loss, HIGH_LEVEL_LOSS_DICT, register_loss_wrapper, \
     register_single_loss, LOSS_WRAPPER_DICT, SINGLE_LOSS_DICT, register_func2extract_org_output, \
     FUNC2EXTRACT_ORG_OUTPUT_DICT
-from torchdistill.models.registry import get_model, register_adaptation_module, ADAPTATION_MODULE_DICT, \
+from torchdistill.models.registry import register_adaptation_module, ADAPTATION_MODULE_DICT, \
     register_model_class, register_model_func, MODEL_CLASS_DICT, MODEL_FUNC_DICT, register_auxiliary_model_wrapper, \
     AUXILIARY_MODEL_WRAPPER_DICT
 from torchdistill.optim.registry import register_optimizer, register_scheduler, OPTIM_DICT, SCHEDULER_DICT
@@ -197,14 +197,14 @@ class TestCustomLoss0(object):
             def __init__(self):
                 self.name = 'test0'
 
-        assert CUSTOM_LOSS_DICT['TestCustomLoss0'] == TestCustomLoss0
+        assert HIGH_LEVEL_LOSS_DICT['TestCustomLoss0'] == TestCustomLoss0
 
         @register_high_level_loss()
         class TestCustomLoss1(object):
             def __init__(self):
                 self.name = 'test1'
 
-        assert CUSTOM_LOSS_DICT['TestCustomLoss1'] == TestCustomLoss1
+        assert HIGH_LEVEL_LOSS_DICT['TestCustomLoss1'] == TestCustomLoss1
         random_name = 'custom_loss_class_name2'
 
         @register_high_level_loss(key=random_name)
@@ -212,7 +212,7 @@ class TestCustomLoss2(object):
             def __init__(self):
                 self.name = 'test2'
 
-        assert CUSTOM_LOSS_DICT[random_name] == TestCustomLoss2
+        assert HIGH_LEVEL_LOSS_DICT[random_name] == TestCustomLoss2
 
     def test_register_loss_wrapper_class(self):
         @register_loss_wrapper
diff --git a/torchdistill/core/distillation.py b/torchdistill/core/distillation.py
index e5130b73..9d8f21e5 100644
--- a/torchdistill/core/distillation.py
+++ b/torchdistill/core/distillation.py
@@ -18,7 +18,7 @@
 from ..common.module_util import check_if_wrapped, freeze_module_params, get_module, \
     unfreeze_module_params, get_updatable_param_names
 from ..datasets.util import build_data_loaders
-from ..losses.registry import get_custom_loss, get_single_loss, get_func2extract_org_output
+from ..losses.registry import get_high_level_loss, get_single_loss, get_func2extract_org_output
 from ..models.util import redesign_model
 from ..models.wrapper import AuxiliaryModelWrapper, build_auxiliary_model_wrapper
 from ..optim.registry import get_optimizer, get_scheduler
@@ -89,7 +89,7 @@ def setup_loss(self, train_config):
         org_criterion_config = org_term_config.get('criterion', dict()) if isinstance(org_term_config, dict) else None
         self.org_criterion = None if org_criterion_config is None or len(org_criterion_config) == 0 \
             else get_single_loss(org_criterion_config)
-        self.criterion = get_custom_loss(criterion_config)
+        self.criterion = get_high_level_loss(criterion_config)
         logger.info(self.criterion)
         self.extract_org_loss = get_func2extract_org_output(criterion_config.get('func2extract_org_loss', None))
 
diff --git a/torchdistill/core/forward_hook.py b/torchdistill/core/forward_hook.py
index fa05a5df..8359b9dc 100644
--- a/torchdistill/core/forward_hook.py
+++ b/torchdistill/core/forward_hook.py
@@ -1,7 +1,6 @@
 from collections import abc
 
 import torch
-from torch._six import string_classes
 from torch.nn.parallel.scatter_gather import gather
 
 from ..common.module_util import check_if_wrapped, get_module
@@ -21,7 +20,7 @@ def get_device_index(data):
             result = get_device_index(d)
             if result is not None:
                 return result
-    elif isinstance(data, abc.Sequence) and not isinstance(data, string_classes):
+    elif isinstance(data, abc.Sequence) and not isinstance(data, (str, bytes)):
         for d in data:
             result = get_device_index(d)
             if result is not None:
diff --git a/torchdistill/core/training.py b/torchdistill/core/training.py
index 39c34c0d..80a8044b 100644
--- a/torchdistill/core/training.py
+++ b/torchdistill/core/training.py
@@ -14,7 +14,7 @@
 from ..common.module_util import check_if_wrapped, freeze_module_params, get_module, \
     unfreeze_module_params, get_updatable_param_names
 from ..datasets.util import build_data_loaders
-from ..losses.registry import get_custom_loss, get_single_loss, get_func2extract_org_output
+from ..losses.registry import get_high_level_loss, get_single_loss, get_func2extract_org_output
 from ..models.util import redesign_model
 from ..models.wrapper import AuxiliaryModelWrapper, build_auxiliary_model_wrapper
 from ..optim.registry import get_optimizer, get_scheduler
@@ -64,7 +64,7 @@ def setup_loss(self, train_config):
         org_criterion_config = org_term_config.get('criterion', dict()) if isinstance(org_term_config, dict) else None
         self.org_criterion = None if org_criterion_config is None or len(org_criterion_config) == 0 \
             else get_single_loss(org_criterion_config)
-        self.criterion = get_custom_loss(criterion_config)
+        self.criterion = get_high_level_loss(criterion_config)
         logger.info(self.criterion)
         self.extract_org_loss = get_func2extract_org_output(criterion_config.get('func2extract_org_loss', None))
 
diff --git a/torchdistill/eval/coco.py b/torchdistill/eval/coco.py
index fee9e3e7..3b7ee51c 100644
--- a/torchdistill/eval/coco.py
+++ b/torchdistill/eval/coco.py
@@ -9,7 +9,6 @@
 import torch.distributed as dist
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
-from torch._six import string_classes
 
 from ..common.main_util import get_world_size
 
@@ -325,7 +324,7 @@ def loadRes(self, resFile):
 
     # print('Loading and preparing results...')
     # tic = time.time()
-    if isinstance(resFile, string_classes):
+    if isinstance(resFile, (str, bytes)):
         anns = json.load(open(resFile))
     elif type(resFile) == np.ndarray:
         anns = self.loadNumpyAnnotations(resFile)
diff --git a/torchdistill/losses/__init__.py b/torchdistill/losses/__init__.py
index 7edfeb20..4d720820 100644
--- a/torchdistill/losses/__init__.py
+++ b/torchdistill/losses/__init__.py
@@ -1 +1 @@
-from . import custom, single, util
+from . import high_level, single, util
diff --git a/torchdistill/losses/custom.py b/torchdistill/losses/high_level.py
similarity index 93%
rename from torchdistill/losses/custom.py
rename to torchdistill/losses/high_level.py
index 2c30adb0..2a671f7f 100644
--- a/torchdistill/losses/custom.py
+++ b/torchdistill/losses/high_level.py
@@ -1,12 +1,12 @@
 from torch import nn
 
-from .registry import register_custom_loss, get_single_loss
+from .registry import register_high_level_loss, get_single_loss
 from ..common.constant import def_logger
 
 logger = def_logger.getChild(__name__)
 
 
-class CustomLoss(nn.Module):
+class AbstractLoss(nn.Module):
     def __init__(self, criterion_config):
         super().__init__()
         term_dict = dict()
@@ -27,8 +27,8 @@ def __str__(self):
         return desc
 
 
-@register_custom_loss
-class GeneralizedCustomLoss(CustomLoss):
+@register_high_level_loss
+class WeightedSumLoss(AbstractLoss):
     def __init__(self, criterion_config):
         super().__init__(criterion_config)
         self.org_loss_factor = criterion_config['org_term'].get('factor', None)
diff --git a/torchdistill/losses/registry.py b/torchdistill/losses/registry.py
index 6aa71969..88068065 100644
--- a/torchdistill/losses/registry.py
+++ b/torchdistill/losses/registry.py
@@ -1,24 +1,24 @@
 from ..common import misc_util
 
 LOSS_DICT = misc_util.get_classes_as_dict('torch.nn.modules.loss')
-CUSTOM_LOSS_DICT = dict()
+HIGH_LEVEL_LOSS_DICT = dict()
 LOSS_WRAPPER_DICT = dict()
 SINGLE_LOSS_DICT = dict()
 FUNC2EXTRACT_ORG_OUTPUT_DICT = dict()
 
 
-def register_custom_loss(arg=None, **kwargs):
-    def _register_custom_loss(cls_or_func):
+def register_high_level_loss(arg=None, **kwargs):
+    def _register_high_level_loss(cls_or_func):
         key = kwargs.get('key')
         if key is None:
             key = cls_or_func.__name__
 
-        CUSTOM_LOSS_DICT[key] = cls_or_func
+        HIGH_LEVEL_LOSS_DICT[key] = cls_or_func
         return cls_or_func
 
     if callable(arg):
-        return _register_custom_loss(arg)
-    return _register_custom_loss
+        return _register_high_level_loss(arg)
+    return _register_high_level_loss
 
 
 def register_loss_wrapper(arg=None, **kwargs):
@@ -72,11 +72,11 @@ def get_loss(key, param_dict=None, **kwargs):
     raise ValueError('No loss `{}` registered'.format(key))
 
 
-def get_custom_loss(criterion_config):
+def get_high_level_loss(criterion_config):
     criterion_type = criterion_config['type']
-    if criterion_type in CUSTOM_LOSS_DICT:
-        return CUSTOM_LOSS_DICT[criterion_type](criterion_config)
-    raise ValueError('No custom loss `{}` registered'.format(criterion_type))
+    if criterion_type in HIGH_LEVEL_LOSS_DICT:
+        return HIGH_LEVEL_LOSS_DICT[criterion_type](criterion_config)
+    raise ValueError('No high-level loss `{}` registered'.format(criterion_type))
 
 
 def get_loss_wrapper(single_loss, params_config, wrapper_config):
diff --git a/torchdistill/losses/single.py b/torchdistill/losses/single.py
index 639144cc..df09e9cb 100644
--- a/torchdistill/losses/single.py
+++ b/torchdistill/losses/single.py
@@ -363,7 +363,7 @@ def forward(self, student_io_dict, teacher_io_dict, *args, **kwargs):
 class CCKDLoss(nn.Module):
     """
     "Correlation Congruence for Knowledge Distillation"
-    Configure KDLoss in a yaml file to meet eq. (7), using GeneralizedCustomLoss
+    Configure KDLoss in a yaml file to meet eq. (7), using WeightedSumLoss
     """
     def __init__(self, student_linear_path, teacher_linear_path, kernel_params, reduction, **kwargs):
         super().__init__()
diff --git a/torchdistill/models/custom/bottleneck/detection/rcnn.py b/torchdistill/models/custom/bottleneck/detection/rcnn.py
index 41aa4223..f75913a4 100644
--- a/torchdistill/models/custom/bottleneck/detection/rcnn.py
+++ b/torchdistill/models/custom/bottleneck/detection/rcnn.py
@@ -1,7 +1,7 @@
 from torch.hub import load_state_dict_from_url
-from torchvision.models.detection.faster_rcnn import FasterRCNN, model_urls as fasterrcnn_model_urls
-from torchvision.models.detection.keypoint_rcnn import KeypointRCNN, model_urls as keypointrcnn_model_urls
-from torchvision.models.detection.mask_rcnn import MaskRCNN, model_urls as maskrcnn_model_urls
+from torchvision.models.detection.faster_rcnn import FasterRCNN
+from torchvision.models.detection.keypoint_rcnn import KeypointRCNN
+from torchvision.models.detection.mask_rcnn import MaskRCNN
 from torchvision.ops import MultiScaleRoIAlign
 
 from .resnet_backbone import custom_resnet_fpn_backbone
@@ -28,7 +28,9 @@ def custom_fasterrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                                 output_size=7, sampling_ratio=2)
     model = FasterRCNN(backbone_model, num_classes, box_roi_pool=box_roi_pool, **kwargs)
     if pretrained and backbone_name.endswith('resnet50'):
-        state_dict = load_state_dict_from_url(fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
+        state_dict = \
+            load_state_dict_from_url('https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth',
+                                     progress=progress)
         model.load_state_dict(state_dict, strict=False)
     return model
 
@@ -56,7 +58,9 @@ def custom_maskrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                                 output_size=14, sampling_ratio=2)
     model = MaskRCNN(backbone_model, num_classes, box_roi_pool=box_roi_pool, mask_roi_pool=mask_roi_pool **kwargs)
     if pretrained and backbone_name.endswith('resnet50'):
-        state_dict = load_state_dict_from_url(maskrcnn_model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
+        state_dict = \
+            load_state_dict_from_url('https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth',
+                                     progress=progress)
         model.load_state_dict(state_dict, strict=False)
     return model
 
@@ -86,6 +90,7 @@ def custom_keypointrcnn_resnet_fpn(backbone, pretrained=True, progress=True, num
                          keypoint_roi_pool=keypoint_roi_pool, **kwargs)
     if pretrained and backbone_name.endswith('resnet50'):
         state_dict = \
-            load_state_dict_from_url(keypointrcnn_model_urls['keypointrcnn_resnet50_fpn_coco'], progress=progress)
+            load_state_dict_from_url('https://download.pytorch.org/models/keypointrcnn_resnet50_fpn_coco-fc266e95.pth',
+                                     progress=progress)
         model.load_state_dict(state_dict, strict=False)
     return model