@@ -69,7 +69,8 @@ void generic_moe_gemm_kernelLauncher(const T* A,
                                      cudaStream_t stream,
                                      int* kernel_occupancy = nullptr) {
   if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
-    PADDLE_FATAL("[MoeGemm] Grouped gemm does not support split-k");
+    throw std::runtime_error(
+        "[MoeGemm] Grouped gemm does not support split-k");
   }
 
 #ifdef PADDLE_CUDA_BF16
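
Replacing PADDLE_FATAL with throw std::runtime_error changes the failure mode from a hard process abort into a catchable exception. A minimal caller-side sketch of what that enables (the wrapper name and the fallback comment are hypothetical, not part of this patch):

#include <iostream>
#include <stdexcept>

void run_moe_gemm_or_report(/* launcher arguments elided */) {
  try {
    // generic_moe_gemm_kernelLauncher(...);  // may now throw std::runtime_error
  } catch (const std::runtime_error& e) {
    // The caller can report the error or retry with a different
    // CutlassGemmConfig instead of taking down the whole process.
    std::cerr << "MoE GEMM dispatch failed: " << e.what() << '\n';
  }
}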
@@ -169,7 +170,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
   int occupancy = std::min(2, GemmGrouped::maximum_active_blocks());
 
   if (occupancy == 0) {
-    PADDLE_FATAL(
+    throw std::runtime_error(
         "[MoE Runner] GPU lacks the shared memory resources to run "
         "GroupedGEMM kernel");
   }
@@ -197,7 +198,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
   if (can_implement != cutlass::Status::kSuccess) {
     std::string err_msg = "MoEFC kernel will fail for params. Error: " +
                           std::string(cutlassGetStatusString(can_implement));
-    PADDLE_FATAL("[MoE Runner] " + err_msg);
+    throw std::runtime_error("[MoE Runner] " + err_msg);
   }
 
   auto init_status = gemm.initialize(args);
@@ -243,7 +244,7 @@ struct dispatch_stages {
     std::string err_msg = "Cutlass fpA_intB gemm. Not instantiates for arch " +
                           std::to_string(arch::kMinComputeCapability) +
                           " with stages set to " + std::to_string(Stages);
-    PADDLE_FATAL("[dispatch_stages::dispatch] " + err_msg);
+    throw std::runtime_error("[dispatch_stages::dispatch] " + err_msg);
   }
 };
 
@@ -394,7 +395,8 @@ void dispatch_gemm_config(const T* A,
     default:
       std::string err_msg = "dispatch_gemm_config does not support stages " +
                             std::to_string(gemm_config.stages);
-      PADDLE_FATAL("[MoE][dispatch_gemm_config] " + err_msg);
+      throw std::runtime_error(
+          "[MoE][dispatch_gemm_config] " + err_msg);
       break;
   }
 }
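
For context, dispatch_gemm_config presumably switches on gemm_config.stages and forwards to the dispatch_stages specializations patched above, with this default branch as the only failure path. A rough sketch of that shape, with template and argument lists elided (not the exact Paddle signature):

switch (gemm_config.stages) {
  case 2:
    dispatch_stages</* T, WeightType, arch, ..., */ 2>::dispatch(/* ... */);
    break;
  case 3:
    dispatch_stages</* T, WeightType, arch, ..., */ 3>::dispatch(/* ... */);
    break;
  default: {
    std::string err_msg = "dispatch_gemm_config does not support stages " +
                          std::to_string(gemm_config.stages);
    throw std::runtime_error("[MoE][dispatch_gemm_config] " + err_msg);
  }
}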
@@ -452,15 +454,16 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
       dispatch_gemm_config_macro(64, 128, 64, 32, 64, 64);
       dispatch_gemm_config_macro(128, 128, 64, 64, 32, 64);
       case CutlassTileConfig::Undefined:
-        PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
+        throw std::runtime_error(
+            "[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
         break;
       case CutlassTileConfig::ChooseWithHeuristic:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] gemm config should have "
             "already been set by heuristic.");
         break;
       default:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] Config is invalid for same "
             "type MoE tensorop GEMM.");
         break;
@@ -497,38 +500,44 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
       dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
       dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
       case CutlassTileConfig::Undefined:
-        PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
+        throw std::runtime_error(
+            "[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
         break;
       case CutlassTileConfig::ChooseWithHeuristic:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] gemm config should have "
             "already been set by heuristic.");
         break;
       default:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] Config is invalid for "
             "mixed type tensorop GEMM.");
         break;
     }
   } else {
     switch (gemm_config.tile_config) {
       dispatch_gemm_config_macro(16, 128, 64, 16, 32, 64);
+      dispatch_gemm_config_macro(16, 256, 64, 16, 64, 64);
+      dispatch_gemm_config_macro(64, 64, 64, 32, 32, 64);
       dispatch_gemm_config_macro(32, 128, 64, 32, 32, 64);
+      dispatch_gemm_config_macro(128, 64, 64, 64, 32, 64);
       dispatch_gemm_config_macro(64, 128, 64, 64, 64, 64);
       dispatch_gemm_config_macro(128, 128, 64, 64, 64, 64);
       dispatch_gemm_config_macro(128, 128, 64, 128, 32, 64);
       dispatch_gemm_config_macro(128, 256, 64, 64, 64, 64);
       dispatch_gemm_config_macro(64, 128, 64, 64, 32, 64);
+      dispatch_gemm_config_macro(256, 128, 64, 64, 64, 64);
       case CutlassTileConfig::Undefined:
-        PADDLE_FATAL("[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
+        throw std::runtime_error(
+            "[dispatch_moe_gemm_to_cutlass] gemm config undefined.");
         break;
       case CutlassTileConfig::ChooseWithHeuristic:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] gemm config should have "
             "already been set by heuristic.");
         break;
       default:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass] Config is invalid for "
             "mixed type tensorop GEMM.");
         break;
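
The four new dispatch_gemm_config_macro lines widen the tile menu for this path (16x256x64, 64x64x64, 128x64x64, and 256x128x64 threadblocks). The macro itself is not shown in this diff; a plausible, simplified expansion for one new shape, assuming it maps a CutlassTileConfig enumerator to a dispatch_gemm_config instantiation (the enumerator spelling and argument list here are illustrative):

// Hypothetical expansion of dispatch_gemm_config_macro(16, 256, 64, 16, 64, 64)
case CutlassTileConfig::CtaShape16x256x64_WarpShape16x64x64:
  dispatch_gemm_config<T, WeightType, arch, EpilogueTag,
                       cutlass::gemm::GemmShape<16, 256, 64>,  // threadblock tile
                       cutlass::gemm::GemmShape<16, 64, 64>>(  // warp tile
      /* A, B, weight_scales, biases, C, ..., gemm_config, stream, occupancy */);
  break;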
@@ -561,17 +570,17 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
     switch (gemm_config.tile_config) {
       dispatch_gemm_config_macro(128, 128, 8, 64, 64, 8);
       case CutlassTileConfig::Undefined:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass][SIMT] gemm config "
             "undefined.");
         break;
       case CutlassTileConfig::ChooseWithHeuristic:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass][SIMT] gemm config should "
             "have already been set by heuristic.");
         break;
       default:
-        PADDLE_FATAL(
+        throw std::runtime_error(
             "[dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config "
             "for float MoE gemm.");
         break;