-
Notifications
You must be signed in to change notification settings - Fork 409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix mpp hang error if some error happens during compile of mpp plan in TiFlash #1533
Merged
ti-srebot
merged 12 commits into
pingcap:master
from
windtalker:mpp_hangs_for_complex_sql
Mar 11, 2021
Merged
Changes from 7 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
25fc741
fix mpp query hang if some error happens
windtalker ff68e3e
add ut
windtalker b0abeb3
revert unused change
windtalker ee75903
Merge branch 'master' of https://github.com/pingcap/tics into mpp_han…
windtalker 6e89bad
Merge branch 'master' into mpp_hangs_for_complex_sql
windtalker 84ac264
address comments
windtalker 15773a7
Merge branch 'master' of github.com:pingcap/tics into mpp_hangs_for_c…
windtalker 8db7a7e
address comments
windtalker 42cb7c4
Merge branch 'master' into mpp_hangs_for_complex_sql
windtalker 2748388
Merge branch 'master' of github.com:pingcap/tics into mpp_hangs_for_c…
windtalker 3499d4e
Merge branch 'mpp_hangs_for_complex_sql' of github.com:windtalker/tic…
windtalker 14752c0
Merge branch 'master' into mpp_hangs_for_complex_sql
windtalker File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,7 +67,7 @@ struct MPPTunnel | |
} | ||
catch (...) | ||
{ | ||
LOG_WARNING(log, "Error in destructor function of MPPTunnel"); | ||
tryLogCurrentException(log, "Error in destructor function of MPPTunnel"); | ||
} | ||
} | ||
|
||
|
@@ -126,6 +126,18 @@ struct MPPTunnel | |
cv_for_finished.notify_all(); | ||
} | ||
|
||
/// Finish the tunnel without checking the connection status. This function | ||
/// should only be used when handling an error if DispatchMPPTask fails for | ||
/// the root task. For the root task, if DispatchMPPTask fails, TiDB does | ||
/// not send an establish-MPP-connection request at all, so it is meaningless | ||
/// to check the connection status in this case; just finish the tunnel. | ||
void finish() | ||
{ | ||
std::unique_lock<std::mutex> lk(mu); | ||
finished = true; | ||
cv_for_finished.notify_all(); | ||
} | ||
|
||
// An MPPConn request has arrived; it will build a connection through this tunnel. | ||
void connect(::grpc::ServerWriter<::mpp::MPPDataPacket> * writer_) | ||
{ | ||
|
@@ -256,7 +268,7 @@ struct MPPTask : std::enable_shared_from_this<MPPTask>, private boost::noncopyab | |
// which targeted task we should send data by which tunnel. | ||
std::map<MPPTaskId, MPPTunnelPtr> tunnel_map; | ||
|
||
MPPTaskManager * manager; | ||
MPPTaskManager * manager = nullptr; | ||
|
||
Logger * log; | ||
|
||
|
@@ -279,6 +291,20 @@ struct MPPTask : std::enable_shared_from_this<MPPTask>, private boost::noncopyab | |
|
||
void cancel(); | ||
|
||
void finishAllTunnel() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ditto. |
||
{ | ||
try | ||
{ | ||
for (auto & it : tunnel_map) | ||
{ | ||
it.second->finish(); | ||
} | ||
} | ||
catch (...) | ||
{ | ||
tryLogCurrentException(log, "Failed to finish all tunnels"); | ||
} | ||
} | ||
void writeErrToAllTunnel(const String & e) | ||
{ | ||
try | ||
|
@@ -294,7 +320,7 @@ struct MPPTask : std::enable_shared_from_this<MPPTask>, private boost::noncopyab | |
} | ||
catch (...) | ||
{ | ||
LOG_WARNING(log, "Failed to write error " + e + " to all tunnel"); | ||
tryLogCurrentException(log, "Failed to write error " + e + " to all tunnels"); | ||
} | ||
} | ||
|
||
|
@@ -516,6 +542,7 @@ class MPPHandler | |
public: | ||
MPPHandler(const mpp::DispatchTaskRequest & task_request_) : task_request(task_request_), log(&Logger::get("MPPHandler")) {} | ||
grpc::Status execute(Context & context, mpp::DispatchTaskResponse * response); | ||
void handleError(MPPTaskPtr task, String error); | ||
}; | ||
|
||
} // namespace DB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# Preparation. | ||
=> DBGInvoke __init_fail_point() | ||
|
||
mysql> drop table if exists test.t | ||
mysql> create table test.t (id int, value varchar(64)) | ||
mysql> insert into test.t values(1,'a'),(2,'b'),(3,'c') | ||
mysql> alter table test.t set tiflash replica 1 | ||
|
||
func> wait_table test t | ||
|
||
|
||
# Data. | ||
|
||
## exception before mpp register non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_non_root_mpp_task) | ||
|
||
## exception before mpp register root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_root_mpp_task) | ||
|
||
## exception before mpp register tunnel for non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_tunnel_for_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_tunnel_for_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_tunnel_for_non_root_mpp_task) | ||
|
||
## exception before mpp register tunnel for root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_register_tunnel_for_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_register_tunnel_for_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_register_tunnel_for_root_mpp_task) | ||
|
||
## exception during mpp register tunnel for non root mpp task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_register_tunnel_for_non_root_mpp_task) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_during_mpp_register_tunnel_for_non_root_mpp_task is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_register_tunnel_for_non_root_mpp_task) | ||
|
||
## exception before mpp run non root task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_non_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_non_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_non_root_task_run) | ||
|
||
## exception before mpp run root task | ||
=> DBGInvoke __enable_fail_point(exception_before_mpp_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: DB::Exception: Fail point FailPoints::exception_before_mpp_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_before_mpp_root_task_run) | ||
|
||
## exception during mpp run non root task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_non_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: exchange receiver meet error : DB::Exception: Fail point FailPoints::exception_during_mpp_non_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_non_root_task_run) | ||
|
||
## exception during mpp run root task | ||
=> DBGInvoke __enable_fail_point(exception_during_mpp_root_task_run) | ||
mysql> use test; set @@tidb_isolation_read_engines='tiflash'; set @@tidb_allow_mpp=1; select count(value), id from t group by id; | ||
ERROR 1105 (HY000) at line 1: other error for mpp stream: DB::Exception: Fail point FailPoints::exception_during_mpp_root_task_run is triggered. | ||
=> DBGInvoke __disable_fail_point(exception_during_mpp_root_task_run) | ||
|
||
# Clean up. | ||
mysql> drop table if exists test.t |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about using a more critical word like
destroy
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, I will use
close