-
Notifications
You must be signed in to change notification settings - Fork 852
TS-4916: Fix for an H2-infinite-loop deadlock #1100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -881,11 +881,10 @@ Http2ConnectionState::create_stream(Http2StreamId new_id) | |
|
|
||
| Http2Stream *new_stream = THREAD_ALLOC_INIT(http2StreamAllocator, this_ethread()); | ||
| new_stream->init(new_id, client_settings.get(HTTP2_SETTINGS_INITIAL_WINDOW_SIZE)); | ||
| stream_list.push(new_stream); | ||
| add_to_active_streams(new_stream); | ||
|
|
||
| latest_streamid = new_id; | ||
|
|
||
| ink_assert(client_streams_count < UINT32_MAX); | ||
| ++client_streams_count; | ||
| new_stream->set_parent(ua_session); | ||
| new_stream->mutex = ua_session->mutex; | ||
| ua_session->get_netvc()->add_to_active_queue(); | ||
|
|
@@ -899,6 +898,7 @@ Http2ConnectionState::find_stream(Http2StreamId id) const | |
| for (Http2Stream *s = stream_list.head; s; s = s->link.next) { | ||
| if (s->get_id() == id) | ||
| return s; | ||
| ink_assert(s != s->link.next); | ||
| } | ||
| return NULL; | ||
| } | ||
|
|
@@ -913,6 +913,7 @@ Http2ConnectionState::restart_streams() | |
| if (s->get_state() == HTTP2_STREAM_STATE_HALF_CLOSED_REMOTE && min(this->client_rwnd, s->client_rwnd) > 0) { | ||
| s->send_response_body(); | ||
| } | ||
| ink_assert(s != next); | ||
| s = next; | ||
| } | ||
| } | ||
|
|
@@ -924,10 +925,13 @@ Http2ConnectionState::cleanup_streams() | |
| while (s) { | ||
| Http2Stream *next = s->link.next; | ||
| this->delete_stream(s); | ||
| ink_assert(s != next); | ||
| s = next; | ||
| } | ||
|
|
||
| client_streams_count = 0; | ||
| ink_assert(0 == client_streams_count); | ||
| ink_assert(NULL == stream_list.head); | ||
|
|
||
| if (!is_state_closed()) { | ||
| ua_session->get_netvc()->add_to_keep_alive_queue(); | ||
| } | ||
|
|
@@ -936,30 +940,70 @@ Http2ConnectionState::cleanup_streams() | |
| void | ||
| Http2ConnectionState::delete_stream(Http2Stream *stream) | ||
| { | ||
| // The following check allows the method to be called safely on already deleted streams. | ||
| if (deleted_from_active_streams(stream)) { | ||
| return; | ||
| } | ||
|
|
||
| SCOPED_MUTEX_LOCK(lock, this->mutex, this_ethread()); | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need to grab a lock here? Given the mutex sharing between vc and session and ConnectionState, we should already be holding this mutex.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we are sure That would mean that at least in one thread was not holding the right lock. In that case would not that mean that “rearranging some of the stream_count book keeping” would rather hide the problem than to fix it? Trying to help based on the comment decided to trace few paths in the source code that may not be holding ConnectionState lock (theoretically) and grabbed the lock on the common path closest to the structures that needed protection (which based on my understanding should not be a problem if the thread is already holding the lock). Actually never noticed the race condition in my debugging so I am going to remove this line from the PR and I will consider limiting my future changes to the issue I am trying to fix. |
||
| if (Http2::stream_priority_enabled) { | ||
| DependencyTree::Node *node = stream->priority_node; | ||
| if (node != NULL && node->active) { | ||
| dependency_tree->deactivate(node, 0); | ||
| } | ||
| } | ||
|
|
||
| stream_list.remove(stream); | ||
| rm_from_active_streams(stream); | ||
| stream->initiating_close(); | ||
|
|
||
| ink_assert(client_streams_count > 0); | ||
| --client_streams_count; | ||
|
|
||
| if (client_streams_count == 0 && ua_session) { | ||
| ua_session->get_netvc()->add_to_keep_alive_queue(); | ||
| } | ||
| } | ||
|
|
||
| inline bool | ||
| Http2ConnectionState::deleted_from_active_streams(Http2Stream *stream) | ||
| { | ||
| // Is is a deleted DLL<> element ? | ||
| return !stream_list.in(stream); | ||
| } | ||
|
|
||
| void | ||
| Http2ConnectionState::add_to_active_streams(Http2Stream *stream) | ||
| { | ||
| // Make sure that we are not breaking the DLL<> structure. | ||
| ink_assert(NULL != stream); | ||
| ink_assert(!stream_list.in(stream)); | ||
| ink_assert(this->mutex->thread_holding == this_ethread()); | ||
|
|
||
| stream_list.push(stream); | ||
|
|
||
| ink_assert(client_streams_count < UINT32_MAX); | ||
| ++client_streams_count; | ||
| } | ||
|
|
||
| void | ||
| Http2ConnectionState::rm_from_active_streams(Http2Stream *stream) | ||
| { | ||
| // Make sure that we are not breaking the DLL<> structure. | ||
| ink_assert(NULL != stream); | ||
| ink_assert(stream_list.in(stream)); | ||
| ink_assert(this->mutex->thread_holding == this_ethread()); | ||
|
|
||
| stream_list.remove(stream); | ||
|
|
||
| ink_assert(client_streams_count > 0); | ||
| --client_streams_count; | ||
| } | ||
|
|
||
| void | ||
| Http2ConnectionState::update_initial_rwnd(Http2WindowSize new_size) | ||
| { | ||
| // Update stream level window sizes | ||
| for (Http2Stream *s = stream_list.head; s; s = s->link.next) { | ||
| s->client_rwnd = new_size - (client_settings.get(HTTP2_SETTINGS_INITIAL_WINDOW_SIZE) - s->client_rwnd); | ||
| ink_assert(s != s->link.next); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1097,6 +1141,7 @@ void | |
| Http2ConnectionState::send_data_frames(Http2Stream *stream) | ||
| { | ||
| if (stream->get_state() == HTTP2_STREAM_STATE_CLOSED) { | ||
| this->delete_stream(stream); | ||
| return; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Were we leaking a stream without a delete call?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I may have overdone it here. I thought I would add this deletion because few lines below there is deleting if state == |
||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -267,10 +267,12 @@ Http2Stream::do_io_close(int /* flags */) | |
| // Make sure any trailing end of stream frames are sent | ||
| // Ourselve will be removed at send_data_frames or closing connection phase | ||
| static_cast<Http2ClientSession *>(parent)->connection_state.send_data_frames(this); | ||
|
|
||
| // Make sure the stream is deleted at this point since next step is self destroy. | ||
| this->delete_stream(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It isn't clear to me that this delete_stream is necessary either. The parent is notified to release the stream via the "static_cast<Http2ClientSession *>(parent)->connection_state.release_stream(this);" in the Http2Stream handler. But that might be just 7.0. There are some fixes in this area that are marked for backport to 6.2.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what actually made sure we delete the stream from the In the version 6.2.1 we have Then in a later version we added What caused the destroying of It seemed to me we have been always vulnerable to this problem despite the fixes. That is why I thought I would add this “catch-all-delete-stream” line here for all the current and future missing states. After this point we are going to |
||
| } | ||
|
|
||
| parent = NULL; | ||
| // Check to see if the stream is in the closed state | ||
| ink_assert(get_state() == HTTP2_STREAM_STATE_CLOSED); | ||
|
|
||
| clear_timers(); | ||
| clear_io_events(); | ||
|
|
@@ -558,6 +560,14 @@ Http2Stream::reenable(VIO *vio) | |
| } | ||
| } | ||
|
|
||
| void | ||
| Http2Stream::delete_stream() | ||
| { | ||
| if (parent) { | ||
| static_cast<Http2ClientSession *>(parent)->connection_state.delete_stream(this); | ||
| } | ||
| } | ||
|
|
||
| void | ||
| Http2Stream::destroy() | ||
| { | ||
|
|
@@ -591,6 +601,14 @@ Http2Stream::destroy() | |
| } | ||
| chunked_handler.clear(); | ||
| super::destroy(); | ||
|
|
||
| // Current Http2ConnectionState implementation uses a memory pool for instantiating streams and DLL<> stream_list for storing | ||
| // active streams. Destroying a stream before deleting it from stream_list and then creating a new one + reusing the same chunk | ||
| // from the memory pool right away always leads to destroying the DLL structure (deadlocks, inconsistencies). | ||
| // The following is meant as a safety net since the consequences are disastrous. Until the design/implementation changes it seems | ||
| // less error prone to (double) delete before destroying (noop if already deleted). | ||
| this->delete_stream(); | ||
|
|
||
| THREAD_FREE(this, http2StreamAllocator, this_ethread()); | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, added this check to 7.0 via the TS-4813 fix. Though as a direct "if (!stream_list.in(stream))" check.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, needed that for calling
delete_stream()on already deleted streams for my “catch-all-delete-stream” calls (6.2.1).Also grouped some of the
DLL<>+client_streams_countupdates in separate functions.Looking at how the master changed since I started working on this I may need to revert that to be consistent with master.