From cff5064d0b463a863d7e3a061514f9ff44a39eed Mon Sep 17 00:00:00 2001 From: Winlin Date: Mon, 28 Aug 2023 11:31:36 +0800 Subject: [PATCH] HLS: Fix on_hls and hls_dispose critical zone issue. v5.0.174 v6.0.69 (#3781) on_hls and hls_dispose are two coroutines, with potential race conditions. That is, during on_hls, if the API Server being accessed is slower, it will switch to the hls_dispose coroutine to start cleaning up. However, when the API Server is processing the slice, a situation may occur where the slice does not exist, resulting in the following log: ``` [2023-08-22 12:03:20.309][WARN][40][x5l48q7b][11] ignore task failed code=4005(HttpStatus)(Invalid HTTP status code) : callback on_hls http://localhost:2024/terraform/v1/hooks/srs/hls : http: post http://localhost:2024/terraform/v1/hooks/srs/hls with {"server_id":"vid-5d7dxn8","service_id":"cu153o7g","action":"on_hls","client_id":"x5l48q7b","ip":"172.17.0.1","vhost":"__defaultVhost__","app":"live","tcUrl":"srt://172.17.0.2/live","stream":"stream-44572-2739617660809856576","param":"secret=1ed8e0ffbc53439c8fc8da30ab8c19f0","duration":4.57,"cwd":"/usr/local/srs-stack/platform","file":"./objs/nginx/html/live/stream-44572-2739617660809856576-1.ts","url":"live/stream-44572-2739617660809856576-1.ts","m3u8":"./objs/nginx/html/live/stream-44572-2739617660809856576.m3u8","m3u8_url":"live/stream-44572-2739617660809856576.m3u8","seq_no":1,"stream_url":"/live/stream-44572-2739617660809856576","stream_id":"vid-0n9zoz3"}, status=500, res=invalid ts file ./objs/nginx/html/live/stream-44572-2739617660809856576-1.ts: stat ./objs/nginx/html/live/stream-44572-2739617660809856576-1.ts: no such file or directory thread [40][x5l48q7b]: call() [./src/app/srs_app_hls.cpp:122][errno=11] thread [40][x5l48q7b]: on_hls() [./src/app/srs_app_http_hooks.cpp:401][errno=11] thread [40][x5l48q7b]: do_post() [./src/app/srs_app_http_hooks.cpp:638][errno=11] [error] 2023/08/22 12:03:20.076984 [52][1001] Serve /terraform/v1/hooks/srs/hls 
failed, err is stat ./objs/nginx/html/live/stream-44572-2739617660809856576-1.ts: no such file or directory invalid ts file ./objs/nginx/html/live/stream-44572-2739617660809856576-1.ts main.handleOnHls.func1.1 /g/platform/srs-hooks.go:684 main.handleOnHls.func1 /g/platform/srs-hooks.go:720 net/http.HandlerFunc.ServeHTTP /usr/local/go/src/net/http/server.go:2084 net/http.(*ServeMux).ServeHTTP /usr/local/go/src/net/http/server.go:2462 net/http.serverHandler.ServeHTTP /usr/local/go/src/net/http/server.go:2916 net/http.(*conn).serve /usr/local/go/src/net/http/server.go:1966 runtime.goexit /usr/local/go/src/runtime/asm_amd64.s:1571 ``` Similarly, when the stream is stopped, on_hls is also called to handle the last slice. If the API Server is slow at that moment, the hls_dispose coroutine gets scheduled and calls unpublish a second time. Because the previous unpublish is still blocked in on_hls, the following misleading log appears: ``` [2023-08-22 12:03:18.748][INFO][40][6498088c] hls cycle to dispose hls /live/stream-44572-2739617660809856576, timeout=10000000ms [2023-08-22 12:03:18.752][WARN][40][6498088c][115] flush audio ignored, for segment is not open. [2023-08-22 12:03:18.752][WARN][40][6498088c][115] ignore the segment close, for segment is not open. ``` Although these log messages are harmless, they can be misleading when troubleshooting. The solution is to add an 'unpublishing' state: while HLS is in the 'unpublishing' state, hls_dispose does not clean up the slices and a repeated unpublish is ignored. 
--------- Co-authored-by: Haibo Chen <495810242@qq.com> --- .run/srs-stack.run.xml | 11 +++++++++++ README.md | 2 +- trunk/doc/CHANGELOG.md | 2 ++ trunk/src/app/srs_app_hls.cpp | 21 ++++++++++++++++----- trunk/src/app/srs_app_hls.hpp | 4 +++- trunk/src/core/srs_core_version5.hpp | 2 +- trunk/src/core/srs_core_version6.hpp | 2 +- 7 files changed, 35 insertions(+), 9 deletions(-) create mode 100644 .run/srs-stack.run.xml diff --git a/.run/srs-stack.run.xml b/.run/srs-stack.run.xml new file mode 100644 index 0000000000..720752edf6 --- /dev/null +++ b/.run/srs-stack.run.xml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/README.md b/README.md index 7060ca71e2..88415ab927 100755 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ developers listed below: [![](https://opencollective.com/srs-server/backers.svg?width=800&button=false)](https://opencollective.com/srs-server) We at SRS aim to establish a non-profit, open-source community that assists developers worldwide in creating -their own high-quality streaming and RTC platforms to support your businesses. +your own high-quality streaming and RTC platforms to support your businesses. ## AUTHORS diff --git a/trunk/doc/CHANGELOG.md b/trunk/doc/CHANGELOG.md index 062a8f9259..14d6c823e8 100644 --- a/trunk/doc/CHANGELOG.md +++ b/trunk/doc/CHANGELOG.md @@ -7,6 +7,7 @@ The changelog for SRS. ## SRS 6.0 Changelog +* v6.0, 2023-08-28, Merge [#3781](https://github.com/ossrs/srs/pull/3781): HLS: Fix on_hls and hls_dispose critical zone issue. v6.0.69 (#3781) * v6.0, 2023-08-28, Merge [#3768](https://github.com/ossrs/srs/pull/3768): Support include empty config file. v6.0.68 (#3768) * v6.0, 2023-08-25, Merge [#3782](https://github.com/ossrs/srs/pull/3782): HLS: Support reload HLS asynchronously. v6.0.67 (#3782) * v6.0, 2023-08-22, Merge [#3775](https://github.com/ossrs/srs/pull/3775): Bugfix: Log format output type does not match. v6.0.66 (#3699) @@ -80,6 +81,7 @@ The changelog for SRS. 
## SRS 5.0 Changelog +* v5.0, 2023-08-28, Merge [#3781](https://github.com/ossrs/srs/pull/3781): HLS: Fix on_hls and hls_dispose critical zone issue. v5.0.174 (#3781) * v5.0, 2023-08-28, Merge [#3768](https://github.com/ossrs/srs/pull/3768): Support include empty config file. v5.0.173 (#3768) * v5.0, 2023-08-25, Merge [#3782](https://github.com/ossrs/srs/pull/3782): HLS: Support reload HLS asynchronously. v5.0.172 (#3782) * v5.0, 2023-08-22, Merge [#3775](https://github.com/ossrs/srs/pull/3775): Bugfix: Log format output type does not match. v5.0.171 (#3699) diff --git a/trunk/src/app/srs_app_hls.cpp b/trunk/src/app/srs_app_hls.cpp index 8ac72547b0..7fa2fc2990 100644 --- a/trunk/src/app/srs_app_hls.cpp +++ b/trunk/src/app/srs_app_hls.cpp @@ -1136,6 +1136,7 @@ SrsHls::SrsHls() enabled = false; disposable = false; + unpublishing_ = false; async_reload_ = reloading_ = false; last_update_time = 0; hls_dts_directly = false; @@ -1222,7 +1223,7 @@ void SrsHls::dispose() srs_error_t SrsHls::cycle() { srs_error_t err = srs_success; - + if (last_update_time <= 0) { last_update_time = srs_get_system_time(); } @@ -1231,6 +1232,9 @@ srs_error_t SrsHls::cycle() return err; } + // When unpublishing, we must wait for it done. + if (unpublishing_) return err; + // When reloading, we must wait for it done. if (async_reload_) return err; @@ -1243,12 +1247,12 @@ srs_error_t SrsHls::cycle() return err; } last_update_time = srs_get_system_time(); - + if (!disposable) { return err; } disposable = false; - + srs_trace("hls cycle to dispose hls %s, timeout=%dms", req->get_stream_url().c_str(), hls_dispose); dispose(); @@ -1295,6 +1299,8 @@ srs_error_t SrsHls::on_publish() // if enabled, open the muxer. enabled = true; + // Reset the unpublishing state. + unpublishing_ = false; // ok, the hls can be dispose, or need to be dispose. 
disposable = true; @@ -1310,6 +1316,10 @@ void SrsHls::on_unpublish() if (!enabled) { return; } + + // During unpublishing, there maybe callback that switch to other coroutines. + if (unpublishing_) return; + unpublishing_ = true; if ((err = controller->on_unpublish()) != srs_success) { srs_warn("hls: ignore unpublish failed %s", srs_error_desc(err).c_str()); @@ -1317,6 +1327,7 @@ void SrsHls::on_unpublish() } enabled = false; + unpublishing_ = false; } srs_error_t SrsHls::on_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* format) @@ -1324,7 +1335,7 @@ srs_error_t SrsHls::on_audio(SrsSharedPtrMessage* shared_audio, SrsFormat* forma srs_error_t err = srs_success; // If not able to transmux to HLS, ignore. - if (!enabled) return err; + if (!enabled || unpublishing_) return err; if (async_reload_) return reload(); // Ignore if no format->acodec, it means the codec is not parsed, or unknown codec. @@ -1406,7 +1417,7 @@ srs_error_t SrsHls::on_video(SrsSharedPtrMessage* shared_video, SrsFormat* forma srs_error_t err = srs_success; // If not able to transmux to HLS, ignore. - if (!enabled) return err; + if (!enabled || unpublishing_) return err; if (async_reload_) return reload(); // Ignore if no format->vcodec, it means the codec is not parsed, or unknown codec. diff --git a/trunk/src/app/srs_app_hls.hpp b/trunk/src/app/srs_app_hls.hpp index 7975230e07..e111b68b69 100644 --- a/trunk/src/app/srs_app_hls.hpp +++ b/trunk/src/app/srs_app_hls.hpp @@ -279,10 +279,12 @@ class SrsHls bool enabled; // Whether the HLS stream is able to be disposed. bool disposable; + // Whether the HLS stream is unpublishing. + bool unpublishing_; // Whether requires HLS to do reload asynchronously. bool async_reload_; bool reloading_; - // To detect heartbeat and dipose it if configured. + // To detect heartbeat and dispose it if configured. 
srs_utime_t last_update_time; private: // If the diff=dts-previous_audio_dts is about 23, diff --git a/trunk/src/core/srs_core_version5.hpp b/trunk/src/core/srs_core_version5.hpp index 044a8e80d9..89715af75b 100644 --- a/trunk/src/core/srs_core_version5.hpp +++ b/trunk/src/core/srs_core_version5.hpp @@ -9,6 +9,6 @@ #define VERSION_MAJOR 5 #define VERSION_MINOR 0 -#define VERSION_REVISION 173 +#define VERSION_REVISION 174 #endif diff --git a/trunk/src/core/srs_core_version6.hpp b/trunk/src/core/srs_core_version6.hpp index 8631d85106..19b1b1920b 100644 --- a/trunk/src/core/srs_core_version6.hpp +++ b/trunk/src/core/srs_core_version6.hpp @@ -9,6 +9,6 @@ #define VERSION_MAJOR 6 #define VERSION_MINOR 0 -#define VERSION_REVISION 68 +#define VERSION_REVISION 69 #endif