diff --git a/include/tscore/Diags.h b/include/tscore/Diags.h index 23c669637a6..21e8909babd 100644 --- a/include/tscore/Diags.h +++ b/include/tscore/Diags.h @@ -219,6 +219,7 @@ class Diags bool setup_diagslog(BaseLogFile *blf); void config_roll_diagslog(RollingEnabledValues re, int ri, int rs); void config_roll_outputlog(RollingEnabledValues re, int ri, int rs); + bool reseat_diagslog(); bool should_roll_diagslog(); bool should_roll_outputlog(); diff --git a/proxy/logging/Log.cc b/proxy/logging/Log.cc index fe4afa38d7e..10bf1132682 100644 --- a/proxy/logging/Log.cc +++ b/proxy/logging/Log.cc @@ -140,6 +140,12 @@ Log::change_configuration() Debug("log-config", "... new configuration in place"); } +void +Log::reopen_moved_log_files() +{ + Log::config->log_object_manager.reopen_moved_log_files(); +} + /*------------------------------------------------------------------------- PERIODIC EVENTS diff --git a/proxy/logging/Log.h b/proxy/logging/Log.h index 2de6d87d792..6b3c542e2b8 100644 --- a/proxy/logging/Log.h +++ b/proxy/logging/Log.h @@ -200,6 +200,14 @@ class Log // reconfiguration stuff static void change_configuration(); + + /** Check each log file path to see whether it exists and re-open if not. + * + * This is called when an external log rotation entity has moved log files to + * rolled names. This checks whether the original log file exists and, if + * not, closes the file descriptor and re-opens the file. + */ + static void reopen_moved_log_files(); static int handle_logging_mode_change(const char *name, RecDataT data_type, RecData data, void *cookie); static int handle_periodic_tasks_int_change(const char *name, RecDataT data_type, RecData data, void *cookie); diff --git a/proxy/logging/LogFile.cc b/proxy/logging/LogFile.cc index 89bdc808d59..8998776c791 100644 --- a/proxy/logging/LogFile.cc +++ b/proxy/logging/LogFile.cc @@ -391,6 +391,35 @@ LogFile::roll(long interval_start, long interval_end, bool reopen_after_rolling) return 0; } +/*------------------------------------------------------------------------- + LogFile::reopen_if_moved + + Check whether the file at the log's filename exists and, if not, close + the current file descriptor and reopen it. This function can be used to + facilitate external log rotation mechanisms which will move the original + file to a rolled filename. Logging will happen to that same file + descriptor until this function is called, at which point the non-existent + original file will be detected, the file descriptor will be closed, and + the log file will be re-opened. + + Returns True if the file was re-opened, false otherwise. + -------------------------------------------------------------------------*/ +bool +LogFile::reopen_if_moved() +{ + if (!m_name) { + return false; + } + if (LogFile::exists(m_name)) { + return false; + } + + // Both of the following log if there are problems. + close_file(); + open_file(); + return true; +} + /*------------------------------------------------------------------------- LogFile::preproc_and_try_delete diff --git a/proxy/logging/LogFile.h b/proxy/logging/LogFile.h index 1a9a7f1ee8c..7a7618265a2 100644 --- a/proxy/logging/LogFile.h +++ b/proxy/logging/LogFile.h @@ -61,6 +61,13 @@ class LogFile : public LogBufferSink, public RefCountObj bool trim_rolled(size_t rolling_max_count); int roll(long interval_start, long interval_end, bool reopen_after_rolling = false); + /** Check whether the file at the log's filename exists and, if not, close + * the current file descriptor and reopen it. + * + * @return True if the file was re-opened, false otherwise. + */ + bool reopen_if_moved(); + const char * get_name() const { diff --git a/proxy/logging/LogObject.cc b/proxy/logging/LogObject.cc index 86d2a454599..1df7c81ea2f 100644 --- a/proxy/logging/LogObject.cc +++ b/proxy/logging/LogObject.cc @@ -1241,6 +1241,22 @@ LogObjectManager::roll_files(long time_now) return num_rolled; } +void +LogObjectManager::reopen_moved_log_files() +{ + for (auto &_object : this->_objects) { + _object->m_logFile->reopen_if_moved(); + } + + ACQUIRE_API_MUTEX("A LogObjectManager::reopen_moved_log_files"); + + for (auto &_APIobject : this->_APIobjects) { + _APIobject->m_logFile->reopen_if_moved(); + } + + RELEASE_API_MUTEX("R LogObjectManager::reopen_moved_log_files"); +} + void LogObjectManager::display(FILE *str) { diff --git a/proxy/logging/LogObject.h b/proxy/logging/LogObject.h index 87f5c8061aa..fa16f0fa7c6 100644 --- a/proxy/logging/LogObject.h +++ b/proxy/logging/LogObject.h @@ -388,6 +388,7 @@ class LogObjectManager void check_buffer_expiration(long time_now); unsigned roll_files(long time_now); + void reopen_moved_log_files(); int log(LogAccess *lad); void display(FILE *str = stdout); diff --git a/src/traffic_manager/traffic_manager.cc b/src/traffic_manager/traffic_manager.cc index 0ddb653f0a7..4d507b07e42 100644 --- a/src/traffic_manager/traffic_manager.cc +++ b/src/traffic_manager/traffic_manager.cc @@ -83,14 +83,14 @@ extern "C" int getpwnam_r(const char *name, struct passwd *result, char *buffer, static AppVersionInfo appVersionInfo; // Build info for this application -static inkcoreapi DiagsConfig *diagsConfig; -static char debug_tags[1024] = ""; -static char action_tags[1024] = ""; -static int proxy_off = false; -static int listen_off = false; -static char bind_stdout[512] = ""; -static char bind_stderr[512] = ""; -static const char *mgmt_path = nullptr; +static inkcoreapi DiagsConfig *diagsConfig = nullptr; +static char debug_tags[1024] = ""; +static char action_tags[1024] = ""; +static int proxy_off = false; +static int listen_off = false; +static char bind_stdout[512] = ""; +static char bind_stderr[512] = ""; +static const char *mgmt_path = nullptr; // By default, set the current directory as base static const char *recs_conf = ts::filename::RECORDS; @@ -923,6 +923,22 @@ SignalHandler(int sig) return; } + if (sig == SIGUSR2) { + fprintf(stderr, "[TrafficManager] ==> received SIGUSR2, rotating the logs.\n"); + mgmt_log("[TrafficManager] ==> received SIGUSR2, rotating the logs.\n"); + if (lmgmt && lmgmt->watched_process_pid != -1) { + kill(lmgmt->watched_process_pid, sig); + } + diags->set_std_output(StdStream::STDOUT, bind_stdout); + diags->set_std_output(StdStream::STDERR, bind_stderr); + if (diags->reseat_diagslog()) { + Note("Reseated %s", DIAGS_LOG_FILENAME); + } else { + Note("Could not reseat %s", DIAGS_LOG_FILENAME); + } + return; + } + fprintf(stderr, "[TrafficManager] ==> Cleaning up and reissuing signal #%d\n", sig); mgmt_log("[TrafficManager] ==> Cleaning up and reissuing signal #%d\n", sig); diff --git a/src/traffic_server/traffic_server.cc b/src/traffic_server/traffic_server.cc index 97391c03134..4f54e9846ce 100644 --- a/src/traffic_server/traffic_server.cc +++ b/src/traffic_server/traffic_server.cc @@ -281,10 +281,16 @@ class SignalContinuation : public Continuation signal_received[SIGUSR2] = false; Debug("log", "received SIGUSR2, reloading traffic.out"); - // reload output logfile (file is usually called traffic.out) diags->set_std_output(StdStream::STDOUT, bind_stdout); diags->set_std_output(StdStream::STDERR, bind_stderr); + if (diags->reseat_diagslog()) { + Note("Reseated %s", DIAGS_LOG_FILENAME); + } else { + Note("Could not reseat %s", DIAGS_LOG_FILENAME); + } + // Reload any of the other moved log files (such as the ones in logging.yaml). + Log::reopen_moved_log_files(); } if (signal_received[SIGTERM] || signal_received[SIGINT]) { diff --git a/src/tscore/Diags.cc b/src/tscore/Diags.cc index bc143f06da8..f5e6372be10 100644 --- a/src/tscore/Diags.cc +++ b/src/tscore/Diags.cc @@ -516,6 +516,40 @@ Diags::config_roll_outputlog(RollingEnabledValues re, int ri, int rs) outputlog_rolling_size = rs; } +/* + * Update diags_log to use the underlying file on disk. + * + * This function will replace the current BaseLogFile object with a new one, as + * each BaseLogFile object logically represents one file on disk. It can be + * used when we want to re-open the log file if the initial one was moved. + * + * Note that, however, cross process race conditions may still exist, + * especially with the metafile, and further work with flock() for fcntl() may + * still need to be done. + * + * Returns true if the log was reseated, false otherwise. + */ +bool +Diags::reseat_diagslog() +{ + if (diags_log == nullptr || !diags_log->is_init()) { + return false; + } + fflush(diags_log->m_fp); + char *oldname = ats_strdup(diags_log->get_name()); + log_log_trace("in %s for diags.log, oldname=%s\n", __func__, oldname); + BaseLogFile *n = new BaseLogFile(oldname); + if (setup_diagslog(n)) { + BaseLogFile *old_diags = diags_log; + lock(); + diags_log = n; + unlock(); + delete old_diags; + } + ats_free(oldname); + return true; +} + /* * Checks diags_log 's underlying file on disk and see if it needs to be rolled, * and does so if necessary. diff --git a/tests/gold_tests/autest-site/trafficserver.test.ext b/tests/gold_tests/autest-site/trafficserver.test.ext index b1481ca3994..ee1c08fd51a 100755 --- a/tests/gold_tests/autest-site/trafficserver.test.ext +++ b/tests/gold_tests/autest-site/trafficserver.test.ext @@ -193,6 +193,11 @@ def MakeATSProcess(obj, name, command='traffic_server', select_ports=True, p.Disk.diags_log.Content += Testers.ExcludesExpression( "FATAL:", "diags.log should not contain errors") + if command == "traffic_manager": + fname = "manager.log" + tmpname = os.path.join(log_dir, fname) + p.Disk.File(tmpname, id=make_id(fname)) + # config files def MakeConfigFile(self, fname): tmpname = os.path.join(config_dir, fname) diff --git a/tests/gold_tests/logging/sigusr2.test.py b/tests/gold_tests/logging/sigusr2.test.py new file mode 100644 index 00000000000..a1d2ed83bdd --- /dev/null +++ b/tests/gold_tests/logging/sigusr2.test.py @@ -0,0 +1,250 @@ +''' +Verify support of external log rotation via SIGUSR2. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + + +TRAFFIC_MANAGER_PID_SCRIPT = 'ts_process_handler.py' + + +class Sigusr2Test: + """ + Handle this test-specific Traffic Server configuration. + """ + + __ts_counter = 1 + __server = None + + def __init__(self): + self.server = self.__configure_server() + self.ts = self.__configure_traffic_manager() + + def __configure_traffic_manager(self): + self._ts_name = "sigusr2_ts{}".format(Sigusr2Test.__ts_counter) + Sigusr2Test.__ts_counter += 1 + self.ts = Test.MakeATSProcess(self._ts_name, command="traffic_manager") + self.ts.Disk.records_config.update({ + 'proxy.config.http.wait_for_cache': 1, + 'proxy.config.diags.debug.enabled': 1, + 'proxy.config.diags.debug.tags': 'log', + + # All log rotation should be handled externally. + 'proxy.config.log.rolling_enabled': 0, + 'proxy.config.log.auto_delete_rolled_files': 0, + }) + + # For this test, more important than the listening port is the existence of the + # log files. In particular, it can take a few seconds for traffic_manager to + # open diags.log. + self.diags_log = self.ts.Disk.diags_log.AbsPath + self.ts.Ready = When.FileExists(self.diags_log) + + # Add content handles for the rotated logs. + self.rotated_diags_log = self.diags_log + "_old" + self.ts.Disk.File(self.rotated_diags_log, id="diags_log_old") + + self.log_dir = os.path.dirname(self.diags_log) + self.manager_log = os.path.join(self.log_dir, "manager.log") + self.rotated_manager_log = self.manager_log + "_old" + self.ts.Disk.File(self.rotated_manager_log, id="manager_log_old") + + self.ts.Disk.remap_config.AddLine( + 'map http://127.0.0.1:{0} http://127.0.0.1:{1}'.format( + self.ts.Variables.port, self.server.Variables.Port) + ) + self.ts.Disk.logging_yaml.AddLine(''' + logging: + formats: + - name: has_path + format: "%: %" + logs: + - filename: test_rotation + format: has_path + ''') + self.configured_log = os.path.join(self.log_dir, "test_rotation.log") + self.ts.Disk.File(self.configured_log, id="configured_log") + + self.rotated_configured_log = self.configured_log + "_old" + self.ts.Disk.File(self.rotated_configured_log, id="configured_log_old") + self.ts.StartBefore(self.server) + return self.ts + + def __configure_server(self): + if Sigusr2Test.__server: + return Sigusr2Test.__server + server = Test.MakeOriginServer("server") + Sigusr2Test.__server = server + for path in ['/first', '/second', '/third']: + request_header = {"headers": "GET {} HTTP/1.1\r\n" + "Host: does.not.matter\r\n\r\n".format(path), + "timestamp": "1469733493.993", "body": ""} + response_header = {"headers": "HTTP/1.1 200 OK\r\n" + "Connection: close\r\n" + "Cache-control: max-age=85000\r\n\r\n", + "timestamp": "1469733493.993", "body": "xxx"} + server.addResponse("sessionlog.json", request_header, response_header) + return server + + def get_sigusr2_signal_command(self): + """ + Return the command that will send a USR2 signal to the traffic manager + process. + """ + return r"python3 {} --parent --signal SIGUSR2 {}".format( + TRAFFIC_MANAGER_PID_SCRIPT, self._ts_name) + + +Test.Summary = ''' +Verify support of external log rotation via SIGUSR2. +''' + +Test.Setup.CopyAs(TRAFFIC_MANAGER_PID_SCRIPT, Test.RunDirectory) + +# +# Test 1: Verify SIGUSR2 behavior for system logs. +# +tr1 = Test.AddTestRun("Verify system logs (manager.log, etc.) can be rotated") + +# Configure Traffic Manager/Server. +diags_test = Sigusr2Test() + +# Configure our rotation processes. +rotate_diags_log = tr1.Processes.Process("rotate_diags_log", "mv {} {}".format( + diags_test.diags_log, diags_test.rotated_diags_log)) +rotate_manager_log = tr1.Processes.Process("rotate_manager_log", "mv {} {}".format( + diags_test.manager_log, diags_test.rotated_manager_log)) + +# Configure the signaling of SIGUSR2 to traffic_manaager. +tr1.Processes.Default.Command = diags_test.get_sigusr2_signal_command() +tr1.Processes.Default.Return = 0 +tr1.Processes.Default.Ready = When.FileExists(diags_test.diags_log) + +# Configure process order. +tr1.Processes.Default.StartBefore(rotate_diags_log) +rotate_diags_log.StartBefore(rotate_manager_log) +rotate_manager_log.StartBefore(diags_test.ts) +tr1.StillRunningAfter = diags_test.ts +tr1.StillRunningAfter = diags_test.server + +# manager.log should have been rotated. Check for the expected content in the +# old file and the newly created file. +diags_test.ts.Disk.manager_log_old.Content += Testers.ContainsExpression( + "received SIGUSR2, rotating the logs", + "manager.log_old should explain that SIGUSR2 was passed to it") + +diags_test.ts.Disk.manager_log.Content += Testers.ContainsExpression( + "Reseated manager.log", + "The new manager.log should indicate the newly opened manager.log") + +# diags.log should have been rotated. The old one had the reference to traffic +# server running, this new one shouldn't. But it should indicate that the new +# diags.log was opened. +diags_test.ts.Disk.diags_log.Content += Testers.ExcludesExpression( + "traffic server running", + "The new diags.log should not reference the running traffic server") + +diags_test.ts.Disk.diags_log.Content += Testers.ContainsExpression( + "Reseated diags.log", + "The new diags.log should indicate the newly opened diags.log") + +# +# Test 2: Verify SIGUSR2 isn't needed for rotated configured logs. +# +tr2 = Test.AddTestRun("Verify yaml.log logs can be rotated") +configured_test = Sigusr2Test() + +first_curl = tr2.Processes.Process( + "first_curl", + 'curl "http://127.0.0.1:{0}/first" --verbose'.format(configured_test.ts.Variables.port)) +# Note that for each of these processes, aside from the final Default one, they +# are all treated like long-running servers to AuTest. Thus the long sleeps +# only allow us to wait until the logs get populated with the desired content, +# the test will not wait the entire time for them to complete. +first_curl_ready = tr2.Processes.Process("first_curl_ready", 'sleep 30') +# In the autest enironment, it can take more than 10 seconds for the log file to be created. +first_curl_ready.StartupTimeout = 30 +first_curl_ready.Ready = When.FileContains(configured_test.configured_log, "/first") + +rotate_log = tr2.Processes.Process("rotate_log_file", "mv {} {}".format( + configured_test.configured_log, configured_test.rotated_configured_log)) + +second_curl = tr2.Processes.Process( + "second_curl", + 'curl "http://127.0.0.1:{0}/second" --verbose'.format(configured_test.ts.Variables.port)) + +second_curl_ready = tr2.Processes.Process("second_curl_ready", 'sleep 30') +# In the autest enironment, it can take more than 10 seconds for the log file to be created. +second_curl_ready.StartupTimeout = 30 +second_curl_ready.Ready = When.FileContains(configured_test.rotated_configured_log, "/second") + +send_pkill = tr2.Processes.Process("Send_SIGUSR2", configured_test.get_sigusr2_signal_command()) +send_pkill_ready = tr2.Processes.Process("send_pkill_ready", 'sleep 30') +send_pkill_ready.StartupTimeout = 30 +send_pkill_ready.Ready = When.FileExists(configured_test.configured_log) + +third_curl = tr2.Processes.Process( + "third_curl", + 'curl "http://127.0.0.1:{0}/third" --verbose'.format(configured_test.ts.Variables.port)) +third_curl_ready = tr2.Processes.Process("third_curl_ready", 'sleep 30') +# In the autest enironment, it can take more than 10 seconds for the log file to be created. +third_curl_ready.StartupTimeout = 30 +third_curl_ready.Ready = When.FileContains(configured_test.configured_log, "/third") + +tr2.Processes.Default.Command = "echo waiting for test processes to be done" +tr2.Processes.Default.Return = 0 + +# Configure process order: +# 1. curl /first. The entry should be logged to current log which will be _old. +# 2. mv the log to _old. +# 3. curl /second. The entry should end up in _old log. +# 4. Send a SIGUSR2 to traffic_manager. The log should be recreated. +# 5. curl /third. The entry should end up in the new, non-old, log file. +# +tr2.Processes.Default.StartBefore(third_curl_ready) +third_curl_ready.StartBefore(third_curl) +third_curl.StartBefore(send_pkill_ready) +send_pkill_ready.StartBefore(send_pkill) +send_pkill.StartBefore(second_curl_ready) +second_curl_ready.StartBefore(second_curl) +second_curl.StartBefore(rotate_log) +rotate_log.StartBefore(first_curl_ready) +first_curl_ready.StartBefore(first_curl) +first_curl.StartBefore(configured_test.ts) +tr2.StillRunningAfter = configured_test.ts + +# Verify that the logs are in the correct files. +configured_test.ts.Disk.configured_log.Content += Testers.ExcludesExpression( + "/first", + "The new test_rotation.log should not have the first GET retrieval in it.") +configured_test.ts.Disk.configured_log.Content += Testers.ExcludesExpression( + "/second", + "The new test_rotation.log should not have the second GET retrieval in it.") +configured_test.ts.Disk.configured_log.Content += Testers.ContainsExpression( + "/third", + "The new test_rotation.log should have the third GET retrieval in it.") + +configured_test.ts.Disk.configured_log_old.Content += Testers.ContainsExpression( + "/first", + "test_rotation.log_old should have the first GET retrieval in it.") +configured_test.ts.Disk.configured_log_old.Content += Testers.ContainsExpression( + "/second", + "test_rotation.log_old should have the second GET retrieval in it.") +configured_test.ts.Disk.configured_log_old.Content += Testers.ExcludesExpression( + "/third", + "test_rotation.log_old should not have the third GET retrieval in it.") diff --git a/tests/gold_tests/logging/ts_process_handler.py b/tests/gold_tests/logging/ts_process_handler.py new file mode 100644 index 00000000000..4bcfa217707 --- /dev/null +++ b/tests/gold_tests/logging/ts_process_handler.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +''' +Interact with a Traffic Server process. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import psutil +import signal +import sys +import traceback + + +class GetPidError(Exception): + """ Raised when there was an error retrieving the specified PID.""" + + def __init__(self, message): + self.message = message + super().__init__(self.message) + + +def get_desired_process(ts_identifier, get_ppid=False): + for proc in psutil.process_iter(['cmdline']): + commandline = ' '.join(proc.info['cmdline']) + if '/traffic_server' in commandline and ts_identifier in commandline: + if not get_ppid: + return proc + parent = proc.parent() + with parent.oneshot(): + if 'traffic_manager' not in parent.cmdline(): + raise GetPidError("Found a traffic server process, " + "but its parent is not traffic_manager") + return parent + raise GetPidError("Could not find a traffic_server process") + + +def convert_signal_name_to_signal(signal_name): + """ + >>> convert_signal_name_to_signal('-SIGUSR2') + + >>> convert_signal_name_to_signal('SIGUSR2') + + >>> convert_signal_name_to_signal('USR2') + + >>> convert_signal_name_to_signal('-USR2') + + >>> convert_signal_name_to_signal('KILL') + + >>> convert_signal_name_to_signal('notasignal') + Traceback (most recent call last): + ... + ValueError: Could not find a signal matching SIGnotasignal + + """ + if signal_name.startswith('-'): + signal_name = signal_name[1:] + if not signal_name.startswith('SIG'): + signal_name = 'SIG' + signal_name + for signal_value in dir(signal): + if not signal_value.startswith('SIG'): + continue + if signal_name == signal_value: + return getattr(signal, signal_value) + raise ValueError("Could not find a signal matching {}".format(signal_name)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Interact with a Traffic Server process') + parser.add_argument( + 'ts_identifier', + help='An identifier in the command line for the desired ' + 'Traffic Server process.') + parser.add_argument( + '--signal', + help='Send the given signal to the process.') + parser.add_argument( + '--parent', action="store_true", default=False, + help='Interact with the parent process of the Traffic Server process') + + return parser.parse_args() + + +def main(): + args = parse_args() + try: + process = get_desired_process(args.ts_identifier, args.parent) + except GetPidError as e: + print(traceback.format_exception(None, e, e.__traceback__), + file=sys.stderr, flush=True) + return 1 + + if args.signal: + signal_constant = convert_signal_name_to_signal(args.signal) + process.send_signal(signal_constant) + else: + print(process.pid) + return 0 + + +if __name__ == '__main__': + import doctest + doctest.testmod() + sys.exit(main())