#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.
"""mk_logwatch
This is the Check_MK Agent plugin. If configured, it will be called by the
agent without arguments.
Options:
    -d               Debug mode: Colored output, no saving of status.
    -c CONFIG_FILE   Use this config file
    -h               Show help.
    --no_state       Do not write the state file (status is not saved).
    -v               Verbose output for debugging purposes (no debug mode).
You should find an example configuration file at
'../cfg_examples/logwatch.cfg' relative to this file.
"""
from __future__ import with_statement
__version__ = "2.2.0p5"
import sys
if sys.version_info < (2, 6):
sys.stderr.write("ERROR: Python 2.5 is not supported. Please use Python 2.6 or newer.\n")
sys.exit(1)
import ast
import binascii
import codecs
import glob
import io
import itertools
import locale
import logging
import os
import platform
import re
import shlex
import shutil
import socket
import time
try:
from typing import ( # noqa: F401 # pylint: disable=unused-import
Any,
Collection,
Dict,
Iterable,
Iterator,
Sequence,
Tuple,
)
except ImportError:
# We need typing only for testing
pass
DEFAULT_LOG_LEVEL = "."
DUPLICATE_LINE_MESSAGE_FMT = "[the above message was repeated %d times]"
MK_VARDIR = os.getenv("LOGWATCH_DIR") or os.getenv("MK_VARDIR") or os.getenv("MK_STATEDIR") or "."
MK_CONFDIR = os.getenv("LOGWATCH_DIR") or os.getenv("MK_CONFDIR") or "."
REMOTE = (
os.getenv("REMOTE")
or os.getenv("REMOTE_ADDR")
or ("local" if sys.stdout.isatty() else "remote-unknown")
)
LOGGER = logging.getLogger(__name__)
IPV4_REGEX = re.compile(r"^(::ffff:|::ffff:0:|)(?:[0-9]{1,3}\.){3}[0-9]{1,3}$")
IPV6_REGEX = re.compile(r"^(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}$")
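# Byte-order marks mapped to codec names; LogLinesIter uses this table to
# auto-detect UTF-16 encoded (typically Windows-written) log files.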
ENCODINGS = (
(b"\xFF\xFE", "utf_16"),
(b"\xFE\xFF", "utf_16_be"),
)
TTY_COLORS = {
"C": "\033[1;31m", # red
"W": "\033[1;33m", # yellow
"O": "\033[1;32m", # green
"I": "\033[1;34m", # blue
".": "", # remain same
"normal": "\033[0m",
}
CONFIG_ERROR_PREFIX = "CANNOT READ CONFIG FILE: " # detected by check plugin
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
PY_GE_35 = PY3 and sys.version_info[1] >= 5
if PY3:
text_type = str
binary_type = bytes
else:
text_type = unicode # pylint: disable=undefined-variable
binary_type = str
if PY3:
    # On Windows, Python 3's sys.stdout writes "\r\n" as the newline.
    # Checkmk can't handle this, so we rewrap sys.stdout to emit plain "\n".
    # If you want the old behaviour, just use old_stdout.
new_stdout = io.TextIOWrapper(
sys.stdout.buffer,
newline="\n",
# Write out in utf-8, independently of any encodings preferred on the system. For Python 2,
# this is the case because we write str (aka encoded) to sys.stdout and we encode in UTF-8.
encoding="utf-8",
errors=sys.stdout.errors,
)
old_stdout, sys.stdout = sys.stdout, new_stdout
# Borrowed from six
def ensure_str(s, encoding="utf-8", errors="strict"):
# type: (text_type | binary_type, str, str) -> str
"""Coerce *s* to `str`.
For Python 2:
- `unicode` -> encoded to `str`
- `str` -> `str`
For Python 3:
- `str` -> `str`
- `bytes` -> decoded to `str`
"""
if not isinstance(s, (text_type, binary_type)):
raise TypeError("not expecting type '%s'" % type(s))
if PY2 and isinstance(s, text_type):
s = s.encode(encoding, errors)
elif PY3 and isinstance(s, binary_type):
s = s.decode(encoding, errors)
return str(s)
def ensure_text_type(s, encoding="utf-8", errors="strict"):
# type: (text_type | binary_type, str, str) -> text_type
"""Coerce *s* to `text_type`.
For Python 2:
- `unicode` -> `unicode`
- `str` -> decoded to `unicode`
For Python 3:
- `str` -> `str`
- `bytes` -> decoded to `str`
"""
return s if isinstance(s, text_type) else s.decode(encoding, errors)
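# Illustrative behaviour of the two helpers above (Python 3 semantics):
#   ensure_str(b"caf\xc3\xa9")       -> "café"  (bytes are decoded to str)
#   ensure_str("café")               -> "café"  (str passes through)
#   ensure_text_type(b"caf\xc3\xa9") -> "café"  (always yields text_type)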
def int_to_escaped_char(char):
# type: (int) -> text_type
return ensure_text_type("\\x{:02x}".format(char))
def bytestring_to_escaped_char(char):
# type: (binary_type) -> text_type
return ensure_text_type("\\x{:02x}".format(ord(char)))
if PY3:
escaped = int_to_escaped_char
else:
escaped = bytestring_to_escaped_char
if PY_GE_35:
backslashreplace_decode = codecs.backslashreplace_errors
else:
    # Python 2 and Python < 3.5 don't support decoding with the "backslashreplace"
    # error handler, but we need it to uniquely represent UNIX paths in monitoring.
def backslashreplace_decode(exception):
# type: (UnicodeError) -> Tuple[text_type, int]
if not isinstance(exception, UnicodeDecodeError):
# We'll use this error handler only for decoding, as the original
# "backslashreplace" handler is capable of encoding in all Python versions.
raise exception
bytestring, start, end = exception.object, exception.start, exception.end
return (
ensure_text_type("").join(escaped(c) for c in bytestring[start:end]),
end,
)
codecs.register_error("backslashreplace_decode", backslashreplace_decode)
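# Illustrative: with the handler registered, undecodable bytes are rendered as
# escape sequences instead of raising, e.g. on a pre-3.5 runtime:
#   b"/var/log/caf\xe9".decode("utf-8", "backslashreplace_decode")
#   -> u"/var/log/caf\\xe9"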
def init_logging(verbosity):
if verbosity == 0:
LOGGER.propagate = False
logging.basicConfig(level=logging.ERROR, format="%(levelname)s: %(message)s")
elif verbosity == 1:
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
else:
logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(lineno)s: %(message)s")
class ArgsParser: # pylint: disable=too-few-public-methods
"""
Custom argument parsing.
    (We use neither optparse, which is deprecated since Python 2.7,
    nor argparse, which is only available from Python 2.7 onwards.)
"""
def __init__(self, argv):
# type: (Sequence[str]) -> None
super().__init__()
if "-h" in argv:
sys.stderr.write(ensure_str(__doc__))
sys.exit(0)
self.verbosity = argv.count("-v") + 2 * argv.count("-vv")
self.config = argv[argv.index("-c") + 1] if "-c" in argv else None
self.debug = "-d" in argv or "--debug" in argv
self.no_state = "--no_state" in argv
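# Illustrative: ArgsParser(["mk_logwatch.py", "-v", "-c", "/tmp/test.cfg"])
# yields verbosity=1, config="/tmp/test.cfg", debug=False, no_state=False.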
def get_status_filename(cluster_config, remote):
# type: (Sequence[ClusterConfigBlock], str) -> str
"""
    Side effect:
    - If the agent plugin is called with the debug option set, this depends on
      the global LOGGER and on stdout.
    Determine the name of the state file depending on environment variables and
    the config:
    $REMOTE set, no cluster set or no IP match -> logwatch.state.<formatted-REMOTE>
    $REMOTE set, cluster set and IP match      -> logwatch.state.<cluster-name>
    $REMOTE not set and a tty                  -> logwatch.state.local
    $REMOTE not set and not a tty              -> logwatch.state
    $REMOTE is determined by the check_mk_agent and varies depending on how the
    check_mk_agent is accessed:
    - telnet ($REMOTE_HOST): $REMOTE is in IPv6 notation. IPv4 is extended to
      IPv6 notation, e.g. ::ffff:127.0.0.1
    - ssh ($SSH_CLIENT): $REMOTE is in IPv4 or IPv6 notation, depending on the
      IP family of the remote host.
    <formatted-REMOTE> is $REMOTE with colons (:) replaced by underscores (_)
    for an IPv6 address; for an IPv4 address it is the IPv6-extended form with
    colons replaced by underscores; if $REMOTE matches neither an IPv4 nor an
    IPv6 address, it is used as-is.
"""
remote_hostname = remote.replace(":", "_")
match = IPV4_REGEX.match(remote) or IPV6_REGEX.match(remote)
if not match:
LOGGER.debug("REMOTE %r neither IPv4 nor IPv6 address.", remote)
return os.path.join(MK_VARDIR, "logwatch.state.%s" % remote_hostname)
remote_ip = match.group()
# in case of IPv4 extended to IPv6 get rid of prefix for ip match lookup
if remote_ip.startswith("::ffff:"):
remote_ip = remote_ip[7:]
    # If a cluster is configured, map the IP to the cluster name.
    # The key "name" is mandatory and unique for cluster dicts.
cluster_name = remote_hostname
for conf in cluster_config:
for ip_or_subnet in conf.ips_or_subnets:
if ip_in_subnetwork(remote_ip, ip_or_subnet):
# Cluster name may not contain whitespaces (must be provided from
# the WATO config as type ID or hostname).
cluster_name = conf.name
LOGGER.info("Matching cluster ip %s", remote_ip)
LOGGER.info("Matching cluster name %s", cluster_name)
status_filename = os.path.join(MK_VARDIR, "logwatch.state.%s" % cluster_name)
LOGGER.info("Status filename: %s", status_filename)
return status_filename
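# Illustrative mapping (assuming no cluster block matches):
#   REMOTE="::ffff:192.168.1.10" -> <MK_VARDIR>/logwatch.state.__ffff_192.168.1.10
#   REMOTE="local"               -> <MK_VARDIR>/logwatch.state.local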
def is_comment(line):
# type: (text_type) -> bool
return line.lstrip().startswith("#")
def is_empty(line):
# type: (text_type) -> bool
return line.strip() == ""
def is_indented(line):
# type: (text_type) -> bool
return line.startswith(" ")
def parse_filenames(line):
# type: (text_type) -> list[text_type]
if platform.system() == "Windows":
        # We can't use pathlib here (unavailable on the old Python versions we
        # still support), so normalize manually to guarantee that backslashes
        # are escaped before shlex splits the line.
_processed_line = line.replace("\\", "/")
_processed_line = os.path.normpath(_processed_line)
_processed_line = _processed_line.replace("\\", "\\\\")
return shlex.split(_processed_line)
if sys.version_info[0] < 3:
return [x.decode("utf-8") for x in shlex.split(line.encode("utf-8"))]
return shlex.split(line)
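# Illustrative: parse_filenames(u'/var/log/syslog "/var/log/my app.log"')
# returns [u'/var/log/syslog', u'/var/log/my app.log'] - quoting is honoured
# by shlex, so paths containing spaces survive the split.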
def get_config_files(directory, config_file_arg=None):
# type: (str, str | None) -> list[str]
if config_file_arg is not None:
return [config_file_arg]
config_file_paths = []
config_file_paths.append(os.path.join(directory, "logwatch.cfg"))
# Add config file paths from a logwatch.d folder
for config_file in glob.glob(os.path.join(directory, "logwatch.d", "*.cfg")):
config_file_paths.append(config_file)
LOGGER.info("Configuration file paths: %r", config_file_paths)
return config_file_paths
def iter_config_lines(files):
# type: (Iterable[str]) -> Iterator[text_type]
LOGGER.debug("Config files: %r", files)
for file_ in files:
try:
with open(file_, "rb") as fid:
try:
for line in fid:
yield line.decode("utf-8")
except UnicodeDecodeError:
msg = "Error reading file %r (please use utf-8 encoding!)\n" % file_
sys.stdout.write(CONFIG_ERROR_PREFIX + msg)
except IOError:
pass
def consume_global_options_block(config_lines):
    # type: (list[text_type]) -> GlobalOptions
config_lines.pop(0)
options = GlobalOptions()
while config_lines and is_indented(config_lines[0]):
attr, value = config_lines.pop(0).split(None, 1)
if attr == "retention_period":
options.retention_period = int(value)
return options
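# A block consumed by the function above looks like this (illustrative):
#   GLOBAL OPTIONS
#    retention_period 30
# which sets options.retention_period to 30.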
def consume_cluster_definition(config_lines):
# type: (list[text_type]) -> ClusterConfigBlock
cluster_name = config_lines.pop(0)[8:].strip() # e.g.: CLUSTER duck
ips_or_subnets = []
LOGGER.debug("new ClusterConfigBlock: %s", cluster_name)
while config_lines and is_indented(config_lines[0]):
ips_or_subnets.append(config_lines.pop(0).strip())
return ClusterConfigBlock(cluster_name, ips_or_subnets)
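# A block consumed by the function above looks like this (illustrative):
#   CLUSTER my-cluster
#    192.168.1.1
#    192.168.60.0/24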
def consume_logfile_definition(config_lines):
# type: (list[text_type]) -> PatternConfigBlock
cont_list = []
rewrite_list = []
filenames = parse_filenames(config_lines.pop(0))
patterns = []
LOGGER.debug("new PatternConfigBlock: %s", filenames)
while config_lines and is_indented(config_lines[0]):
line = config_lines.pop(0)
level, raw_pattern = line.split(None, 1)
if level == "A":
cont_list.append(raw_pattern)
elif level == "R":
rewrite_list.append(raw_pattern)
elif level in ("C", "W", "I", "O"):
# New pattern for line matching => clear continuation and rewrite patterns
cont_list = []
rewrite_list = []
pattern = (level, raw_pattern, cont_list, rewrite_list)
patterns.append(pattern)
LOGGER.debug("pattern %s", pattern)
else:
raise ValueError("Invalid level in pattern line %r" % line)
return PatternConfigBlock(filenames, patterns)
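# A block consumed by the function above looks like this (illustrative):
#   "/var/log/messages" maxlines=100
#    C Fail event detected on md device
#    A ^\s+
#    W .*error.*
# where the "A" line attaches a continuation pattern to the "C" pattern
# directly above it, and each new "C"/"W"/"I"/"O" line resets the continuation
# and rewrite lists.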
def read_config(config_lines, files, debug=False):
# type: (Iterable[text_type], Iterable[str], bool) -> tuple[GlobalOptions, list[PatternConfigBlock], list[ClusterConfigBlock]]
"""
    Read logwatch.cfg (patterns, cluster mapping, etc.).
    Returns the configuration as a tuple (global_options, logfiles_configs,
    cluster_configs). A PatternConfigBlock(files, patterns) describes logfile
    patterns; a ClusterConfigBlock(name, ips_or_subnets) describes an optional
    cluster mapping, with ips_or_subnets being a list of strings.
"""
config_lines = [l.rstrip() for l in config_lines if not is_comment(l) and not is_empty(l)]
if debug and not config_lines:
# We need at least one config file *with* content in one of the places:
# logwatch.d or MK_CONFDIR
raise IOError("Did not find any content in config files: %s" % ", ".join(files))
logfiles_configs = []
cluster_configs = []
global_options = GlobalOptions()
# parsing has to consider the following possible lines:
# - comment lines (begin with #)
# - global options (block begins with "GLOBAL OPTIONS")
# - logfiles line (begin not with #, are not empty and do not contain CLUSTER)
# - cluster lines (begin with CLUSTER)
# - logfiles patterns (follow logfiles lines, begin with whitespace)
# - cluster ips or subnets (follow cluster lines, begin with whitespace)
# Needs to consider end of lines to append ips/subnets to clusters as well.
while config_lines:
first_line = config_lines[0]
if is_indented(first_line):
raise ValueError("Missing block definition for line %r" % first_line)
if first_line.startswith("GLOBAL OPTIONS"):
global_options = consume_global_options_block(config_lines)
if first_line.startswith("CLUSTER "):
cluster_configs.append(consume_cluster_definition(config_lines))
else:
logfiles_configs.append(consume_logfile_definition(config_lines))
LOGGER.info("Logfiles configurations: %r", logfiles_configs)
LOGGER.info("Optional cluster configurations: %r", cluster_configs)
return global_options, logfiles_configs, cluster_configs
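# Typical wiring (an illustrative sketch of how this module drives the parser):
#   files = get_config_files(MK_CONFDIR)
#   global_options, logfile_configs, cluster_configs = read_config(
#       iter_config_lines(files), files, debug=False
#   )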
class State:
def __init__(self, filename):
# type: (str) -> None
super().__init__()
self.filename = filename
self._data = {} # type: dict[text_type | binary_type, dict[str, Any]]
@staticmethod
def _load_line(line):
# type: (str) -> dict[str, Any]
try:
return ast.literal_eval(line)
except (NameError, SyntaxError, ValueError):
# Support status files with the following structure:
# /var/log/messages|7767698|32455445
            # These were used prior to 1.7.0i1
parts = line.split("|")
filename, offset = parts[0], int(parts[1])
file_id = int(parts[2]) if len(parts) >= 3 else -1
return {"file": filename, "offset": offset, "inode": file_id}
def read(self):
# type: () -> State
"""Read state from file
Support state files with the following structure:
{'file': b'/var/log/messages', 'offset': 7767698, 'inode': 32455445}
"""
LOGGER.debug("Reading state file: %r", self.filename)
if not os.path.exists(self.filename):
return self
with open(self.filename, "rb") as stat_fh:
for line in stat_fh:
line_data = self._load_line(ensure_text_type(line))
self._data[line_data["file"]] = line_data
LOGGER.info("Read state: %r", self._data)
return self
def write(self):
# type: () -> None
LOGGER.debug("Writing state: %r", self._data)
LOGGER.debug("State filename: %r", self.filename)
with open(self.filename, "wb") as stat_fh:
for data in self._data.values():
stat_fh.write(repr(data).encode("utf-8") + b"\n")
def get(self, key):
# type: (text_type | binary_type) -> dict[str, Any]
return self._data.setdefault(key, {"file": key})
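# Illustrative use of State (a sketch, not lifted from the caller):
#   state = State(os.path.join(MK_VARDIR, "logwatch.state")).read()
#   filestate = state.get(u"/var/log/messages")  # {'file': u'/var/log/messages'}
#   filestate["offset"] = 7767698                # mutates the shared dict
#   state.write()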
class LogLinesIter:
    # This is supposed to become a proper iterator.
    # For now, we need a persistent buffer to fix things.
BLOCKSIZE = 8192
def __init__(self, logfile, encoding):
super().__init__()
self._fd = os.open(logfile, os.O_RDONLY)
self._lines = [] # List[Text]
self._buffer = b""
self._reached_end = False # used for optimization only
self._enc = encoding or self._get_encoding()
self._nl = "\n"
        # Windows needs somewhat special processing here; it is difficult to
        # fit this into the current architecture smoothly.
self._utf16 = self._enc == "utf_16"
def __enter__(self):
return self
def __exit__(self, *exc_info):
self.close()
return False # Do not swallow exceptions
def close(self):
os.close(self._fd)
def _get_encoding(self):
# In 1.5 this was only used when logwatch is executed on windows.
# On linux the log lines were not decoded at all.
#
# For 1.6 we want to follow the standard approach to decode things read
# from external sources as soon as possible. We also want to ensure that
# the output of this script is always UTF-8 encoded later.
#
# In case the current approach does not work out, then have a look here
# for possible more robust solutions:
# http://python-notes.curiousefficiency.org/en/latest/python3/text_file_processing.html
enc_bytes_len = max(len(bom) for bom, _enc in ENCODINGS)
self._buffer = os.read(self._fd, enc_bytes_len)
for bom, encoding in ENCODINGS:
if self._buffer.startswith(bom):
self._buffer = self._buffer[len(bom) :]
LOGGER.debug("Detected %r encoding by BOM", encoding)
return encoding
pref_encoding = locale.getpreferredencoding()
encoding = (
"utf_8" if not pref_encoding or pref_encoding == "ANSI_X3.4-1968" else pref_encoding
)
LOGGER.debug("Locale Preferred encoding is %s, using %s", pref_encoding, encoding)
return encoding
def _update_lines(self):
"""
Try to read more lines from file.
"""
binary_nl = self._nl.encode(self._enc)
while binary_nl not in self._buffer:
new_bytes = os.read(self._fd, LogLinesIter.BLOCKSIZE)
if not new_bytes:
break
self._buffer += new_bytes
# in case of decoding error, replace with U+FFFD REPLACEMENT CHARACTER
raw_lines = self._buffer.decode(self._enc, "replace").split(self._nl)
self._buffer = raw_lines.pop().encode(self._enc) # unfinished line
self._lines.extend(l + self._nl for l in raw_lines)
def set_position(self, position):
if position is None:
return
self._buffer = b""
self._lines = []
os.lseek(self._fd, position, os.SEEK_SET)
def get_position(self):
"""
Return the position where we want to continue next time
"""
pointer_pos = os.lseek(self._fd, 0, os.SEEK_CUR)
bytes_unused = sum((len(l.encode(self._enc)) for l in self._lines), len(self._buffer))
return pointer_pos - bytes_unused
def skip_remaining(self):
os.lseek(self._fd, 0, os.SEEK_END)
self._buffer = b""
self._lines = []
def push_back_line(self, line):
self._lines.insert(0, line)
def next_line(self):
# type: () -> text_type | None
if self._reached_end: # optimization only
return None
if not self._lines:
self._update_lines()
if self._lines:
return self._lines.pop(0)
self._reached_end = True
return None
def get_file_info(path):
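    # Returns a (file_id, size) pair used to detect log rotation: the inode on
    # Linux/AIX/SunOS, the creation time on Windows, and a constant elsewhere.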
stat = os.stat(path)
system = platform.system().lower()
if system == "windows":
return (stat.st_ctime_ns, stat.st_size)
if system in ("linux", "aix", "sunos"):
return (stat.st_ino, stat.st_size)
return (1, stat.st_size)
def get_formatted_line(line, level):
# type: (text_type, str) -> text_type
formatted_line = "%s %s" % (level, line)
if sys.stdout.isatty():
formatted_line = "%s%s%s" % (
TTY_COLORS[level],
formatted_line.replace("\1", "\nCONT:"),
TTY_COLORS["normal"],
)
return formatted_line
def should_log_line_with_level(level, nocontext):
# type: (str, bool | None) -> bool
return not (nocontext and level == ".")
def process_logfile(section, filestate, debug): # pylint: disable=too-many-branches
# type: (LogfileSection, dict[str, Any], object) -> tuple[text_type, list[text_type]]
"""
Returns tuple of (
logfile lines,
warning and/or error indicator,
warning and/or error lines,
).
In case the file has never been seen before returns a list of logfile lines
and None in case the logfile cannot be opened.
"""
# TODO: Make use of the ContextManager feature of LogLinesIter
try:
log_iter = LogLinesIter(section.name_fs, section.options.encoding)
except OSError:
if debug:
raise
return "[[[%s:cannotopen]]]\n" % section.name_write, []
try:
header = "[[[%s]]]\n" % section.name_write
file_id, size = get_file_info(section.name_fs)
prev_file_id = filestate.get("inode", -1)
filestate["inode"] = file_id
# Look at which file offset we have finished scanning the logfile last time.
offset = filestate.get("offset")
# Set the current pointer to the file end
filestate["offset"] = size
# If we have never seen this file before, we do not want
# to make a fuss about ancient log messages... (unless configured to)
if offset is None and not (section.options.fromstart or debug):
return header, []
        # If the inode of the logfile has changed, it has apparently been
        # restarted anew (logfile rotation). At least we must assume that.
        # In some rare cases (restore of a backup, etc.) we are wrong and
        # resend old log messages.
if prev_file_id >= 0 and file_id != prev_file_id:
offset = None
# Our previously stored offset is the current end ->
# no new lines in this file
if offset == size:
return header, []
# If our offset is beyond the current end, the logfile has been
# truncated or wrapped while keeping the same file_id. We assume
# that it contains all new data in that case and restart from
# beginning.
if offset is not None and offset > size:
offset = None
# now seek to offset where interesting data begins
log_iter.set_position(offset)
worst = -1
warnings_and_errors = []
lines_parsed = 0
start_time = time.time()
while True:
line = log_iter.next_line()
if line is None:
break # End of file
# Handle option maxlinesize
if section.options.maxlinesize is not None and len(line) > section.options.maxlinesize:
line = line[: section.options.maxlinesize] + "[TRUNCATED]\n"
lines_parsed += 1
# Check if maximum number of new log messages is exceeded
if section.options.maxlines is not None and lines_parsed > section.options.maxlines:
warnings_and_errors.append(
"%s Maximum number (%d) of new log messages exceeded.\n"
% (
section.options.overflow,
section.options.maxlines,
)
)
worst = max(worst, section.options.overflow_level)
log_iter.skip_remaining()
break
# Check if maximum processing time (per file) is exceeded. Check only
# every 100'th line in order to save system calls
if (
section.options.maxtime is not None
and lines_parsed % 100 == 10
and time.time() - start_time > section.options.maxtime
):
warnings_and_errors.append(
"%s Maximum parsing time (%.1f sec) of this log file exceeded.\n"
% (
section.options.overflow,
section.options.maxtime,
)
)
worst = max(worst, section.options.overflow_level)
log_iter.skip_remaining()
break
level = DEFAULT_LOG_LEVEL
for lev, pattern, cont_patterns, replacements in section.compiled_patterns:
matches = pattern.search(line[:-1])
if matches:
level = lev
levelint = {"C": 2, "W": 1, "O": 0, "I": -1, ".": -1}[lev]
worst = max(levelint, worst)
# TODO: the following for block should be a method of the iterator
# Check for continuation lines
for cont_pattern in cont_patterns:
if isinstance(cont_pattern, int): # add that many lines
for _unused_x in range(cont_pattern):
cont_line = log_iter.next_line()
if cont_line is None: # end of file
break
line = line[:-1] + "\1" + cont_line
else: # pattern is regex
while True:
cont_line = log_iter.next_line()
if cont_line is None: # end of file
break
if cont_pattern.search(cont_line[:-1]):
line = line[:-1] + "\1" + cont_line
else:
log_iter.push_back_line(
cont_line
) # sorry for stealing this line
break
# Replacement
for replace in replacements:
line = replace.replace("\\0", line.rstrip()) + "\n"
for num, group in enumerate(matches.groups()):
if group is not None:
line = line.replace("\\%d" % (num + 1), group)
break # matching rule found and executed
if level == "I":
level = "."
if not should_log_line_with_level(level, section.options.nocontext):
continue
out_line = get_formatted_line(line[:-1], level)
warnings_and_errors.append("%s\n" % out_line)
new_offset = log_iter.get_position()
finally:
log_iter.close()
filestate["offset"] = new_offset
# Handle option maxfilesize, regardless of warning or errors that have happened
if section.options.maxfilesize:
offset_wrap = new_offset // section.options.maxfilesize
if ((offset or 0) // section.options.maxfilesize) < offset_wrap:
warnings_and_errors.append(
"%sW Maximum allowed logfile size (%d bytes) exceeded for the %dth time.%s\n"
% (
TTY_COLORS["W"] if sys.stdout.isatty() else "",
section.options.maxfilesize,
offset_wrap,
TTY_COLORS["normal"] if sys.stdout.isatty() else "",
)
)
# output all lines if at least one warning, error or ok has been found
if worst > -1:
return header, warnings_and_errors
return header, []
class Options:
"""Options w.r.t. logfile patterns (not w.r.t. cluster mapping)."""
MAP_OVERFLOW = {"C": 2, "W": 1, "I": 0, "O": 0} # case-insensitive, see set_opt
MAP_BOOL = {"true": True, "false": False, "1": True, "0": False, "yes": True, "no": False}
DEFAULTS = {
"encoding": None,
"maxfilesize": None,
"maxlines": None,
"maxtime": None,
"maxlinesize": None,
"regex": None,
"overflow": "C",
"nocontext": None,
"maxcontextlines": None,
"maxoutputsize": 500000, # same as logwatch_max_filesize in check plugin
"fromstart": False,
"skipconsecutiveduplicated": False,
}
def __init__(self):
# type: () -> None
self.values = {} # type: Dict
@property
def encoding(self):
return self._attr_or_default("encoding")
@property
def maxfilesize(self):
return self._attr_or_default("maxfilesize")
@property
def maxlines(self):
return self._attr_or_default("maxlines")
@property
def maxtime(self):
return self._attr_or_default("maxtime")
@property
def maxlinesize(self):
return self._attr_or_default("maxlinesize")
@property
def regex(self):
return self._attr_or_default("regex")
@property
def overflow(self):
return self._attr_or_default("overflow")
@property
def nocontext(self):
# type: () -> bool | None
return self._attr_or_default("nocontext")
@property
def maxcontextlines(self):
return self._attr_or_default("maxcontextlines")
@property
def maxoutputsize(self):
return self._attr_or_default("maxoutputsize")
@property
def fromstart(self):
return self._attr_or_default("fromstart")
@property
def skipconsecutiveduplicated(self):
return self._attr_or_default("skipconsecutiveduplicated")
def _attr_or_default(self, key):
if key in self.values:
return self.values[key]
return Options.DEFAULTS[key]
@property
def overflow_level(self):
return self.MAP_OVERFLOW[self.overflow]
def update(self, other):
self.values.update(other.values)
def set_opt(self, opt_str):
try:
key, value = opt_str.split("=", 1)
if key == "encoding":
"".encode(value) # make sure it's an encoding
self.values[key] = value
elif key in ("maxlines", "maxlinesize", "maxfilesize", "maxoutputsize"):
self.values[key] = int(value)
elif key in ("maxtime",):
self.values[key] = float(value)
elif key == "overflow":
if value.upper() not in Options.MAP_OVERFLOW:
raise ValueError(
"Invalid overflow: %r (choose from %r)"
% (
value,
Options.MAP_OVERFLOW.keys(),
)
)
self.values["overflow"] = value.upper()
elif key in ("regex", "iregex"):
flags = (re.IGNORECASE if key.startswith("i") else 0) | re.UNICODE
self.values["regex"] = re.compile(value, flags)
elif key in ("nocontext", "fromstart", "skipconsecutiveduplicated"):
if value.lower() not in Options.MAP_BOOL:
raise ValueError(
"Invalid %s: %r (choose from %r)"
% (
key,
value,
Options.MAP_BOOL.keys(),
)
)
self.values[key] = Options.MAP_BOOL[value.lower()]
elif key == "maxcontextlines":
before, after = (int(i) for i in value.split(","))
self.values[key] = (before, after)
else:
raise ValueError("Invalid option: %r" % opt_str)
except (ValueError, LookupError) as exc:
sys.stdout.write("INVALID CONFIGURATION: %s\n" % exc)
raise
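# Options are given inline on a logfile line of the config, e.g. (illustrative):
#   /var/log/messages maxlines=100 overflow=W nocontext=1
# Each token containing "=" is fed to set_opt, so set_opt("maxlines=100")
# stores {"maxlines": 100} and set_opt("nocontext=1") stores {"nocontext": True}.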
class GlobalOptions:
def __init__(self):
super().__init__()
self.retention_period = 60
class PatternConfigBlock:
def __init__(self, files, patterns):
# type: (Sequence[text_type], Sequence[tuple[text_type, text_type, Sequence[text_type], Sequence[text_type]]]) -> None
super().__init__()
self.files = files
self.patterns = patterns
# First read all the options like 'maxlines=100' or 'maxtime=10'
self.options = Options()
for item in self.files:
if "=" in item:
self.options.set_opt(item)
class ClusterConfigBlock:
def __init__(self, name, ips_or_subnets):
# type: (text_type, Sequence[text_type]) -> None
super().__init__()
self.name = name
self.ips_or_subnets = ips_or_subnets
def find_matching_logfiles(glob_pattern):
# type: (text_type) -> list[tuple[text_type | binary_type, text_type]]
"""
Evaluate globbing pattern to a list of logfile IDs
Return a list of Tuples:
* one identifier for opening the file as used by os.open (byte str or unicode)
* one unicode str, safe for writing
    Glob matching covers hard-linked and unbroken soft-linked/symlinked files.
    No tilde expansion is done, but *, ?, and character ranges expressed with []
    are matched correctly.
    Recursive globs (**) are not supported (glob only handles them from
    Python 3.5 onwards).
    Hard-linked duplicates of files are not filtered out.
    Soft links may not be detected reliably, depending on the Python runtime
    [Python Standard Lib, os.path.islink()].
"""
if platform.system() == "Windows":
# windows is the easy case:
# provide unicode, and let python deal with the rest
# (see https://www.python.org/dev/peps/pep-0277)
matches = list(glob.glob(glob_pattern)) # type: Iterable[text_type | binary_type]
else:
# we can't use glob on unicode, as it would try to re-decode matches with ascii
matches = glob.glob(glob_pattern.encode("utf8"))