-
Notifications
You must be signed in to change notification settings - Fork 190
/
__init__.py
executable file
·2212 lines (1864 loc) · 77.7 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
#
# Copyright 2017-2018 Amazon.com, Inc. and its affiliates. All Rights Reserved.
#
# Licensed under the MIT License. See the LICENSE accompanying this file
# for the specific language governing permissions and limitations under
# the License.
#
import base64
import errno
import hashlib
import hmac
import json
import logging
import logging.handlers
import os
import platform
import pwd
import re
import shutil
import socket
import subprocess
import sys
import time
from collections import namedtuple
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from logging.handlers import RotatingFileHandler
from signal import SIGHUP, SIGKILL, SIGTERM
try:
from configparser import ConfigParser, NoOptionError, NoSectionError
except ImportError:
import ConfigParser
from ConfigParser import NoOptionError, NoSectionError
try:
from urllib.parse import quote_plus
except ImportError:
from urllib import quote_plus
try:
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
except ImportError:
from urllib import urlencode
from urllib2 import HTTPError, HTTPHandler, Request, URLError, build_opener, urlopen
# Strings that identify Amazon Linux 2. get_system_release_version() is compared against this list
# when deciding whether the stunnel command of an nsenter-launched mount has to be rewritten.
AMAZON_LINUX_2_RELEASE_ID = "Amazon Linux release 2 (Karoo)"
AMAZON_LINUX_2_PRETTY_NAME = "Amazon Linux 2"
AMAZON_LINUX_2_RELEASE_VERSIONS = [
AMAZON_LINUX_2_RELEASE_ID,
AMAZON_LINUX_2_PRETTY_NAME,
]
# NOTE(review): presumably the efs-utils release this watchdog ships with - confirm.
VERSION = "2.1.0"
# NOTE(review): presumably the AWS service name used when signing requests - confirm against the SigV4 code.
SERVICE = "elasticfilesystem"
# Configuration file and the section names read from it.
CONFIG_FILE = "/etc/amazon/efs/efs-utils.conf"
# Section holding the watchdog's own settings (e.g. the logging_* items read by bootstrap_logging).
CONFIG_SECTION = "mount-watchdog"
# Section consulted for DISABLE_FETCH_EC2_METADATA_TOKEN_ITEM.
MOUNT_CONFIG_SECTION = "mount"
CLIENT_INFO_SECTION = "client-info"
CLIENT_SOURCE_STR_LEN_LIMIT = 100
DISABLE_FETCH_EC2_METADATA_TOKEN_ITEM = "disable_fetch_ec2_metadata_token"
# Fallback returned by get_system_release_version() when no release file can be read.
DEFAULT_UNKNOWN_VALUE = "unknown"
DEFAULT_MACOS_VALUE = "macos"
# Default timeout in seconds (50ms) used by get_aws_ec2_metadata_token().
DEFAULT_TIMEOUT = 0.05
# Log directory and file name joined by bootstrap_logging() for the rotating log handler.
LOG_DIR = "/var/log/amazon/efs"
LOG_FILE = "mount-watchdog.log"
# Directory holding the per-mount state files ("fs-..." entries, see get_state_files()).
STATE_FILE_DIR = "/var/run/efs"
# Name of the pid file looked up inside "<state file>+/" by get_pid_in_state_dir().
STUNNEL_PID_FILE = "stunnel.pid"
# Port appended by get_file_safe_mountpoint() when a non-macOS mount lists no "port" option.
DEFAULT_NFS_PORT = "2049"
PRIVATE_KEY_FILE = "/etc/amazon/efs/privateKey.pem"
# Interval/timeout defaults; the unit is part of each name (minutes / seconds).
DEFAULT_REFRESH_SELF_SIGNED_CERT_INTERVAL_MIN = 60
DEFAULT_STUNNEL_HEALTH_CHECK_INTERVAL_MIN = 5
DEFAULT_STUNNEL_HEALTH_CHECK_TIMEOUT_SEC = 30
# NOTE(review): presumably the validity window applied to the self-signed certificate - confirm.
NOT_BEFORE_MINS = 15
NOT_AFTER_HOURS = 3
# strftime/strptime patterns.
DATE_ONLY_FORMAT = "%Y%m%d"
SIGV4_DATETIME_FORMAT = "%Y%m%dT%H%M%SZ"
CERT_DATETIME_FORMAT = "%y%m%d%H%M%SZ"
# Locations of the AWS credentials and config files inside the home directory of the user
# that owns this process (resolved through the password database at import time).
AWS_CREDENTIALS_FILES = {
"credentials": os.path.expanduser(
os.path.join("~" + pwd.getpwuid(os.getuid()).pw_name, ".aws", "credentials")
),
"config": os.path.expanduser(
os.path.join("~" + pwd.getpwuid(os.getuid()).pw_name, ".aws", "config")
),
}
# Template for a certificate-authority configuration file. It carries six "%s" placeholders, in order:
# the base directory ("dir"), the private key path, the CN value and three trailing free-form lines.
# NOTE(review): presumably rendered with the % operator and handed to openssl - confirm against the caller.
CA_CONFIG_BODY = """dir = %s
RANDFILE = $dir/database/.rand
[ ca ]
default_ca = local_ca
[ local_ca ]
database = $dir/database/index.txt
serial = $dir/database/serial
private_key = %s
cert = $dir/certificate.pem
new_certs_dir = $dir/certs
default_md = sha256
preserve = no
policy = efsPolicy
x509_extensions = v3_ca
[ efsPolicy ]
CN = supplied
[ req ]
prompt = no
distinguished_name = req_distinguished_name
[ req_distinguished_name ]
CN = %s
%s
%s
%s
"""
# SigV4 Auth
ALGORITHM = "AWS4-HMAC-SHA256"
AWS4_REQUEST = "aws4_request"
HTTP_REQUEST_METHOD = "GET"
CANONICAL_URI = "/"
# The "%s" value is a placeholder, so CANONICAL_HEADERS evaluates to the template "host:%s"
# and SIGNED_HEADERS to "host".
CANONICAL_HEADERS_DICT = {"host": "%s"}
CANONICAL_HEADERS = "\n".join(
["%s:%s" % (k, v) for k, v in sorted(CANONICAL_HEADERS_DICT.items())]
)
SIGNED_HEADERS = ";".join(CANONICAL_HEADERS_DICT.keys())
REQUEST_PAYLOAD = ""
# Matches "fsap-" followed by exactly 17 lowercase hexadecimal characters.
AP_ID_RE = re.compile("^fsap-[0-9a-f]{17}$")
# Endpoints queried by the credential providers below.
ECS_TASK_METADATA_API = "http://169.254.170.2"
# "{}" is filled with the region by get_aws_security_credentials_from_webidentity().
STS_ENDPOINT_URL_FORMAT = "https://sts.{}.amazonaws.com/"
INSTANCE_IAM_URL = "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
INSTANCE_METADATA_TOKEN_URL = "http://169.254.169.254/latest/api/token"
# Documentation links embedded in log messages when credentials cannot be retrieved.
SECURITY_CREDS_ECS_URI_HELP_URL = (
"https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-iam-roles.html"
)
SECURITY_CREDS_WEBIDENTITY_HELP_URL = "https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html"
SECURITY_CREDS_IAM_ROLE_HELP_URL = (
"https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html"
)
NAMED_PROFILE_HELP_URL = (
"https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html"
)
CONFIG_FILE_SETTINGS_HELP_URL = (
"https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html"
"#cli-configure-files-settings"
)
# One entry of the mount table. get_current_local_nfs_mounts() builds it from the six
# whitespace-separated fields of a mount file line (or from `mount -t nfs` output on macOS).
Mount = namedtuple(
"Mount", ["server", "mountpoint", "type", "options", "freq", "passno"]
)
# Timeout in seconds for the `nfsstat` subprocess in get_nfs_mount_options_on_macos().
NFSSTAT_TIMEOUT = 5
# Unmount difference time in seconds
UNMOUNT_DIFF_TIME = 30
# Default unmount count for consistency
DEFAULT_UNMOUNT_COUNT_FOR_CONSISTENCY = 5
# sys.platform values treated as macOS by check_if_platform_is_mac().
MAC_OS_PLATFORM_LIST = ["darwin"]
# Files read, in this order, by get_system_release_version().
SYSTEM_RELEASE_PATH = "/etc/system-release"
OS_RELEASE_PATH = "/etc/os-release"
# Hints passed to find_command_path() as the install_method text of its failure message.
STUNNEL_INSTALLATION_MESSAGE = "Please install it following the instructions at: https://docs.aws.amazon.com/efs/latest/ug/using-amazon-efs-utils.html#upgrading-stunnel"
EFS_PROXY_INSTALLATION_MESSAGE = "Please install it by reinstalling amazon-efs-utils"
# Substring searched for by command_uses_efs_proxy().
EFS_PROXY_BIN = "efs-proxy"
def fatal_error(user_message, log_message=None):
    """
    Report an unrecoverable error and terminate the process with exit status 1.

    :param user_message: text written to stderr (a newline is appended).
    :param log_message: text sent to the error log; user_message is used when this is None.
    """
    text_for_log = user_message if log_message is None else log_message

    sys.stderr.write("%s\n" % user_message)
    logging.error(text_for_log)
    sys.exit(1)
def get_aws_security_credentials(config, credentials_source, region):
    """
    Lookup AWS security credentials (access key ID and secret access key). Adapted credentials provider chain from:
    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html and
    https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html

    :param config: parsed configuration, forwarded to the providers that issue HTTP requests.
    :param credentials_source: string of the form "<method>:<value>", where method is one of
        credentials, named_profile, config, ecs, webidentity or metadata. For webidentity the value
        must be "<role_arn>,<token_file>".
    :param region: region forwarded to the webidentity provider.
    :return: whatever the selected provider returns, or None when the source string is malformed
        or names an unknown method (an error is logged in that case).
    """

    def improper_source():
        logging.error(
            'Improper credentials source string "%s" found from mount state file',
            credentials_source,
        )
        return None

    # A source without the ":" separator cannot be unpacked into (method, value); treat it like any
    # other improper source string instead of raising ValueError.
    if not credentials_source or ":" not in credentials_source:
        return improper_source()

    method, value = credentials_source.split(":", 1)

    if method == "credentials":
        return get_aws_security_credentials_from_file("credentials", value)
    elif method == "named_profile":
        return get_aws_security_credentials_from_assumed_profile(value)
    elif method == "config":
        return get_aws_security_credentials_from_file("config", value)
    elif method == "ecs":
        return get_aws_security_credentials_from_ecs(config, value)
    elif method == "webidentity":
        # Exactly two comma-separated parts (role ARN, token file) are accepted; any other count
        # used to surface as a TypeError from the provider call.
        webidentity_args = value.split(",")
        if len(webidentity_args) != 2:
            return improper_source()
        return get_aws_security_credentials_from_webidentity(
            config, *webidentity_args, region=region
        )
    elif method == "metadata":
        return get_aws_security_credentials_from_instance_metadata(config)
    else:
        return improper_source()
def get_boolean_config_item_value(
    config, config_section, config_item, default_value, emit_warning_message=False
):
    """
    Read a boolean item from the config, falling back to a default when it is absent.

    :param config: parsed configuration object.
    :param config_section: section expected to hold the item.
    :param config_item: name of the boolean item.
    :param default_value: value returned when the section or the item is missing.
    :param emit_warning_message: when True, a notice about the missing section/item is written to stdout.
    :return: the parsed boolean, or default_value when the section or item does not exist.
    """
    if not config.has_section(config_section):
        problem = "Warning: config file does not have section %s." % config_section
    elif not config.has_option(config_section, config_item):
        problem = "Warning: config file does not have %s item in section %s." % (
            config_item,
            config_section,
        )
    else:
        # Both the section and the item exist, so the configured value wins.
        return config.getboolean(config_section, config_item)

    if emit_warning_message:
        notice = (
            "%s. You should be able to find a new config file in the same folder as current config file %s. "
            "Consider update the new config file to latest config file. Use the default value [%s = %s]."
            % (problem, CONFIG_FILE, config_item, default_value)
        )
        sys.stdout.write(notice)
    return default_value
def fetch_ec2_metadata_token_disabled(config):
    """
    Tell whether fetching the EC2 metadata token has been switched off in the config.

    :param config: parsed configuration object.
    :return: the boolean value of the disable item in the mount section; False when it is not configured.
    """
    token_fetch_disabled = get_boolean_config_item_value(
        config,
        MOUNT_CONFIG_SECTION,
        DISABLE_FETCH_EC2_METADATA_TOKEN_ITEM,
        default_value=False,
    )
    return token_fetch_disabled
def get_aws_ec2_metadata_token(timeout=DEFAULT_TIMEOUT):
    """
    Ask the instance metadata service for a session token with a PUT request.

    Normally the session token is fetched within 10ms; the default timeout of 50ms aborts the
    request and yields None when the token has not been returned in time.

    :param timeout: seconds to wait for the response.
    :return: the raw response body on success, otherwise None (the reason is logged at debug level).
    """
    ttl_header_name = "X-aws-ec2-metadata-token-ttl-seconds"
    ttl_header_value = "21600"

    try:
        # build_opener and HTTPHandler are only brought in by the urllib2 fallback import at the top
        # of the file; when they are missing the NameError selects the urllib.request flavour below.
        opener = build_opener(HTTPHandler)
        token_request = Request(INSTANCE_METADATA_TOKEN_URL)
        token_request.add_header(ttl_header_name, ttl_header_value)
        token_request.get_method = lambda: "PUT"
        send = opener.open
    except NameError:
        token_request = Request(
            INSTANCE_METADATA_TOKEN_URL,
            headers={ttl_header_name: ttl_header_value},
            method="PUT",
        )
        send = urlopen

    try:
        response = send(token_request, timeout=timeout)
        return response.read()
    except socket.timeout:
        failure = "Timeout when getting the aws ec2 metadata token"
    except HTTPError as e:
        failure = "Failed to fetch token due to %s" % e
    except Exception as e:
        failure = "Unknown error when fetching aws ec2 metadata token, %s" % e

    logging.debug(failure)
    return None
def get_aws_security_credentials_from_file(file_name, awsprofile):
    """
    Attempt to look up AWS security credentials in the AWS credentials file (~/.aws/credentials)
    or the AWS config file (~/.aws/config).

    :param file_name: key into AWS_CREDENTIALS_FILES ("credentials" or "config").
    :param awsprofile: named profile whose keys should be read.
    :return: the credentials dict when an access key id was found, otherwise None (an error is logged).
    """
    file_path = AWS_CREDENTIALS_FILES.get(file_name)
    file_is_available = bool(file_path) and os.path.exists(file_path)

    if file_is_available:
        found = credentials_file_helper(file_path, awsprofile)
        if found["AccessKeyId"]:
            return found

    logging.error(
        "AWS security credentials not found in %s under named profile [%s]",
        file_path,
        awsprofile,
    )
    return None
def get_aws_security_credentials_from_assumed_profile(awsprofile):
    """
    Resolve credentials for a named profile through botocore.

    :param awsprofile: name of the profile to resolve.
    :return: the credentials dict when it holds an access key id, otherwise None (an error is logged).
    """
    resolved = botocore_credentials_helper(awsprofile)
    if not resolved["AccessKeyId"]:
        logging.error(
            "AWS security credentials not found via assuming named profile [%s] using botocore",
            awsprofile,
        )
        return None
    return resolved
def botocore_credentials_helper(awsprofile):
    """
    Resolve the credentials of a named profile with botocore.

    :param awsprofile: name of the profile handed to the botocore session.
    :return: dict with the keys AccessKeyId, SecretAccessKey and Token. All three values stay None
        when botocore is not installed, the profile is unknown, or botocore finds no credentials.
    """
    credentials = {"AccessKeyId": None, "SecretAccessKey": None, "Token": None}

    try:
        import botocore.session
        from botocore.exceptions import ProfileNotFound
    except ImportError:
        logging.error(
            "Named profile credentials cannot be retrieved without botocore, please install botocore first."
        )
        return credentials

    session = botocore.session.get_session()
    session.set_config_variable("profile", awsprofile)

    try:
        resolved = session.get_credentials()
        # get_credentials() yields None when no provider in the chain has credentials; calling
        # get_frozen_credentials() on that would raise AttributeError, so report "nothing found"
        # through the all-None dict and let the caller log its usual error.
        if resolved is None:
            logging.debug(
                "botocore did not find any credentials for named profile [%s]",
                awsprofile,
            )
            return credentials
        frozen_credentials = resolved.get_frozen_credentials()
    except ProfileNotFound as e:
        logging.error(
            "%s, please add the [profile %s] section in the aws config file following %s and %s."
            % (e, awsprofile, NAMED_PROFILE_HELP_URL, CONFIG_FILE_SETTINGS_HELP_URL)
        )
        return credentials

    credentials["AccessKeyId"] = frozen_credentials.access_key
    credentials["SecretAccessKey"] = frozen_credentials.secret_key
    credentials["Token"] = frozen_credentials.token
    return credentials
def get_aws_security_credentials_from_ecs(config, uri):
    """
    Fetch credentials through the ECS security credentials uri (found in the
    AWS_CONTAINER_CREDENTIALS_RELATIVE_URI environment variable).

    :param config: parsed configuration, forwarded to url_request_helper.
    :param uri: relative uri appended to the ECS task metadata endpoint.
    :return: the response when it contains AccessKeyId, SecretAccessKey and Token, otherwise None.
    """
    required_keys = ("AccessKeyId", "SecretAccessKey", "Token")
    ecs_uri = ECS_TASK_METADATA_API + uri

    unsuccessful_text = (
        "Unsuccessful retrieval of AWS security credentials at %s." % ecs_uri
    )
    unreachable_text = (
        "Unable to reach %s to retrieve AWS security credentials. See %s for more info."
        % (ecs_uri, SECURITY_CREDS_ECS_URI_HELP_URL)
    )

    response = url_request_helper(config, ecs_uri, unsuccessful_text, unreachable_text)
    if not response:
        return None
    if not all(key in response for key in required_keys):
        return None
    return response
def get_aws_security_credentials_from_webidentity(config, role_arn, token_file, region):
    """
    Exchange a web identity token for credentials via the STS AssumeRoleWithWebIdentity action.

    :param config: parsed configuration, forwarded to url_request_helper.
    :param role_arn: ARN of the role to assume.
    :param token_file: path of the file holding the web identity token.
    :param region: region inserted into the STS endpoint url.
    :return: dict with AccessKeyId, SecretAccessKey and Token, or None when the token file cannot be
        read or the response lacks the credentials.
    """
    try:
        with open(token_file, "r") as token_handle:
            web_identity_token = token_handle.read()
    except Exception as e:
        logging.error("Error reading token file %s: %s", token_file, e)
        return None

    sts_endpoint = STS_ENDPOINT_URL_FORMAT.format(region)
    query_string = urlencode(
        {
            "Version": "2011-06-15",
            "Action": "AssumeRoleWithWebIdentity",
            "RoleArn": role_arn,
            "RoleSessionName": "efs-mount-helper",
            "WebIdentityToken": web_identity_token,
        }
    )
    webidentity_url = sts_endpoint + "?" + query_string

    unsuccessful_text = (
        "Unsuccessful retrieval of AWS security credentials at %s." % sts_endpoint
    )
    unreachable_text = (
        "Unable to reach %s to retrieve AWS security credentials. See %s for more info."
        % (sts_endpoint, SECURITY_CREDS_WEBIDENTITY_HELP_URL)
    )

    response = url_request_helper(
        config,
        webidentity_url,
        unsuccessful_text,
        unreachable_text,
        headers={"Accept": "application/json"},
    )
    if not response:
        return None

    # The credentials sit three levels deep in the JSON answer; every missing level degrades to {}.
    outer = response.get("AssumeRoleWithWebIdentityResponse", {})
    result = outer.get("AssumeRoleWithWebIdentityResult", {})
    creds = result.get("Credentials", {})

    required_keys = ["AccessKeyId", "SecretAccessKey", "SessionToken"]
    if not all(key in creds for key in required_keys):
        return None

    return {
        "AccessKeyId": creds["AccessKeyId"],
        "SecretAccessKey": creds["SecretAccessKey"],
        "Token": creds["SessionToken"],
    }
def get_aws_security_credentials_from_instance_metadata(config):
    """
    Fetch credentials through the IAM role name security credentials lookup uri (after looking up
    the IAM role name attached to the instance).

    :param config: parsed configuration, forwarded to url_request_helper.
    :return: the credentials response when it contains AccessKeyId, SecretAccessKey and Token,
        otherwise None.
    """
    required_keys = ("AccessKeyId", "SecretAccessKey", "Token")

    role_unsuccessful_text = (
        "Unsuccessful retrieval of IAM role name at %s." % INSTANCE_IAM_URL
    )
    role_unreachable_text = (
        "Unable to reach %s to retrieve IAM role name. See %s for more info."
        % (INSTANCE_IAM_URL, SECURITY_CREDS_IAM_ROLE_HELP_URL)
    )
    iam_role_name = url_request_helper(
        config, INSTANCE_IAM_URL, role_unsuccessful_text, role_unreachable_text
    )
    if not iam_role_name:
        return None

    # Second request: the credentials themselves live under the role name.
    lookup_url = INSTANCE_IAM_URL + iam_role_name
    creds_unsuccessful_text = (
        "Unsuccessful retrieval of AWS security credentials at %s." % lookup_url
    )
    creds_unreachable_text = (
        "Unable to reach %s to retrieve AWS security credentials. See %s for more info."
        % (lookup_url, SECURITY_CREDS_IAM_ROLE_HELP_URL)
    )
    security_dict = url_request_helper(
        config, lookup_url, creds_unsuccessful_text, creds_unreachable_text
    )

    if security_dict and all(key in security_dict for key in required_keys):
        return security_dict
    return None
def credentials_file_helper(file_path, awsprofile):
    """
    Read the access key id, secret access key and session token of a profile from an AWS file.

    :param file_path: path of the credentials/config file to parse.
    :param awsprofile: section (profile) to read from.
    :return: dict with the keys AccessKeyId, SecretAccessKey and Token; values that could not be read
        stay None. A missing session token alone still yields the two key values.
    """
    parsed = read_config(file_path)
    credentials = {"AccessKeyId": None, "SecretAccessKey": None, "Token": None}

    try:
        # All three options are read before anything is stored, so a missing option leaves the
        # result untouched for the handler below.
        key_id, secret, token = [
            parsed.get(awsprofile, option)
            for option in (
                "aws_access_key_id",
                "aws_secret_access_key",
                "aws_session_token",
            )
        ]
        credentials.update(AccessKeyId=key_id, SecretAccessKey=secret, Token=token)
    except NoOptionError as e:
        detail = str(e)
        if "aws_access_key_id" in detail or "aws_secret_access_key" in detail:
            logging.debug(
                "aws_access_key_id or aws_secret_access_key not found in %s under named profile [%s]",
                file_path,
                awsprofile,
            )
        if "aws_session_token" in detail:
            logging.debug("aws_session_token not found in %s", file_path)
            # Only the optional token is absent: keep the long-term key pair.
            credentials["AccessKeyId"] = parsed.get(awsprofile, "aws_access_key_id")
            credentials["SecretAccessKey"] = parsed.get(
                awsprofile, "aws_secret_access_key"
            )
    except NoSectionError:
        logging.debug("No [%s] section found in config file %s", awsprofile, file_path)

    return credentials
def is_instance_metadata_url(url):
    """Return True when url begins with the instance metadata address http://169.254.169.254."""
    metadata_prefix = "http://169.254.169.254"
    return url.startswith(metadata_prefix)
def url_request_helper(config, url, unsuccessful_resp, url_error_msg, headers=None):
    """
    Issue a GET request and return the parsed response.

    :param config: parsed configuration; consulted to see whether fetching the EC2 metadata token is disabled.
    :param url: url to request.
    :param unsuccessful_resp: message prefix logged by get_resp_obj for a non-200 answer.
    :param url_error_msg: message prefix logged when the url cannot be reached.
    :param headers: optional mapping of extra request headers. Defaults to None (no extra headers)
        rather than a shared mutable dict.
    :return: the value produced by get_resp_obj, or None on timeout, HTTP error or URL error.
    """
    try:
        req = Request(url)
        for k, v in (headers or {}).items():
            req.add_header(k, v)

        if not fetch_ec2_metadata_token_disabled(config) and is_instance_metadata_url(
            url
        ):
            # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html
            # IMDSv1 is a request/response method to access instance metadata
            # IMDSv2 is a session-oriented method to access instance metadata
            # We expect the token retrieve will fail in bridge networking environment (e.g. container) since the default hop
            # limit for getting the token is 1. If the token retrieve does timeout, we fallback to use IMDSv1 instead
            token = get_aws_ec2_metadata_token()
            if token:
                req.add_header("X-aws-ec2-metadata-token", token)

        request_resp = urlopen(req, timeout=1)
        return get_resp_obj(request_resp, url, unsuccessful_resp)
    except socket.timeout:
        err_msg = "Request timeout"
    except HTTPError as e:
        # For instance enable with IMDSv2 and fetch token disabled, Unauthorized 401 error will be thrown
        if (
            e.code == 401
            and fetch_ec2_metadata_token_disabled(config)
            and is_instance_metadata_url(url)
        ):
            logging.warning(
                "Unauthorized request to instance metadata url %s, IMDSv2 is enabled on the instance, while fetching "
                "ec2 metadata token is disabled. Please set the value of config item "
                '"%s" to "false" in config file %s.'
                % (url, DISABLE_FETCH_EC2_METADATA_TOKEN_ITEM, CONFIG_FILE)
            )
        err_msg = "Unable to reach the url at %s: status=%d, reason is %s" % (
            url,
            e.code,
            e.reason,
        )
    except URLError as e:
        err_msg = "Unable to reach the url at %s, reason is %s" % (url, e.reason)

    # Only the handlers above reach this point, and each of them sets a non-empty err_msg.
    logging.debug("%s %s", url_error_msg, err_msg)
    return None
def get_resp_obj(request_resp, url, unsuccessful_resp):
    """
    Turn an HTTP response into a Python object.

    :param request_resp: response object offering getcode(), read() and headers.
    :param url: requested url, used only for logging.
    :param unsuccessful_resp: message prefix logged for a non-200 status.
    :return: None for a non-200 status; the decoded JSON value when the body is valid JSON;
        otherwise the body as text (bytes bodies are decoded as utf-8).
    """
    status = request_resp.getcode()
    if status != 200:
        logging.debug(unsuccessful_resp + " %s: ResponseCode=%d", url, status)
        return None

    body = request_resp.read()
    body_is_text = type(body) is str

    try:
        if body_is_text:
            json_text = body
        else:
            # Honour the charset announced by the response, defaulting to us-ascii.
            charset = request_resp.headers.get_content_charset() or "us-ascii"
            json_text = body.decode(charset)
        return json.loads(json_text)
    except ValueError:
        # Not JSON (or not decodable with the announced charset): hand back plain text.
        if body_is_text:
            return body
        return body.decode("utf-8")
def bootstrap_logging(config, log_dir=LOG_DIR):
    """
    Attach a rotating file handler to the root logger using the watchdog settings in the config.

    :param config: parsed configuration providing logging_level, logging_max_bytes and
        logging_file_count in the watchdog section.
    :param log_dir: directory in which the log file is created.
    """
    requested_level = config.get(CONFIG_SECTION, "logging_level")
    known_levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }
    level = known_levels.get(requested_level.lower())

    # delay logging error about malformed log level until after logging is configured
    level_is_malformed = not level
    if level_is_malformed:
        level = logging.INFO

    max_bytes = config.getint(CONFIG_SECTION, "logging_max_bytes")
    file_count = config.getint(CONFIG_SECTION, "logging_file_count")

    log_path = os.path.join(log_dir, LOG_FILE)
    handler = RotatingFileHandler(
        log_path, maxBytes=max_bytes, backupCount=file_count
    )
    formatter = logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %Z",
    )
    handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.addHandler(handler)

    if level_is_malformed:
        logging.error(
            'Malformed logging level "%s", setting logging level to %s',
            requested_level,
            level,
        )
def parse_options(options):
    """
    Split a comma-separated mount option string into a dict.

    :param options: option string such as "rw,port=20560".
    :return: dict mapping each option name to its value, or to None for options without "=".
        Only the first "=" separates name and value, so a value may itself contain "=".
    """
    opts = {}
    for o in options.split(","):
        # partition() never raises on values that contain further "=" characters, unlike
        # unpacking the result of an unbounded split("=").
        key, separator, value = o.partition("=")
        opts[key] = value if separator else None
    return opts
def get_file_safe_mountpoint(mount):
    """
    Build the file-name-safe identifier of a mount: its absolute mountpoint with path separators
    turned into periods, followed by "." and the port.

    :param mount: Mount tuple providing mountpoint and options.
    :return: "<mountpoint>.<port>", or just "<mountpoint>" on macOS when the options carry no port.
    """
    dotted = os.path.abspath(mount.mountpoint).replace(os.sep, ".")
    mountpoint = dotted[1:] if dotted.startswith(".") else dotted

    opts = parse_options(mount.options)
    if "port" in opts:
        return mountpoint + "." + opts["port"]

    # some other localhost nfs mount not running over stunnel.
    # For MacOS, we ignore the port if the port is missing in mount options.
    if check_if_running_on_macos():
        return mountpoint

    # /proc/mounts provides a list of all mounts in use by the system (including the mount options used).
    # In the case of tls mount: stunnel establishes a localhost port connection in order to listen on the requests,
    # and then send packets further to the server:2049. If the port is 2049 which is the default nfs port,
    # /proc/mounts will not display the port number in the options information, thus watchdog process will not treat
    # the mount as EFS mount and won't restart the killed stunnel which cause the mount hang.
    # So the default port 2049 is appended to the mountpoint here.
    return mountpoint + "." + DEFAULT_NFS_PORT
def get_current_local_nfs_mounts(mount_file="/proc/mounts"):
    """
    Collect the NFS mounts whose server is on localhost (127.0.0.1).

    :param mount_file: mount table to parse when not running on macOS.
    :return: dict of Mount tuples keyed by the mountpoint and port as it appears in EFS watchdog
        state files.
    """
    discovered = []

    if check_if_running_on_macos():
        # stat command on MacOS does not have '--file-system' option to verify the filesystem type of a mount point,
        # traverse all the mounts, and find if current mount point is already mounted
        listing = subprocess.run(
            ["mount", "-t", "nfs"],
            check=True,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        ).stdout
        if not listing:
            logging.warning("No nfs mounts found")
        else:
            for entry in listing.split("\n"):
                fields = entry.split()
                if len(fields) < 4:
                    continue
                # Sample output: 127.0.0.1:/ on /Users/ec2-user/efs (nfs)
                server, mountpoint, fs_type = fields[0], fields[2], fields[3]
                mount_ops = get_nfs_mount_options_on_macos(mountpoint, server)
                discovered.append(
                    Mount._make(
                        [server, mountpoint, fs_type, mount_ops if mount_ops else "", 0, 0]
                    )
                )
    else:
        with open(mount_file) as table:
            for entry in table:
                try:
                    discovered.append(Mount._make(entry.strip().split()))
                except Exception as e:
                    # Make sure nfs mounts being skipped are made apparent
                    if " nfs4 " in entry:
                        logging.warning(
                            'Watchdog ignoring malformed nfs4 mount "%s": %s', entry, e
                        )
                    else:
                        logging.debug(
                            'Watchdog ignoring malformed mount "%s": %s', entry, e
                        )

    mount_dict = {}
    for candidate in discovered:
        is_local_nfs = (
            candidate.server.startswith("127.0.0.1") and "nfs" in candidate.type
        )
        if not is_local_nfs:
            continue
        safe_mnt = get_file_safe_mountpoint(candidate)
        if safe_mnt:
            mount_dict[safe_mnt] = candidate
    return mount_dict
def get_nfs_mount_options_on_macos(mount_point, mount_server="127.0.0.1:/"):
    """
    Query `nfsstat` for the original NFS parameters of a mount point.

    :param mount_point: mount point to inspect; an empty value yields None.
    :param mount_server: key of the mount's entry in the nfsstat JSON output.
    :return: the "NFS parameters" of the "Original mount options" joined with commas, or None when
        the mount point is empty, nfsstat times out or prints nothing, or the output lacks the
        expected structure.
    """
    if not mount_point:
        logging.warning("Unable to get local mount options with empty mount point")
        return None

    try:
        process = subprocess.run(
            ["nfsstat", "-f", "JSON", "-m", mount_point],
            check=True,
            stdout=subprocess.PIPE,
            universal_newlines=True,
            timeout=NFSSTAT_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        logging.warning(
            "Fetching nfs mount parameters timed out for mount point %s. Ignoring port option.",
            mount_point,
        )
        return None

    stdout = process.stdout
    if not stdout:
        logging.warning(
            "Unable to get local mount options with mount point: %s", mount_point
        )
        return None

    try:
        state_json = json.loads(stdout)
    except ValueError:
        logging.exception("Unable to parse json of %s", stdout)
        return None

    try:
        return ",".join(
            state_json.get(mount_server)
            .get("Original mount options")
            .get("NFS parameters")
        )
    except (AttributeError, TypeError):
        # AttributeError: one of the intermediate levels is missing (None has no .get).
        # TypeError: the innermost "NFS parameters" entry is missing or is not a list of strings,
        # which str.join rejects.
        logging.exception("Unable to get object in %s", state_json)
        return None
def get_state_files(state_file_dir):
    """
    Index the state files found directly inside state_file_dir.

    :param state_file_dir: directory to scan; a missing directory yields an empty dict.
    :return: dict mapping the mountpoint and port portion of each state file name to that file name.
        Only non-directory entries whose name starts with "fs-" are considered.
    """
    state_files = {}
    if not os.path.isdir(state_file_dir):
        return state_files

    for entry in os.listdir(state_file_dir):
        is_state_file = entry.startswith("fs-") and not os.path.isdir(
            os.path.join(state_file_dir, entry)
        )
        if not is_state_file:
            continue

        # This translates the state file name "fs-deadbeaf.home.user.mnt.12345"
        # into file-safe mountpoint "home.user.mnt.12345"
        mount_point_and_port = entry.split(".", 1)[-1]
        logging.debug(
            'Translating "%s" into mount point and port "%s"',
            entry,
            mount_point_and_port,
        )
        state_files[mount_point_and_port] = entry

    return state_files
def get_pid_in_state_dir(state_file, state_file_dir):
    """
    Read the stunnel pid file kept in the "<state_file>+" directory of a mount.

    :param state_file: The state file path, e.g. fs-deadbeef.mnt.20560.
    :param state_file_dir: The state file dir path, e.g. /var/run/efs.
    :return: the unmodified content of the pid file, or None when the file does not exist.
    """
    mount_state_dir = state_file + "+"
    pid_file_path = os.path.join(state_file_dir, mount_state_dir, STUNNEL_PID_FILE)

    if not os.path.exists(pid_file_path):
        return None

    with open(pid_file_path) as pid_file:
        return pid_file.read()
def is_mount_stunnel_proc_running(state_pid, state_file, state_file_dir):
    """
    Decide whether the pid recorded in a state file belongs to a live efs-proxy/stunnel process.
    The code was first written for stunnel and later extended to cover efs-proxy as well.

    The process counts as running iff all of the following hold:
    1. The pid in the state file is not None.
    2. The command name of the process with that pid contains "efs-proxy" or "stunnel".
    3. The process can be reached via os.kill(pid, 0).
    4. If a stunnel pid file exists in the mount's state directory, its value equals the pid in the
       state file. Every launched stunnel writes its pid there, and the file is only removed when
       the stunnel terminates; this keeps us from later signalling a stunnel process that is not
       owned by the mount. A missing pid file is tolerated (see below).

    :param state_pid: The pid in state file.
    :param state_file: The state file path, e.g. fs-deadbeef.mnt.20560.
    :param state_file_dir: The state file dir path, e.g. /var/run/efs.
    :return: True when the tunnel process is considered running, otherwise False.
    """
    if not state_pid:
        logging.debug("State pid is None for %s", state_file)
        return False

    process_name = check_process_name(state_pid)
    command_text = str(process_name) if process_name else ""
    if not any(marker in command_text for marker in ("efs-proxy", "stunnel")):
        logging.debug(
            "Process running on %s is not an efs-proxy or stunnel process, full command: %s.",
            state_pid,
            command_text,
        )
        return False

    if not is_pid_running(state_pid):
        logging.debug(
            "Stunnel or efs-proxy process with pid %s is not running anymore for %s.",
            state_pid,
            state_file,
        )
        return False

    recorded_pid = get_pid_in_state_dir(state_file, state_file_dir)
    if recorded_pid:
        if int(state_pid) != int(recorded_pid):
            logging.warning(
                "Stunnel pid mismatch in state file (pid = %s) and stunnel pid file (pid = %s). Assuming the "
                "stunnel is not running.",
                int(state_pid),
                int(recorded_pid),
            )
            return False
    else:
        # efs-utils versions older than 1.32.2 does not create a pid file in state dir
        # To avoid the healthy stunnel established by those version to be treated as not running due to the
        # missing pid file, which can result in stunnel being constantly restarted, assume the stunnel is
        # still running even if the stunnel pid file does not exist.
        logging.debug(
            "Pid file of stunnel does not exist for %s. It is possible that the stunnel is no longer running or the mount was mounted using an older version efs-utils (<1.32.2). Assuming the stunnel with pid %s is still running.",
            state_file,
            state_pid,
        )

    logging.debug("TLS tunnel for %s is running with pid %s", state_file, state_pid)
    return True
def is_pid_running(pid):
    """
    Probe a process with signal 0.

    :param pid: process id to probe; a falsy value yields False.
    :return: True when os.kill(pid, 0) succeeds, False when it raises OSError.
    """
    if pid:
        try:
            os.kill(pid, 0)
        except OSError:
            return False
        else:
            return True
    return False
def check_if_platform_is_mac():
    """Return True when sys.platform is one of the entries of MAC_OS_PLATFORM_LIST."""
    current_platform = sys.platform
    return current_platform in MAC_OS_PLATFORM_LIST
def get_system_release_version():
    """
    Determine a string describing the operating system release.

    :return: platform.platform() on macOS; otherwise the stripped content of the system release
        file, else the text after the first "=" of the first line of the os-release file that
        contains PRETTY_NAME, else the default unknown value.
    """
    # MacOS does not maintain paths /etc/os-release and /etc/sys-release
    if check_if_platform_is_mac():
        return platform.platform()

    try:
        with open(SYSTEM_RELEASE_PATH) as release_file:
            return release_file.read().strip()
    except IOError:
        logging.debug("Unable to read %s", SYSTEM_RELEASE_PATH)

    try:
        with open(OS_RELEASE_PATH) as release_file:
            pretty_name_line = next(
                (entry for entry in release_file if "PRETTY_NAME" in entry), None
            )
            if pretty_name_line is not None:
                return pretty_name_line.split("=")[1].strip()
    except IOError:
        logging.debug("Unable to read %s", OS_RELEASE_PATH)

    return DEFAULT_UNKNOWN_VALUE
def find_command_path(command, install_method):
    """
    Locate an executable with `which`, searching a fixed, platform-specific PATH.

    :param command: name of the executable to look for.
    :param install_method: hint appended to the error message when the executable is missing.
    :return: the path printed by `which`; when `which` fails, fatal_error() is invoked instead.
    """
    if check_if_platform_is_mac():
        # Homebrew on x86 macOS uses /usr/local/bin; Homebrew on Apple Silicon macOS uses /opt/homebrew/bin since v3.0.0
        # For more information, see https://brew.sh/2021/02/05/homebrew-3.0.0/
        env_path = "/opt/homebrew/bin:/usr/local/bin"
    else:
        # If not running on macOS, use linux paths
        env_path = (
            "/sbin:/usr/sbin:/usr/local/sbin:/root/bin:/usr/local/bin:/usr/bin:/bin"
        )
    os.putenv("PATH", env_path)

    try:
        which_output = subprocess.check_output(["which", command])
    except subprocess.CalledProcessError as e:
        fatal_error(
            "Failed to locate %s in %s - %s" % (command, env_path, install_method), e
        )
    else:
        return which_output.strip().decode()
# In ECS Amazon Linux 2, we start stunnel using `nsenter`, which runs as a subprocess of bash, uses the `setns`
# system call to join an existing namespace and then executes the specified program using `exec`. An exception
# raised that way won't be caught properly by subprocess.
# As a precaution for ECS AL2, where the stunnel binary is removed after a new efs-utils is installed and the
# watchdog can then no longer launch stunnel for an older mount, we replace the stunnel path in the command
# with the stunnel5 path.
#
def update_stunnel_command_for_ecs_amazon_linux_2(
    command, state, state_file_dir, state_file
):
    """
    Point an nsenter-launched stunnel command at the stunnel5 binary on Amazon Linux 2.

    The rewrite only happens when the command contains "nsenter", does not mention "stunnel5" yet
    and the system release is one of the Amazon Linux 2 identifiers. In that case the first
    element containing "stunnel" (but not "stunnel-config") is replaced in place and the state
    file is rewritten.

    :param command: list of command strings; modified in place when the rewrite applies.
    :param state: mount state whose "cmd" entry is logged and which is written back to disk.
    :param state_file_dir: directory holding the state file.
    :param state_file: name of the state file to rewrite.
    :return: the (possibly updated) command list.
    """
    rewrite_needed = (
        "nsenter" in command
        and "stunnel5" not in " ".join(command)
        and get_system_release_version() in AMAZON_LINUX_2_RELEASE_VERSIONS
    )
    if not rewrite_needed:
        return command

    for position, part in enumerate(command):
        if "stunnel" in part and "stunnel-config" not in part:
            command[position] = find_command_path(
                "stunnel5", STUNNEL_INSTALLATION_MESSAGE
            )
            break

    logging.info(
        "Rewriting %s with new stunnel cmd: %s for ECS Amazon Linux 2 platform.",
        state_file,
        " ".join(state["cmd"]),
    )
    rewrite_state_file(state, state_file_dir, state_file)
    return command
def command_uses_efs_proxy(command):
    """
    Tell whether a tunnel start command launches efs-proxy.

    The check is a substring match on every element of the command. That is sufficient because we
    control the filepath in which the efs-proxy executable is stored, so a directory on that path
    is never named efs-proxy while the executable is something else, like stunnel.

    :param command: list of strings representing the command used to start stunnel or efs-proxy.
    :return: True when any element contains the efs-proxy binary name, otherwise False.
    """
    return any(EFS_PROXY_BIN in part for part in command)
def start_tls_tunnel(child_procs, state, state_file_dir, state_file):
"""
Reads the command from the state file, and uses it to start a subprocess.
This is the command that efs-utils used to spin up the efs-proxy or stunnel process.
We launch the stunnel and efs-proxy process in a process group so that child processes can be easily killed.
:param child_procs: list that contains efs-proxy / stunnel processes that the Watchdog instance has spawned
:param state: the state corresponding to a given mount - the proxy process associated with this mount will be started
:param state_file_dir: the directory where mount state files are stored
:param state_file: this function may rewrite the command used to start up the proxy or stunnel process, and thus needs a handle on the state file to update it.
:return: the pid of the proxy or stunnel process that was spawned
"""
command = state["cmd"]
logging.info('Starting TLS tunnel: "%s"', " ".join(command))
efs_proxy_enabled = command_uses_efs_proxy(command)
command = update_stunnel_command_for_ecs_amazon_linux_2(
command, state, state_file_dir, state_file
)
tunnel = None
try:
tunnel = subprocess.Popen(
command,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
preexec_fn=os.setsid,
close_fds=True,
)
except FileNotFoundError as e:
if efs_proxy_enabled:
logging.warning("Watchdog failed to start efs-proxy due to %s", e)
else:
logging.warning("Watchdog failed to start stunnel due to %s", e)