cacheLib.py
import os
import os.path
import sys
import json
import base64
import redis
import pymysql
import boto3
import requests
from botocore.exceptions import ClientError


def store_configs(config_file, configs):
    '''
    This function stores configurations in a JSON file.
    '''
    with open(config_file, 'w') as fp:
        json.dump(configs, fp)


def load_configs(config_file):
    '''
    This function loads configurations from a JSON file.
    '''
    data = {}
    with open(config_file) as fp:
        data = json.load(fp)
    return data
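
# Illustrative round trip through the two helpers above (a sketch, not executed
# by this module; the file name and values are placeholders):
#
#   store_configs('configs.json', {'stack_name': 'my-stack', 'ttl': 300})
#   configs = load_configs('configs.json')
#   configs['ttl']   # -> 300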


def get_secret(secret_name, region_name):
    '''
    This function retrieves information from Secrets Manager.
    '''
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # In this sample we only handle the specific exceptions for the 'GetSecretValue' API.
    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    # We rethrow the exception by default.
    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError as e:
        if e.response['Error']['Code'] == 'DecryptionFailureException':
            # Secrets Manager can't decrypt the protected secret text using the provided KMS key.
            # Deal with the exception here, and/or rethrow at your discretion.
            raise e
        elif e.response['Error']['Code'] == 'InternalServiceErrorException':
            # An error occurred on the server side.
            # Deal with the exception here, and/or rethrow at your discretion.
            raise e
        elif e.response['Error']['Code'] == 'InvalidParameterException':
            # You provided an invalid value for a parameter.
            # Deal with the exception here, and/or rethrow at your discretion.
            raise e
        elif e.response['Error']['Code'] == 'InvalidRequestException':
            # You provided a parameter value that is not valid for the current state of the resource.
            # Deal with the exception here, and/or rethrow at your discretion.
            raise e
        elif e.response['Error']['Code'] == 'ResourceNotFoundException':
            # We can't find the resource that you asked for.
            # Deal with the exception here, and/or rethrow at your discretion.
            raise e
        else:
            # Any other client error is rethrown as-is.
            raise e
    else:
        # Decrypts secret using the associated KMS CMK.
        # Depending on whether the secret is a string or binary, one of these fields will be populated.
        if 'SecretString' in get_secret_value_response:
            secret = json.loads(get_secret_value_response['SecretString'])
        else:
            secret = json.loads(base64.b64decode(get_secret_value_response['SecretBinary']))
        return secret
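
# Illustrative call (the secret name and region are placeholders): the secret is
# expected to be a JSON document holding the database credentials, which is what
# get_stack_outputs() below relies on.
#
#   creds = get_secret('my-db-secret', 'us-east-1')
#   creds['username'], creds['host']  # etc.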


def get_stack_outputs(stack_name, region_name):
    '''
    This function retrieves all outputs of a stack from CloudFormation and
    enriches them with the database credentials stored in Secrets Manager.
    '''
    stack_outputs = {}
    cf_client = boto3.client('cloudformation', region_name=region_name)
    response = cf_client.describe_stacks(StackName=stack_name)
    outputs = response["Stacks"][0]["Outputs"]
    for output in outputs:
        stack_outputs[output["OutputKey"]] = output["OutputValue"]
    # The stack exposes the name of the secret that holds the database credentials.
    response = get_secret(stack_outputs['secretname'], region_name)
    stack_outputs['db_password'] = response['password']
    stack_outputs['db_name'] = response['dbname']
    stack_outputs['db_port'] = response['port']
    stack_outputs['db_username'] = response['username']
    stack_outputs['db_host'] = response['host']
    return stack_outputs
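
# Illustrative result (a sketch; the full set of keys depends on the CloudFormation
# template, and the stack name and region shown are placeholders). The keys listed
# are the ones this module itself reads:
#
#   outputs = get_stack_outputs('my-stack', 'us-east-1')
#   outputs['secretname']     # stack output: name of the Secrets Manager secret
#   outputs['redisendpoint']  # stack output: Redis endpoint used below for the cache
#   outputs['db_host'], outputs['db_username'], outputs['db_password'], outputs['db_name']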


def mysql_execute_command(sql, db_host, db_username, db_password):
    '''
    This function executes the SQL statement; it does not return any value.
    '''
    try:
        con = pymysql.connect(host=db_host,
                              user=db_username,
                              password=db_password,
                              autocommit=True,
                              local_infile=1)
        # Create cursor and execute SQL statement
        cursor = con.cursor()
        cursor.execute(sql)
        con.close()
    except Exception as e:
        print('Error: {}'.format(str(e)))
        sys.exit(1)


def mysql_fetch_data(sql, db_host, db_username, db_password, db_name):
    '''
    This function executes the SQL query and returns the result set.
    '''
    try:
        con = pymysql.connect(host=db_host,
                              user=db_username,
                              password=db_password,
                              database=db_name,
                              autocommit=True,
                              local_infile=1,
                              charset='utf8mb4',
                              cursorclass=pymysql.cursors.DictCursor)
        # Create cursor and execute SQL statement
        cursor = con.cursor()
        cursor.execute(sql)
        data_set = cursor.fetchall()
        con.close()
        return data_set
    except Exception as e:
        print('Error: {}'.format(str(e)))
        sys.exit(1)
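
# Illustrative query (a sketch; the table and fields match the schema created in
# initialize_database() below, and the connection parameters are placeholders):
#
#   rows = mysql_fetch_data("SELECT OBJECTID, Title FROM articles LIMIT 5",
#                           db_host, db_username, db_password, 'covid')
#   # With DictCursor, each row is a dict, e.g. {'OBJECTID': 1, 'Title': '...'}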


def flush_cache():
    '''
    This function flushes all records from the cache.
    '''
    Cache.flushall()


def query_mysql_and_cache(sql, db_host, db_username, db_password, db_name):
    '''
    This function returns records from the cache if they exist there;
    otherwise it queries the MySQL database and populates the cache.
    '''
    res = Cache.get(sql)
    if res:
        print('Records in cache...')
        return {'records_in_cache': True, 'data': res}
    res = mysql_fetch_data(sql, db_host, db_username, db_password, db_name)
    if res:
        print('Cache was empty. Now populating cache...')
        Cache.setex(sql, ttl, json.dumps(res))
        return {'records_in_cache': False, 'data': res}
    else:
        return None
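
# The function above implements a cache-aside lookup keyed on the SQL text:
# a Redis GET on the query string, falling back to MySQL and a SETEX with the
# configured TTL on a miss. A sketch of how a caller can use it (the SQL string
# below is only an example; the module-level code builds the real query):
#
#   result = query_mysql_and_cache("SELECT OBJECTID, Title FROM articles LIMIT 5",
#                                  db_host, db_username, db_password, db_name)
#   if result and result['records_in_cache']:
#       data = json.loads(result['data'])   # cached copy is stored as a JSON string
#   elif result:
#       data = result['data']               # fresh rows straight from MySQL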


def query_mysql(sql, db_host, db_username, db_password, db_name):
    '''
    This function retrieves records directly from the database, bypassing the cache.
    '''
    res = mysql_fetch_data(sql, db_host, db_username, db_password, db_name)
    if res:
        print('Records in database...')
        return res
    else:
        return None


def initialize_database(configs):
    '''
    This function initializes the MySQL database and loads the sample dataset.
    The module-level code below only calls it when the database has not been
    populated yet.
    '''
    # Initialize Database
    print('Initializing MySQL Database...')
    # Drop the table if it exists
    sql_command = "DROP TABLE IF EXISTS covid.articles;"
    mysql_execute_command(sql_command, configs['db_host'], configs['db_username'], configs['db_password'])
    # Create the table
    sql_command = "CREATE TABLE covid.articles (OBJECTID INT, SHA TEXT, PossiblePlace TEXT, Sentence TEXT, MatchedPlace TEXT, DOI TEXT, Title TEXT, Abstract TEXT, PublishedDate TEXT, Authors TEXT, Journal TEXT, Source TEXT, License TEXT, PRIMARY KEY (OBJECTID));"
    mysql_execute_command(sql_command, configs['db_host'], configs['db_username'], configs['db_password'])
    # Load the CSV file into MySQL (LOAD DATA LOCAL INFILE requires local_infile to
    # be enabled; the connections above are opened with local_infile=1)
    sql_command = """
        LOAD DATA LOCAL INFILE '{0}'
        INTO TABLE covid.articles
        FIELDS TERMINATED BY ','
        ENCLOSED BY '"'
        LINES TERMINATED BY '\n'
        IGNORE 1 ROWS;
        """.format(configs['dataset_file'])
    mysql_execute_command(sql_command, configs['db_host'], configs['db_username'], configs['db_password'])


# Load configurations from the config file
config_file = 'configs.json'
if os.path.exists(config_file):
    configs = load_configs(config_file)
    print('Local config file found...')
else:
    print('Missing config file...')
    sys.exit(1)
stack_name = configs['stack_name']
ttl = configs['ttl']
app_port = configs['app_port']
max_rows = configs['max_rows']  # maximum number of rows to query from the database
dataset_file = configs['dataset_file']
# Look up the AWS region from the EC2 instance metadata service
region_name = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document').json()['region']
configs['region_name'] = region_name

# If the database is not populated yet, retrieve the endpoints for the database,
# cache and compute instance from CloudFormation and populate the database
if configs['database_populated'] is False:
    # Get additional configurations from CloudFormation and save them on disk
    stack_outputs = get_stack_outputs(stack_name, region_name)
    for key in stack_outputs.keys():
        configs[key] = stack_outputs[key]
    # The database has not been initialized yet, so populate it with the sample data
    initialize_database(configs)
    configs['database_populated'] = True
    store_configs(config_file, configs)

# Initialize the cache
Cache = redis.Redis.from_url('redis://' + configs['redisendpoint'] + ':6379')

db_table = 'articles'
db_tbl_fields = ['OBJECTID', 'Sentence', 'Title', 'Source']
sql_fields = ', '.join(db_tbl_fields)
sql = "select SQL_NO_CACHE " + sql_fields + " from " + db_table + " where Sentence like '%delta%' order by OBJECTID limit " + str(max_rows)