diff --git a/ai/ai.py b/ai/ai.py index de4783b5a..8b4d1b8b3 100644 --- a/ai/ai.py +++ b/ai/ai.py @@ -100,7 +100,8 @@ def __init__(self, datasets=False, use_knowledgebase=False, term_condition='n_recs', - max_time=5): + max_time=5, + train_save_only=False): """Initializes AI managing agent.""" if 'RANDOM_SEED' in os.environ: self.random_state = int(os.environ['RANDOM_SEED']) @@ -177,6 +178,11 @@ def __init__(self, # store dataset_id to hash dictionary self.dataset_mf_cache_id_hash_lookup = {} + # retrain and save recs and exit + if train_save_only == True: + self.train_save_recommenders(rec_class) + return + # set recommender status self.labApi.set_recommender_status( RECOMMENDER_STATUS.INITIALIZING.value) @@ -249,16 +255,51 @@ def initialize_recommenders(self, rec_class): self.rec_engines[pred_type] = \ self.DEFAULT_REC_CLASS[pred_type](**recArgs) + logger.debug("recomendation engines initialized: ") + for prob_type, rec in self.rec_engines.items(): + logger.debug(f'\tproblemType: {prob_type} - {rec}') + #logger.debug('\trec.ml_p:\n'+str(rec.ml_p.head())) + + + def train_save_recommenders(self, rec_class): + """ + Initilize classification and regression recommenders + """ - # self.rec_engines[pred_type].update(kb['resultsData'][pred_type], - # self.dataset_mf_cache, source='knowledgebase') - ########################################################## - # this section is used to save trained recommenders - # on the PMLB knowledgebases. - # For normal operation, they can be skipped. 
- # logger.info('saving recommender') - # self.rec_engines[pred_type].save() - ########################################################## + kb = self.load_knowledgebase() + assert kb is not None + + for pred_type in self.rec_engines.keys(): + logger.info('initialiazing rec engine for problem type "' + +pred_type+'"') + + # get the ml parameters for the given recommender type + logger.debug("getting ml_p") + ml_p = self.labApi.get_all_ml_p(pred_type) + assert ml_p is not None + assert len(ml_p) > 0 + + # Create supervised learning recommenders + logger.debug("initializing engine") + recArgs = self.DEFAULT_REC_ARGS[pred_type] + recArgs['ml_p'] = ml_p + + recArgs['serialized_rec_directory'] = 'data/recommenders/pennaiweb' + recArgs['load_serialized_rec'] = "never" + recArgs['knowledgebase_results'] = kb['resultsData'][pred_type] + + if (rec_class): + self.rec_engines[pred_type] = rec_class(**recArgs) + else: + self.rec_engines[pred_type] = \ + self.DEFAULT_REC_CLASS[pred_type](**recArgs) + + + #self.rec_engines[pred_type].update(kb['resultsData'][pred_type], + # self.dataset_mf_cache, source='knowledgebase') + + logger.info('saving recommender') + self.rec_engines[pred_type].save() logger.debug("recomendation engines initialized: ") for prob_type, rec in self.rec_engines.items(): @@ -598,6 +639,10 @@ def main(): parser.add_argument('--knowledgebase','-k', action='store_true', dest='USE_KNOWLEDGEBASE', default=False, help='Load a knowledgebase for the recommender') + parser.add_argument('--train_save_only', action='store_true', + dest='TRAIN_SAVE_ONLY', default=False, + help='Retrain and save recommenders and exit') + args = parser.parse_args() @@ -628,7 +673,8 @@ def main(): verbose=args.VERBOSE, n_recs=args.N_RECS, warm_start=args.WARM_START, datasets=args.DATASETS, use_knowledgebase=args.USE_KNOWLEDGEBASE, - term_condition=args.TERM_COND, max_time=args.MAX_TIME) + term_condition=args.TERM_COND, max_time=args.MAX_TIME, + train_save_only=args.TRAIN_SAVE_ONLY) n = 0; 
try: @@ -654,8 +700,9 @@ def main(): finally: # shut down gracefully logger.info("Shutting down AI engine...") - logger.info("...Shutting down Request Manager...") - pennai.requestManager.shutdown() + if hasattr(pennai, "requestManager"): + logger.info("...Shutting down Request Manager...") + pennai.requestManager.shutdown() logger.info("Goodbye") if __name__ == '__main__': diff --git a/config/ai.env-template b/config/ai.env-template index d7509ec04..a4591101c 100644 --- a/config/ai.env-template +++ b/config/ai.env-template @@ -1,13 +1,14 @@ # Environment variables for auto-starting the AI # Options: # AI_AUTOSTART: whether to start AI automatically -# AI_RECOMMENDER: type of recommender. options: random, average, knn, svd +# AI_RECOMMENDER: type of recommender. options: random, average, knnmeta, svd # AI_VERBOSE: 1: really loud, 0: loud # AI_PMLB_KNOWLEDGEBASE: 1: load the pmlb knowledgebase, 0:off # AI_TERM_COND: how AI stops recommending. n_recs, time or continuous # AI_MAX_TIME: maximum time in seconds to recommend when using 'n_recs' # AI_NUMRECOMMEND: how many recommendations the AI will make when toggled when using 'time' # MAX_FILE_SIZE: (optional) Maximum allowed file size in bytes +# AI_TRAIN_SAVE_ONLY: (optional) Retrain and save the recommender, and exit AI_AUTOSTART=1 diff --git a/docker/lab/files/entrypoint.sh b/docker/lab/files/entrypoint.sh index f79364a4f..f1ed44817 100644 --- a/docker/lab/files/entrypoint.sh +++ b/docker/lab/files/entrypoint.sh @@ -60,13 +60,21 @@ if [ ${AI_AUTOSTART} -eq 1 ]; then [ -n "$AI_NUMRECOMMEND" ] && { PARMS+=" -n ${AI_NUMRECOMMEND}"; } [ -n "$AI_TERM_COND" ] && { PARMS+=" -term_condition ${AI_TERM_COND}"; } [ -n "$AI_MAX_TIME" ] && { PARMS+=" -max_time ${AI_MAX_TIME}"; } + [ -n "$AI_TRAIN_SAVE_ONLY" ] && { PARMS+=" --train_save_only ${TRAIN_SAVE_ONLY}"; } echo "python -m ai.ai $PARMS" cd $PROJECT_ROOT/ - #python -m ai.ai $PARMS - pm2 start "python -u -m ai.ai $PARMS" --name ai + + if [ ${AI_TRAIN_SAVE_ONLY} -eq 1 ]; 
then + pm2 start "python -u -m ai.ai $PARMS" --name ai --no-autorestart + #pm2 stop lab + #exit + else + pm2 start "python -u -m ai.ai $PARMS" --name ai + fi + else echo "not autostarting ai..." diff --git a/utils/generate_recommenders.py b/utils/generate_recommenders.py new file mode 100644 index 000000000..50ba28d72 --- /dev/null +++ b/utils/generate_recommenders.py @@ -0,0 +1,29 @@ +"""~This file is part of the PennAI library~ + +Copyright (C) 2017 Epistasis Lab, University of Pennsylvania + +PennAI is maintained by: + - Heather Williams (hwilli@upenn.edu) + - Weixuan Fu (weixuanf@upenn.edu) + - William La Cava (lacava@upenn.edu) + - Michael Stauffer (stauffer@upenn.edu) + - and many other generous open source contributors + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +(Autogenerated header, do not modify) + +""" + +Train and save recommenders