retrain and save recommenders

env param to train and save the recommender with the web environment settings Ref #319
EpistasisLab · Apr 9, 2021 · f66e2fb · f66e2fb
1 parent 6ebea52
commit f66e2fb
Show file tree

Hide file tree

Showing 4 changed files with 101 additions and 16 deletions.
diff --git a/ai/ai.py b/ai/ai.py
@@ -100,7 +100,8 @@ def __init__(self,
                 datasets=False,
                 use_knowledgebase=False,
                 term_condition='n_recs',
-                max_time=5):
+                max_time=5,
+                train_save_only=False):
         """Initializes AI managing agent."""
         if 'RANDOM_SEED' in os.environ:
             self.random_state = int(os.environ['RANDOM_SEED'])
@@ -177,6 +178,11 @@ def __init__(self,
         # store dataset_id to hash dictionary
         self.dataset_mf_cache_id_hash_lookup = {}
 
+        # retrain and save recs and exit
+        if train_save_only == True:
+            self.train_save_recommenders(rec_class)
+            return
+
         # set recommender status
         self.labApi.set_recommender_status(
                 RECOMMENDER_STATUS.INITIALIZING.value)
@@ -249,16 +255,51 @@ def initialize_recommenders(self, rec_class):
                 self.rec_engines[pred_type]  = \
                         self.DEFAULT_REC_CLASS[pred_type](**recArgs)
 
+        logger.debug("recomendation engines initialized: ")
+        for prob_type, rec in self.rec_engines.items():
+            logger.debug(f'\tproblemType: {prob_type} - {rec}')
+            #logger.debug('\trec.ml_p:\n'+str(rec.ml_p.head()))
+
+
+    def train_save_recommenders(self, rec_class):
+        """
+        Retrain the classification and regression recommenders from the
+        knowledgebase and save them
+        """
 
-            # self.rec_engines[pred_type].update(kb['resultsData'][pred_type], 
-            #         self.dataset_mf_cache, source='knowledgebase')
-            ##########################################################
-            # this section is used to save trained recommenders
-            # on the PMLB knowledgebases.            
-            # For normal operation, they can be skipped.
-            # logger.info('saving recommender')
-            # self.rec_engines[pred_type].save()
-            ##########################################################
+        kb = self.load_knowledgebase()
+        assert kb is not None
+
+        for pred_type in self.rec_engines.keys():
+            logger.info('initialiazing rec engine for problem type "'
+                    +pred_type+'"')
+
+            # get the ml parameters for the given recommender type
+            logger.debug("getting ml_p")
+            ml_p = self.labApi.get_all_ml_p(pred_type)
+            assert ml_p is not None
+            assert len(ml_p) > 0
+
+            # Create supervised learning recommenders
+            logger.debug("initializing engine")
+            recArgs = self.DEFAULT_REC_ARGS[pred_type]
+            recArgs['ml_p'] = ml_p
+
+            recArgs['serialized_rec_directory'] = 'data/recommenders/pennaiweb'
+            recArgs['load_serialized_rec'] = "never" 
+            recArgs['knowledgebase_results'] = kb['resultsData'][pred_type]
+
+            if (rec_class):
+                self.rec_engines[pred_type] = rec_class(**recArgs)
+            else:
+                self.rec_engines[pred_type]  = \
+                        self.DEFAULT_REC_CLASS[pred_type](**recArgs)
+
+
+            #self.rec_engines[pred_type].update(kb['resultsData'][pred_type], 
+            #        self.dataset_mf_cache, source='knowledgebase')
+
+            logger.info('saving recommender')
+            self.rec_engines[pred_type].save()
 
         logger.debug("recomendation engines initialized: ")
         for prob_type, rec in self.rec_engines.items():
@@ -598,6 +639,10 @@ def main():
     parser.add_argument('--knowledgebase','-k', action='store_true',
             dest='USE_KNOWLEDGEBASE', default=False,
             help='Load a knowledgebase for the recommender')
+    parser.add_argument('--train_save_only', action='store_true',
+            dest='TRAIN_SAVE_ONLY', default=False,
+            help='Retrain and save recommenders and exit')
+
 
     args = parser.parse_args()
 
@@ -628,7 +673,8 @@ def main():
             verbose=args.VERBOSE, n_recs=args.N_RECS, 
             warm_start=args.WARM_START, datasets=args.DATASETS, 
             use_knowledgebase=args.USE_KNOWLEDGEBASE,
-            term_condition=args.TERM_COND, max_time=args.MAX_TIME)
+            term_condition=args.TERM_COND, max_time=args.MAX_TIME,
+            train_save_only=args.TRAIN_SAVE_ONLY)
 
     n = 0;
     try:
@@ -654,8 +700,9 @@ def main():
     finally:
         # shut down gracefully
         logger.info("Shutting down AI engine...")
-        logger.info("...Shutting down Request Manager...")
-        pennai.requestManager.shutdown()
+        if hasattr(pennai, "requestManager"):
+            logger.info("...Shutting down Request Manager...")
+            pennai.requestManager.shutdown()
         logger.info("Goodbye")
 
 if __name__ == '__main__':

diff --git a/config/ai.env-template b/config/ai.env-template
@@ -1,13 +1,14 @@
 # Environment variables for auto-starting the AI
 # Options:
 # AI_AUTOSTART: whether to start AI automatically
-# AI_RECOMMENDER: type of recommender. options: random, average, knn, svd
+# AI_RECOMMENDER: type of recommender. options: random, average, knnmeta, svd
 # AI_VERBOSE: 1: really loud, 0: loud
 # AI_PMLB_KNOWLEDGEBASE: 1: load the pmlb knowledgebase, 0:off
 # AI_TERM_COND: how AI stops recommending. n_recs, time or continuous
 # AI_MAX_TIME: maximum time in seconds to recommend when AI_TERM_COND is 'time'
 # AI_NUMRECOMMEND: how many recommendations the AI will make when AI_TERM_COND is 'n_recs'
 # MAX_FILE_SIZE: (optional) Maximum allowed file size in bytes
+# AI_TRAIN_SAVE_ONLY: (optional) Retrain and save the recommender, and exit
 
 
 AI_AUTOSTART=1

diff --git a/docker/lab/files/entrypoint.sh b/docker/lab/files/entrypoint.sh
@@ -60,13 +60,21 @@ if [ ${AI_AUTOSTART} -eq 1 ]; then
     [ -n "$AI_NUMRECOMMEND" ] && { PARMS+=" -n ${AI_NUMRECOMMEND}"; }
     [ -n "$AI_TERM_COND" ] && { PARMS+=" -term_condition ${AI_TERM_COND}"; }
     [ -n "$AI_MAX_TIME" ] && { PARMS+=" -max_time ${AI_MAX_TIME}"; }
+    # --train_save_only is a store_true flag in ai.py: pass it only when the
+    # variable is set to 1 (same condition as the pm2 branch below) and never
+    # append a value to it.
+    [ "${AI_TRAIN_SAVE_ONLY:-0}" = "1" ] && { PARMS+=" --train_save_only"; }
 
 
     echo "python -m ai.ai $PARMS"
 
     cd $PROJECT_ROOT/
-    #python -m ai.ai $PARMS
-    pm2 start "python -u -m ai.ai $PARMS" --name ai
+
+    # AI_TRAIN_SAVE_ONLY is optional: quote it and default to 0 so the numeric
+    # test does not fail with "unary operator expected" when it is unset/empty.
+    if [ "${AI_TRAIN_SAVE_ONLY:-0}" -eq 1 ]; then
+        # train-and-save is a one-shot run that exits when done, so pm2 must
+        # not restart it
+        pm2 start "python -u -m ai.ai $PARMS" --name ai --no-autorestart
+        #pm2 stop lab
+        #exit
+    else
+        pm2 start "python -u -m ai.ai $PARMS" --name ai
+    fi
+
 
 else
     echo "not autostarting ai..."

diff --git a/utils/generate_recommenders.py b/utils/generate_recommenders.py
@@ -0,0 +1,29 @@
+"""~This file is part of the PennAI library~
+
+Copyright (C) 2017 Epistasis Lab, University of Pennsylvania
+
+PennAI is maintained by:
+    - Heather Williams (hwilli@upenn.edu)
+    - Weixuan Fu (weixuanf@upenn.edu)
+    - William La Cava (lacava@upenn.edu)
+    - Michael Stauffer (stauffer@upenn.edu)
+    - and many other generous open source contributors
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+(Autogenerated header, do not modify)
+
+"""
+
+# Train and save recommenders