pandas-dev · Jun 1, 2013 · Jun 1, 2013 · Jun 1, 2013
diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py
@@ -37,10 +37,18 @@
 import random
 import numpy as np
 
+import pandas as pd
 from pandas import DataFrame, Series
 
+try:
+    import git # gitpython
+except Exception:
+    print("Error: Please install the `gitpython` package\n")
+    sys.exit(1)
+
 from suite import REPO_PATH
 
+VB_DIR = os.path.dirname(os.path.abspath(__file__))  # absolute path of the directory containing this script
 DEFAULT_MIN_DURATION = 0.01
 HEAD_COL="head[ms]"
 BASE_COL="base[ms]"
@@ -57,6 +65,14 @@
 parser.add_argument('-t', '--target-commit',
                     help='The commit to compare against the baseline (default: HEAD).',
                     type=str)
+parser.add_argument('--base-pickle',  # saved timings to use as the baseline side of the report
+                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
+                    'filename must be of the form <hash>-*.* or specify --base-commit separately',
+                    type=str)
+parser.add_argument('--target-pickle',  # saved timings to use as the target side of the report
+                    help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
+                    'filename must be of the form <hash>-*.* or specify --target-commit separately',
+                    type=str)
 parser.add_argument('-m', '--min-duration',
                     help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
                     type=float,
@@ -104,8 +120,7 @@
 parser.add_argument('-a', '--affinity',
                     metavar="a",
                     dest='affinity',
-                    default=1,
-                    type=int,
+                    default=None, type=int,  # keep the int conversion: main() formats the value with %d
                     help='set processor affinity of processm by default bind to cpu/core #1 only'
                              'requires the "affinity" python module , will raise Warning otherwise'  )
 
@@ -206,21 +221,34 @@ def profile_comparative(benchmarks):
 
         head_res = get_results_df(db, h_head)
         baseline_res = get_results_df(db, h_baseline)
-        totals = prep_totals(baseline_res, head_res)
-
-        h_msg =   repo.messages.get(h_head, "")
-        b_msg =   repo.messages.get(h_baseline, "")
 
-        print_report(totals,h_head=h_head,h_msg=h_msg,
-                     h_baseline=h_baseline,b_msg=b_msg)
+        report_comparative(head_res,baseline_res)
 
-        if args.outdf:
-            prprint("The results DataFrame was written to '%s'\n" %  args.outdf)
-            totals.save(args.outdf)
     finally:
         #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
         shutil.rmtree(TMP_DIR)
 
+def prep_pickle_for_total(df, agg_name='median'):
+    """
+    Accepts a DataFrame resulting from an invocation with -H -d o.pickle.
+    If multiple data columns are present (-N was used), the attribute of
+    the DataFrame named by `agg_name` is called to reduce them to a single
+    value per vbench; df.median is used by default. The reduced values go
+    in a 'timing' column and the index is copied into a 'name' column.
+
+    Returns a DataFrame of the form expected by prep_totals.
+    """
+    def prep(df):
+        agg = getattr(df,agg_name)
+        df = DataFrame(agg(1))  # 1 is passed positionally -- presumably the axis (reduce across columns); confirm
+        cols = list(df.columns)
+        cols[0]='timing'  # rename the single reduced column
+        df.columns=cols
+        df['name'] = list(df.index)
+        return df
+
+    return prep(df)
+
 def prep_totals(head_res, baseline_res):
     """
     Each argument should be a dataframe with  'timing' and 'name' columns
@@ -241,6 +269,27 @@ def prep_totals(head_res, baseline_res):
     ).sort("ratio").set_index('name')  # sort in ascending order
     return totals
 
+def report_comparative(head_res,baseline_res):
+    """
+    Print the report comparing args.target_commit to args.base_commit
+    from the two timing frames; save the totals to args.outdf if set.
+    Exits with status 1 if no git repo can be opened at VB_DIR.
+    """
+    try:
+        r=git.Repo(VB_DIR)
+    except (git.InvalidGitRepositoryError, git.NoSuchPathError) as e:
+        print("Error: could not open a git repo at '%s': %s" % (VB_DIR, e))
+        sys.exit(1)
+    totals = prep_totals(head_res,baseline_res)
+    h_head, h_baseline = args.target_commit, args.base_commit
+    h_msg =  r.commit(h_head).message.strip()
+    b_msg =  r.commit(h_baseline).message.strip()
+    print_report(totals,h_head=h_head,h_msg=h_msg,
+             h_baseline=h_baseline,b_msg=b_msg)
+    if args.outdf:
+        prprint("The results DataFrame was written to '%s'\n" %  args.outdf)
+        totals.save(args.outdf)
+
 def profile_head_single(benchmark):
     import gc
     results = []
@@ -398,18 +447,23 @@ def main():
     random.seed(args.seed)
     np.random.seed(args.seed)
 
-    try:
-        import affinity
-        affinity.set_process_affinity_mask(0,args.affinity)
-        assert affinity.get_process_affinity_mask(0) == args.affinity
-        print("CPU affinity set to %d" % args.affinity)
-    except ImportError:
-        import warnings
-        print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"+
-                      "The 'affinity' module is not available, results may be unreliable\n" +
-                      "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
-            )
-        time.sleep(2)
+    if args.base_pickle and args.target_pickle:
+        baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
+        target_res = prep_pickle_for_total(pd.load(args.target_pickle))
+
+        report_comparative(target_res, baseline_res)
+        sys.exit(0)
+
+    if args.affinity is not None:
+        try:
+            import affinity
+
+            affinity.set_process_affinity_mask(0,args.affinity)
+            assert affinity.get_process_affinity_mask(0) == args.affinity
+            print("CPU affinity set to %d" % args.affinity)
+        except ImportError:
+            print("-a/--afinity specified, but the 'affinity' module is not available, aborting.\n")
+            sys.exit(1)
 
     print("\n")
     prprint("LOG_FILE = %s" % args.log_file)
@@ -489,10 +543,40 @@ def inner(repo_path):
 
 if __name__ == '__main__':
     args = parser.parse_args()
-    if not args.head and (not args.base_commit and not args.target_commit):
+    if (not args.head
+        and not (args.base_commit and args.target_commit)
+        and not (args.base_pickle and args.target_pickle)):
         parser.print_help()
-    else:
-        import warnings
-        warnings.filterwarnings('ignore',category=FutureWarning)
-        warnings.filterwarnings('ignore',category=DeprecationWarning)
-        main()
+        sys.exit(1)
+    elif ((args.base_pickle or args.target_pickle) and not
+        (args.base_pickle and args.target_pickle)):
+        print("Must specify Both --base-pickle and --target-pickle.")
+        sys.exit(1)
+
+    if ((args.base_pickle or args.target_pickle) and not
+        (args.base_commit and args.target_commit)):
+        if not args.base_commit:
+            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
+            args.base_commit = os.path.basename(args.base_pickle).split('-')[0]
+        if not args.target_commit:
+            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
+            # parse the file name only, so a directory part of the path cannot end up in the hash
+            args.target_commit = os.path.basename(args.target_pickle).split('-')[0]
+
+    import warnings
+    warnings.filterwarnings('ignore',category=FutureWarning)
+    warnings.filterwarnings('ignore',category=DeprecationWarning)
+
+    if args.base_commit and args.target_commit:
+        print("Verifying specified commits exist in repo...")
+        r=git.Repo(VB_DIR)
+        for c in [ args.base_commit, args.target_commit ]:
+            try:
+                msg =  r.commit(c).message.strip()
+            except git.BadObject:
+                print("The commit '%s' was not found, aborting" % c)
+                sys.exit(1)
+            else:
+                print("%s: %s" % (c,msg))
+
+    main()