From de9da80cab8fe83e0a869d43c738a0fbe39ec649 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Mon, 6 Feb 2023 16:30:53 +0100 Subject: [PATCH 1/7] initial implementation grid sampler --- easyvvuq/sampling/__init__.py | 1 + easyvvuq/sampling/grid_sampler.py | 94 +++++++++++++++++++++++++++++++ tests/grid_search/test_grid.py | 14 +++++ tests/test_grid_sampler.py | 66 ++++++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 easyvvuq/sampling/grid_sampler.py create mode 100755 tests/grid_search/test_grid.py create mode 100644 tests/test_grid_sampler.py diff --git a/easyvvuq/sampling/__init__.py b/easyvvuq/sampling/__init__.py index 7996ad019..2e89a6b11 100644 --- a/easyvvuq/sampling/__init__.py +++ b/easyvvuq/sampling/__init__.py @@ -22,6 +22,7 @@ from .mc_sampler import MCSampler from .csv_sampler import CSVSampler from .dataframe_sampler import DataFrameSampler +from .grid_sampler import Grid_Sampler __copyright__ = """ diff --git a/easyvvuq/sampling/grid_sampler.py b/easyvvuq/sampling/grid_sampler.py new file mode 100644 index 000000000..28374127a --- /dev/null +++ b/easyvvuq/sampling/grid_sampler.py @@ -0,0 +1,94 @@ +"""A grid sampler + +Useful for e.g. hyperparameter search. The "vary" dict contains the values +that must be considered per (hyper)parameter, for instance: + + vary = {"x1": [0.0, 0.5, 0.1], + "x2 = [1, 3], + "x3" = [True, False]} + +The sampler will create a tensor grid using all specified 1D parameter +values. +""" + +__author__ = "Wouter Edeling" +__copyright__ = """ + + Copyright 2018 Robin A. Richardson, David W. Wright + + This file is part of EasyVVUQ + + EasyVVUQ is free software: you can redistribute it and/or modify + it under the terms of the Lesser GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + EasyVVUQ is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + Lesser GNU General Public License for more details. + + You should have received a copy of the Lesser GNU General Public License + along with this program. If not, see . + +""" +__license__ = "LGPL" + +from itertools import product +import numpy as np +from .base import BaseSamplingElement, Vary + +class Grid_Sampler(BaseSamplingElement, sampler_name="grid_sampler"): + + def __init__(self, vary, count=0): + """ + Initialize the grid sampler. + + Parameters + ---------- + vary : dict, or list of dicts + A dictionary containing all 1D values for each parameter. For instance + vary = {"x1": [0.0, 0.5. 1.0], "x2": [True, False]}. This will + create a 2D tensor product of all (x1, x2) parameter combinations. + The tensor product points are stored in the 'points' attribute. + count : int, optional + Internal counter used to count the number of samples that have + been executed. The default is 0. + + Returns + ------- + None. + + """ + + self.vary = Vary(vary) + self.count = count + + # make sure all parameters are stored in a list or array, even + # if they have only a single value + for param in vary.keys(): + if type(vary[param]) != list and type(vary[param]) != np.ndarray: + vary[param] = [vary[param]] + + # use dtype=object to allow for multiple different type (float, boolean etc) + self.points = np.array(list(product(*list(vary.values()))), dtype=object) + + def is_finite(self): + return True + + def n_samples(self): + """Returns the number of samples in this sampler. 
+ """ + return self.points.shape[0] + + def __next__(self): + if self.count < self.n_samples(): + run_dict = {} + i_par = 0 + for param_name in self.vary.get_keys(): + run_dict[param_name] = self.points[self.count][i_par] + i_par += 1 + self.count += 1 + return run_dict + else: + raise StopIteration \ No newline at end of file diff --git a/tests/grid_search/test_grid.py b/tests/grid_search/test_grid.py new file mode 100755 index 000000000..a4b1e7378 --- /dev/null +++ b/tests/grid_search/test_grid.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +import numpy as np + +# stand in for the hyperparameters the grid search is typically used for +a = $x1 +b = $x2 + +if b == True: + f = a ** 2 +else: + f = -a ** 2 + +np.savetxt("out.csv", np.array([f]), header="f", comments='') \ No newline at end of file diff --git a/tests/test_grid_sampler.py b/tests/test_grid_sampler.py new file mode 100644 index 000000000..6ac7587b3 --- /dev/null +++ b/tests/test_grid_sampler.py @@ -0,0 +1,66 @@ +import pytest +import numpy as np +import easyvvuq as uq +from easyvvuq.actions import CreateRunDirectory, Encode, Decode, ExecuteLocal, Actions + +@pytest.fixture +def sampler(): + vary = {"x1" : [0.0, 0.5, 1.0], + "x2" : [True, False]} + return uq.sampling.Grid_Sampler(vary) + +@pytest.fixture +def campaign(): + + params = {} + params["x1"] = {"type":"float", "default": 0.5} + params["x2"] = {"type":"boolean", "default": True} + + # python file is its own template + encoder = uq.encoders.GenericEncoder('tests/grid_search/test_grid.py', + target_filename='test_grid.py') + + execute = ExecuteLocal("python3 test_grid.py") + + output_columns = ["f"] + decoder = uq.decoders.SimpleCSV( + target_filename='out.csv', + output_columns=output_columns) + + actions = Actions(CreateRunDirectory('/tmp'), Encode(encoder), execute, Decode(decoder)) + + campaign = uq.Campaign(name='foo', work_dir='/tmp', params=params, actions=actions) + + vary = {"x1" : [0.0, 0.5, 1.0], + "x2" : [True, False]} + + sampler = 
uq.sampling.Grid_Sampler(vary) + + campaign.set_sampler(sampler) + + campaign.execute().collate() + + return campaign + + +def test_tensor_product(sampler): + # test if the tensor product gets constructed properly + points = sampler.points + assert (points == np.array([[0.0, True], + [0.0, False], + [0.5, True], + [0.5, False], + [1.0, True], + [1.0, False]], dtype=object)).all() + + +def test_grid_search(campaign): + # test if the sampling works correctly + df = campaign.get_collation_result() + assert (df['f'].values == np.array([[ 0. ], + [-0. ], + [ 0.25], + [-0.25], + [ 1. ], + [-1. ]])).all() + \ No newline at end of file From 06c9cfedbda2875c12cb80c2b5d99cb0c34668a1 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Tue, 7 Feb 2023 13:12:50 +0100 Subject: [PATCH 2/7] finished grid sampler --- easyvvuq/sampling/grid_sampler.py | 64 +++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/easyvvuq/sampling/grid_sampler.py b/easyvvuq/sampling/grid_sampler.py index 28374127a..cc60743fa 100644 --- a/easyvvuq/sampling/grid_sampler.py +++ b/easyvvuq/sampling/grid_sampler.py @@ -2,12 +2,12 @@ Useful for e.g. hyperparameter search. The "vary" dict contains the values that must be considered per (hyper)parameter, for instance: - + vary = {"x1": [0.0, 0.5, 0.1], "x2 = [1, 3], "x3" = [True, False]} -The sampler will create a tensor grid using all specified 1D parameter +The sampler will create a tensor grid using all specified 1D parameter values. """ @@ -36,10 +36,11 @@ from itertools import product import numpy as np -from .base import BaseSamplingElement, Vary +from .base import BaseSamplingElement # , Vary + class Grid_Sampler(BaseSamplingElement, sampler_name="grid_sampler"): - + def __init__(self, vary, count=0): """ Initialize the grid sampler. @@ -50,7 +51,10 @@ def __init__(self, vary, count=0): A dictionary containing all 1D values for each parameter. For instance vary = {"x1": [0.0, 0.5. 
1.0], "x2": [True, False]}. This will create a 2D tensor product of all (x1, x2) parameter combinations. - The tensor product points are stored in the 'points' attribute. + If a list of vary dicts is specified, each vary dict will be treated + independently to generate points. These dicts do not have to contain + the same parameters. The tensor product points are stored in the + 'points' list, with one tensor product per vary dict. count : int, optional Internal counter used to count the number of samples that have been executed. The default is 0. @@ -60,18 +64,28 @@ def __init__(self, vary, count=0): None. """ + # allways add vary to list, even if only a single dict is specified + if not isinstance(vary, list): + vary = [vary] - self.vary = Vary(vary) + self.vary = vary self.count = count + self.points = [] # make sure all parameters are stored in a list or array, even - # if they have only a single value - for param in vary.keys(): - if type(vary[param]) != list and type(vary[param]) != np.ndarray: - vary[param] = [vary[param]] + # if they have only a single value + for _vary in vary: + for param in _vary.keys(): + if not isinstance(_vary[param], list) and not isinstance(_vary[param], np.ndarray): + vary[param] = [vary[param]] - # use dtype=object to allow for multiple different type (float, boolean etc) - self.points = np.array(list(product(*list(vary.values()))), dtype=object) + # use dtype=object to allow for multiple different type (float, boolean etc) + self.points.append(np.array(list(product(*list(_vary.values()))), dtype=object)) + + # the cumulative sizes of all ensembles generated by the vary dicts + self.cumul_sizes = np.cumsum([points.shape[0] for points in self.points]) + # add a zero to the beginning (necessary in __next__ subroutine) + self.cumul_sizes = np.insert(self.cumul_sizes, 0, 0) def is_finite(self): return True @@ -79,16 +93,34 @@ def is_finite(self): def n_samples(self): """Returns the number of samples in this sampler. 
""" - return self.points.shape[0] + # return self.points.shape[0] + return self.cumul_sizes[-1] def __next__(self): + """ + Return the next sample from the input distributions. + + Raises + ------ + StopIteration + Stop iteration when count >= n_samples. + + Returns + ------- + run_dict : dict + A dictionary with the random input samples, e.g. + {'x1': 0.5, 'x2': False}. + + """ if self.count < self.n_samples(): + vary_idx = np.where(self.count < self.cumul_sizes[1:])[0][0] run_dict = {} i_par = 0 - for param_name in self.vary.get_keys(): - run_dict[param_name] = self.points[self.count][i_par] + for param_name in self.vary[vary_idx].keys(): + sample_idx = self.count - self.cumul_sizes[vary_idx] + run_dict[param_name] = self.points[vary_idx][sample_idx][i_par] i_par += 1 self.count += 1 return run_dict else: - raise StopIteration \ No newline at end of file + raise StopIteration From 255255518f90da87aeb03f3abbb0faa05c837a41 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Wed, 8 Feb 2023 15:43:20 +0100 Subject: [PATCH 3/7] added tutorial and fix pep8 --- easyvvuq/actions/execute_local.py | 2 +- easyvvuq/analysis/pce_analysis.py | 8 +- easyvvuq/analysis/results.py | 6 +- easyvvuq/analysis/sc_analysis.py | 8 +- easyvvuq/db/sql.py | 4 +- easyvvuq/encoders/jinja_encoder.py | 2 +- .../simplex_stochastic_collocation.py | 22 +- easyvvuq/sampling/stochastic_collocation.py | 2 +- tests/gauss/gauss_json.py | 8 +- tests/grid_search/test_grid.py | 6 +- tests/test_SSC.py | 20 +- tests/test_actions_action_pool.py | 10 +- tests/test_actions_action_statuses.py | 10 +- tests/test_actions_execute_local.py | 6 +- tests/test_actions_execute_slurm.py | 2 +- tests/test_analysis_results.py | 12 +- tests/test_campaign.py | 4 +- tests/test_comparison_validate.py | 12 +- tests/test_db.py | 30 +- tests/test_decoder_hdf5.py | 2 +- tests/test_decoders_simple_csv.py | 10 +- tests/test_dimension_adaptive_SC.py | 38 +- tests/test_encoders_copy_encoder.py | 2 +- 
tests/test_encoders_directory_builder.py | 12 +- tests/test_ensemble_boot.py | 34 +- tests/test_grid_sampler.py | 45 +- tests/test_hierarchical_sparse_grid_sc.py | 6 +- tests/test_integration.py | 2 +- tests/test_jinja_encoder.py | 2 +- tests/test_jsondecoder.py | 22 +- tests/test_mc_analysis.py | 30 +- tests/test_mc_analysis_results.py | 36 +- tests/test_multiapp.py | 2 +- tests/test_pce_analysis_results.py | 34 +- tests/test_quasirandom.py | 20 +- tests/test_sampling_csv_sampler.py | 8 +- tests/test_sampling_mc.py | 6 +- tests/test_sampling_qmc.py | 8 +- tests/test_sampling_replica_sampler.py | 14 +- tests/test_sampling_sampler_of_samplers.py | 4 +- tests/test_sampling_sweep.py | 14 +- tests/test_sc_analysis_results.py | 32 +- tests/test_stochastic_collocation.py | 6 +- tests/test_surrogate_workflow.py | 8 +- tests/test_utils.py | 8 +- tests/test_vector.py | 2 +- tests/test_yamldecoder.py | 8 +- tutorials/fabsim3_cmd_api.py | 364 ++++++++ .../hyperparameter_tuning_tutorial.ipynb | 786 ++++++++++++++++++ tutorials/mnist/keras_mnist.template | 71 ++ tutorials/mnist/mnist_feats.png | Bin 0 -> 13225 bytes 51 files changed, 1516 insertions(+), 294 deletions(-) create mode 100644 tutorials/fabsim3_cmd_api.py create mode 100644 tutorials/hyperparameter_tuning_tutorial.ipynb create mode 100644 tutorials/mnist/keras_mnist.template create mode 100644 tutorials/mnist/mnist_feats.png diff --git a/easyvvuq/actions/execute_local.py b/easyvvuq/actions/execute_local.py index 1f3f8801e..4520a4281 100644 --- a/easyvvuq/actions/execute_local.py +++ b/easyvvuq/actions/execute_local.py @@ -268,7 +268,7 @@ def start(self, previous=None): for action in self.actions: previous = self.wrapper(action, previous) self.result = previous - assert(self.result['run_id'] == run_id) + assert (self.result['run_id'] == run_id) return previous def finished(self): diff --git a/easyvvuq/analysis/pce_analysis.py b/easyvvuq/analysis/pce_analysis.py index 392470af9..9ab3e9964 100644 --- 
a/easyvvuq/analysis/pce_analysis.py +++ b/easyvvuq/analysis/pce_analysis.py @@ -350,8 +350,8 @@ def sobols(P, coefficients): varied = [_ for _ in self.sampler.vary.get_keys()] S1 = {_: np.zeros(sobol.shape[-1]) for _ in varied} ST = {_: np.zeros(sobol.shape[-1]) for _ in varied} - #S2 = {_ : {__: np.zeros(sobol.shape[-1]) for __ in varied} for _ in varied} - #for v in varied: del S2[v][v] + # S2 = {_ : {__: np.zeros(sobol.shape[-1]) for __ in varied} for _ in varied} + # for v in varied: del S2[v][v] S2 = {_: np.zeros((len(varied), sobol.shape[-1])) for _ in varied} for n, si in enumerate(sobol_idx): if len(si) == 1: @@ -360,8 +360,8 @@ def sobols(P, coefficients): elif len(si) == 2: v1 = varied[si[0]] v2 = varied[si[1]] - #S2[v1][v2] = sobol[n] - #S2[v2][v1] = sobol[n] + # S2[v1][v2] = sobol[n] + # S2[v2][v1] = sobol[n] S2[v1][si[1]] = sobol[n] S2[v2][si[0]] = sobol[n] for i in si: diff --git a/easyvvuq/analysis/results.py b/easyvvuq/analysis/results.py index 98bb5024b..21467c8d8 100644 --- a/easyvvuq/analysis/results.py +++ b/easyvvuq/analysis/results.py @@ -167,7 +167,7 @@ def _get_sobols_general(self, getter, qoi=None, input_=None): ------- dict or array """ - assert(not ((qoi is None) and (input_ is not None))) + assert (not ((qoi is None) and (input_ is not None))) if (qoi is not None) and (qoi not in self.qois): raise RuntimeError('no such qoi in this analysis') if (input_ is not None) and (input_ not in self.inputs): @@ -349,7 +349,7 @@ def describe(self, qoi=None, statistic=None): an array with the values for that statistic. Otherwise will return a DataFrame with more data. 
""" - assert(not ((qoi is None) and (statistic is not None))) + assert (not ((qoi is None) and (statistic is not None))) statistics = ['mean', 'var', 'std', '1%', '10%', '90%', '99%', 'min', 'max', 'median'] qois = self.qois if qoi is not None: @@ -361,7 +361,7 @@ def describe(self, qoi=None, statistic=None): for statistic_ in statistics: try: value = self._describe(qoi, statistic_) - assert(isinstance(value, np.ndarray)) + assert (isinstance(value, np.ndarray)) for i, x in enumerate(value): try: result[(qoi, i)][statistic_] = x diff --git a/easyvvuq/analysis/sc_analysis.py b/easyvvuq/analysis/sc_analysis.py index 3b789fcd7..74984e869 100644 --- a/easyvvuq/analysis/sc_analysis.py +++ b/easyvvuq/analysis/sc_analysis.py @@ -385,7 +385,7 @@ def adapt_dimension(self, qoi, data_frame, store_stats_history=True, c_l = self.compute_comb_coef(l_norm=candidate_l_norm) _, var_candidate_l, _ = self.get_pce_stats( candidate_l_norm, self.pce_coefs[qoi], c_l) - #error in var + # error in var error[tuple(l)] = np.linalg.norm(var_candidate_l - var_l, np.inf) else: logging.debug('Specified refinement method %s not recognized' % method) @@ -467,7 +467,7 @@ def merge_accepted_and_admissible(self, level=0, **kwargs): admissible_idx = np.array(admissible_idx).reshape([count, self.N]) merged_l = np.concatenate((self.l_norm, admissible_idx)) # make sure final result contains only unique indices and store - #results in l_norm + # results in l_norm idx = np.unique(merged_l, axis=0, return_index=True)[1] # return np.array([merged_l[i] for i in sorted(idx)]) self.l_norm = np.array([merged_l[i] for i in sorted(idx)]) @@ -894,7 +894,7 @@ def SC2PCE(self, samples, qoi, verbose=True, **kwargs): for k in k_norm: # product of the PCE basis function or order k - 1 and all # Lagrange basis functions in a_1d, per dimension - #[[phi_k[0]*a_1d[0]], ..., [phi_k[N-1]*a_1d[N-1]]] + # [[phi_k[0]*a_1d[0]], ..., [phi_k[N-1]*a_1d[N-1]]] # orthogonal polynomial generated by chaospy phi_k = 
[cp.expansion.stieltjes(k[n] - 1, @@ -1265,7 +1265,7 @@ def get_sobol_indices(self, qoi, typ='first_order'): for i_u in range(wi_d_u.shape[0]): D_u[u] += np.sign(np.prod(diff)) * h[i_u]**2 * wi_d_u[i_u].prod() - #D_u[u] = D_u[u].flatten() + # D_u[u] = D_u[u].flatten() # all subsets of u W = list(powerset(u))[0:-1] diff --git a/easyvvuq/db/sql.py b/easyvvuq/db/sql.py index b80bcd15a..b21c323d9 100644 --- a/easyvvuq/db/sql.py +++ b/easyvvuq/db/sql.py @@ -244,7 +244,7 @@ def set_active_app(self, name): selected = self.session.query(AppTable).filter_by(name=name).all() if len(selected) == 0: raise RuntimeError('no such app - {}'.format(name)) - assert(not (len(selected) > 1)) + assert (not (len(selected) > 1)) app = selected[0] self.session.query(CampaignTable).update({'active_app': app.id}) self.session.commit() @@ -519,7 +519,7 @@ def _get_campaign_info(self, campaign_name=None): ------- SQLAlchemy query for campaign with this name. """ - assert(isinstance(campaign_name, str) or campaign_name is None) + assert (isinstance(campaign_name, str) or campaign_name is None) query = self.session.query(CampaignTable) if campaign_name is None: campaign_info = query diff --git a/easyvvuq/encoders/jinja_encoder.py b/easyvvuq/encoders/jinja_encoder.py index 4624f4858..8b811bbe3 100644 --- a/easyvvuq/encoders/jinja_encoder.py +++ b/easyvvuq/encoders/jinja_encoder.py @@ -1,5 +1,5 @@ import os -#from string import Template +# from string import Template from jinja2 import Template import logging diff --git a/easyvvuq/sampling/simplex_stochastic_collocation.py b/easyvvuq/sampling/simplex_stochastic_collocation.py index 48f7d2e46..afa4a251a 100644 --- a/easyvvuq/sampling/simplex_stochastic_collocation.py +++ b/easyvvuq/sampling/simplex_stochastic_collocation.py @@ -111,7 +111,7 @@ def init_grid(self): CONSEQUENCE: I NEED TO RE-MAKE A NEW 'Delaunay' OBJECT EVERYTIME THE GRID IS REFINED. 
""" - #tri = Delaunay(xi_k_jl, incremental=True) + # tri = Delaunay(xi_k_jl, incremental=True) tri = Delaunay(xi_k_jl) else: @@ -590,7 +590,7 @@ def check_LEC_j(self, p_j, v, S_j, n_mc, queue): Psi = self.compute_Psi(xi_Sj, p_j) # check if Psi is well poised - #det_Psi = np.linalg.det(Psi) + # det_Psi = np.linalg.det(Psi) # if det_Psi == 0: # #print 'Warning: determinant Psi is zero.' # #print 'Reducing local p_j from ' + str(p_j[j]) + ' to a lower value.' @@ -598,7 +598,7 @@ def check_LEC_j(self, p_j, v, S_j, n_mc, queue): # return queue.put({'p_j[j]':-99, 'el_idx_j':el_idx_j}) # compute the coefficients c_jl - #c_jl = np.linalg.solve(Psi, v_Sj) + # c_jl = np.linalg.solve(Psi, v_Sj) c_jl = DAFSILAS(Psi, v_Sj) # check the LEC condition for all simplices in the STENCIL S_j @@ -644,7 +644,7 @@ def check_LEC_j(self, p_j, v, S_j, n_mc, queue): Psi = self.compute_Psi(xi_Sj, p_j) # check if Psi is well poised - #det_Psi = np.linalg.det(Psi) + # det_Psi = np.linalg.det(Psi) # if det_Psi == 0: # #print 'Warning: determinant Psi is zero.' # #print 'Reducing local p_j from ' + str(p_j[j]) + ' to a lower value.' 
@@ -652,7 +652,7 @@ def check_LEC_j(self, p_j, v, S_j, n_mc, queue): # return queue.put({'p_j[j]':-99, 'el_idx_j':el_idx_j}) # compute the coefficients c_jl - #c_jl = np.linalg.solve(Psi, v_Sj) + # c_jl = np.linalg.solve(Psi, v_Sj) c_jl = DAFSILAS(Psi, v_Sj, False) if k == el_idx_j.size: @@ -684,7 +684,7 @@ def compute_stencil_j(self): for j in range(n_e): # the number of points in S_j - #Np1_j = factorial(n_xi + p_j[j])/(factorial(n_xi)*factorial(p_j[j])) + # Np1_j = factorial(n_xi + p_j[j])/(factorial(n_xi)*factorial(p_j[j])) # k = {1,...,n_s}\{k_j0, ..., k_jn_xi} idx = np.delete(range(n_s), self.tri.simplices[j]) # store the vertex indices of the element itself @@ -1055,7 +1055,7 @@ def surrogate(self, xi, S_j, p_j, v): # print 'Error, det(Psi)=0 in compute_surplus_k() method, should not be possible' # compute the coefficients c_jl - #c_jl = np.linalg.solve(Psi, v_Sj) + # c_jl = np.linalg.solve(Psi, v_Sj) c_jl = DAFSILAS(Psi, v_Sj, False) # compute the interpolation on the old grid @@ -1240,7 +1240,7 @@ def DAFSILAS(A, b, print_message=False): P = np.eye(n) # the ill-condition control parameter - #epsilon = np.finfo(np.float64).eps + # epsilon = np.finfo(np.float64).eps epsilon = 10**-14 for i in range(n - 1): @@ -1266,9 +1266,9 @@ def DAFSILAS(A, b, print_message=False): Ap[:, i + col] = tmp # Also interchange the entries in b - #tmp = A[i, n] + # tmp = A[i, n] # A[i, n] = A[i+col, n]Ap[i+1+j, i:m] - #A[i+col, n] = tmp + # A[i+col, n] = tmp # keep track of column switches via a series of permuation matrices P = # P1*P2*...*Pi*...*Pn ==> at each iteration x = P*xi @@ -1305,7 +1305,7 @@ def DAFSILAS(A, b, print_message=False): # ajj = 1, aij = 0 for j = i...n Ap[idx[0]:n, idx[0]:n] = np.eye(nullity) - #bj = 0 + # bj = 0 Ap[idx[0]:n, n] = 0 # ejj = 1, eij = 0 Ap[idx[0]:n, idx[0] + n + 1:m] = np.eye(nullity) diff --git a/easyvvuq/sampling/stochastic_collocation.py b/easyvvuq/sampling/stochastic_collocation.py index 82aac72aa..4968ca504 100644 --- 
a/easyvvuq/sampling/stochastic_collocation.py +++ b/easyvvuq/sampling/stochastic_collocation.py @@ -129,7 +129,7 @@ def __init__(self, else: self.l_norm = self.compute_sparse_multi_idx(self.L, self.N) # create sparse grid of dimension N and level q using the 1d - #rules in self.xi_1d + # rules in self.xi_1d self.xi_d = self.generate_grid(self.l_norm) self._n_samples = self.xi_d.shape[0] diff --git a/tests/gauss/gauss_json.py b/tests/gauss/gauss_json.py index bb3f09b46..2c7f3bbfa 100755 --- a/tests/gauss/gauss_json.py +++ b/tests/gauss/gauss_json.py @@ -60,12 +60,12 @@ numbers += bias numbers_out = np.array(list(enumerate(numbers))) -#header = 'Step,Value' +# header = 'Step,Value' -#fmt = '%i,%f' -#np.savetxt(output_filename, numbers_out, fmt=fmt, header=header) +# fmt = '%i,%f' +# np.savetxt(output_filename, numbers_out, fmt=fmt, header=header) -#json_output = {'numbers': list(numbers)} +# json_output = {'numbers': list(numbers)} # with open(output_filename + '.json', 'wt') as json_fp: # json.dump(json_output, json_fp) diff --git a/tests/grid_search/test_grid.py b/tests/grid_search/test_grid.py index a4b1e7378..da2d164c8 100755 --- a/tests/grid_search/test_grid.py +++ b/tests/grid_search/test_grid.py @@ -6,9 +6,9 @@ a = $x1 b = $x2 -if b == True: +if b: f = a ** 2 else: f = -a ** 2 - -np.savetxt("out.csv", np.array([f]), header="f", comments='') \ No newline at end of file + +np.savetxt("out.csv", np.array([f]), header="f", comments='') diff --git a/tests/test_SSC.py b/tests/test_SSC.py index 337174320..7c17f5fca 100644 --- a/tests/test_SSC.py +++ b/tests/test_SSC.py @@ -177,26 +177,26 @@ def test_init(SSC_campaign): # test the grid initialization _, sampler, _ = SSC_campaign points = sampler.tri.points - assert((points == np.array([[0., 0.], - [0., 1.], - [1., 0.], - [1., 1.], - [0.5, 0.5]])).all()) - assert(sampler.pmax_cutoff == 4) + assert ((points == np.array([[0., 0.], + [0., 1.], + [1., 0.], + [1., 1.], + [0.5, 0.5]])).all()) + assert (sampler.pmax_cutoff == 
4) def test_find_pmax(SSC_campaign): # test finding the max polynomials order, given the number of samples _, sampler, _ = SSC_campaign - assert(sampler.find_pmax(5) == 1) - assert(sampler.find_pmax(6) == 2) - assert(sampler.find_pmax(10) == 3) + assert (sampler.find_pmax(5) == 1) + assert (sampler.find_pmax(6) == 2) + assert (sampler.find_pmax(10) == 3) def test_compute_vol(SSC_campaign): # test simplex volume computation _, sampler, _ = SSC_campaign - assert((sampler.compute_vol() == np.array([0.25, 0.25, 0.25, 0.25])).all()) + assert ((sampler.compute_vol() == np.array([0.25, 0.25, 0.25, 0.25])).all()) def test_compute_xi_center(SSC_campaign): diff --git a/tests/test_actions_action_pool.py b/tests/test_actions_action_pool.py index 86f8b61f9..d22b01ece 100644 --- a/tests/test_actions_action_pool.py +++ b/tests/test_actions_action_pool.py @@ -20,14 +20,14 @@ def model(params): def test_action_pool_start(campaign): action_pool = campaign.execute(nsamples=3) - assert(len(action_pool.futures) == 3) + assert (len(action_pool.futures) == 3) action_pool.collate() - assert(len(action_pool.campaign.get_collation_result()) == 3) - assert(action_pool.progress() == {'ready': 0, 'active': 0, 'finished': 3, 'failed': 0}) + assert (len(action_pool.campaign.get_collation_result()) == 3) + assert (action_pool.progress() == {'ready': 0, 'active': 0, 'finished': 3, 'failed': 0}) def test_action_pool_start_sequential(campaign): action_pool = campaign.execute(nsamples=3, sequential=True) - assert(len(action_pool.results) == 3) + assert (len(action_pool.results) == 3) action_pool.collate() - assert(len(action_pool.campaign.get_collation_result()) == 3) + assert (len(action_pool.campaign.get_collation_result()) == 3) diff --git a/tests/test_actions_action_statuses.py b/tests/test_actions_action_statuses.py index 498c67935..fe4529fae 100644 --- a/tests/test_actions_action_statuses.py +++ b/tests/test_actions_action_statuses.py @@ -14,7 +14,7 @@ def action_pool(): def 
test_action_pool_start(action_pool): action_pool.start() - assert(action_pool.progress()['finished'] == 3) + assert (action_pool.progress()['finished'] == 3) mock1 = MagicMock() mock1.running = MagicMock(return_value=True) mock1.done = MagicMock(return_value=False) @@ -33,7 +33,7 @@ def test_action_pool_start(action_pool): mock4.result = MagicMock(return_value=False) action_pool.futures = [mock1, mock2, mock3, mock4] progress = action_pool.progress() - assert(progress['ready'] == 1) - assert(progress['active'] == 1) - assert(progress['finished'] == 1) - assert(progress['failed'] == 1) + assert (progress['ready'] == 1) + assert (progress['active'] == 1) + assert (progress['finished'] == 1) + assert (progress['failed'] == 1) diff --git a/tests/test_actions_execute_local.py b/tests/test_actions_execute_local.py index c5b842650..9c24c67be 100644 --- a/tests/test_actions_execute_local.py +++ b/tests/test_actions_execute_local.py @@ -7,17 +7,17 @@ def test_create_run_directory(tmpdir): action = CreateRunDirectory(tmpdir) previous = {'campaign_dir': 'test', 'run_id': 123456789, 'run_info': {'id': 123456789}} previous = action.start(previous) - assert(os.path.exists( + assert (os.path.exists( os.path.join( tmpdir, 'test', 'runs', 'runs_100000000-200000000', 'runs_123000000-124000000', 'runs_123450000-123460000', 'runs_123456700-123456800', 'run_123456789'))) previous = {'campaign_dir': 'test', 'run_id': 0, 'run_info': {'id': 0}} previous = action.start(previous) - assert(os.path.exists( + assert (os.path.exists( os.path.join( tmpdir, 'test', 'runs', 'runs_0-100000000', 'runs_0-1000000', 'runs_0-10000', 'runs_0-100'))) previous = {'campaign_dir': 'test', 'run_id': 100, 'run_info': {'id': 100}} action = CreateRunDirectory(tmpdir, flatten=True) previous = action.start(previous) - assert(os.path.exists(os.path.join(tmpdir, 'test', 'runs', 'run_100'))) + assert (os.path.exists(os.path.join(tmpdir, 'test', 'runs', 'run_100'))) diff --git a/tests/test_actions_execute_slurm.py 
b/tests/test_actions_execute_slurm.py index a51c630f2..014659342 100644 --- a/tests/test_actions_execute_slurm.py +++ b/tests/test_actions_execute_slurm.py @@ -34,4 +34,4 @@ def test_action_status_slurm(mock_subprocess_run): action = ExecuteSLURM('tutorials/epidemic/example.slurm', '$target_dir') previous = {'rundir': '/tmp'} # action.start(previous) - #assert(status.job_id == '65541') + # assert(status.job_id == '65541') diff --git a/tests/test_analysis_results.py b/tests/test_analysis_results.py index e941e443a..3ed4fb534 100644 --- a/tests/test_analysis_results.py +++ b/tests/test_analysis_results.py @@ -6,18 +6,18 @@ def test_keys_to_tuples(): - assert(AnalysisResults._keys_to_tuples({}) == {}) - assert(AnalysisResults._keys_to_tuples( + assert (AnalysisResults._keys_to_tuples({}) == {}) + assert (AnalysisResults._keys_to_tuples( {'a': 1, 'b': 2}) == {('a', 0): 1, ('b', 0): 2}) - assert(AnalysisResults._keys_to_tuples( + assert (AnalysisResults._keys_to_tuples( {('a', 0): 1, ('b', 0): 2}) == {('a', 0): 1, ('b', 0): 2}) - assert(AnalysisResults._keys_to_tuples( + assert (AnalysisResults._keys_to_tuples( {('a', 0): 1, 'b': 2}) == {('a', 0): 1, ('b', 0): 2}) def test_to_tuple(): - assert(AnalysisResults._to_tuple('a') == ('a', 0)) - assert(AnalysisResults._to_tuple(('a', 0)) == ('a', 0)) + assert (AnalysisResults._to_tuple('a') == ('a', 0)) + assert (AnalysisResults._to_tuple(('a', 0)) == ('a', 0)) with pytest.raises(RuntimeError): AnalysisResults._to_tuple(3) diff --git a/tests/test_campaign.py b/tests/test_campaign.py index 95e4a4c61..2464405ea 100644 --- a/tests/test_campaign.py +++ b/tests/test_campaign.py @@ -77,8 +77,8 @@ def campaign(tmpdir): def test_campaign_exists(tmp_path): campaign = uq.Campaign(name='test', work_dir=tmp_path) - assert(campaign.campaign_db.campaign_exists('test')) - assert(not campaign.campaign_db.campaign_exists('test2')) + assert (campaign.campaign_db.campaign_exists('test')) + assert (not 
campaign.campaign_db.campaign_exists('test2')) def test_invalid_sampler(tmp_path): diff --git a/tests/test_comparison_validate.py b/tests/test_comparison_validate.py index c7ab28357..c9bc082f1 100644 --- a/tests/test_comparison_validate.py +++ b/tests/test_comparison_validate.py @@ -34,8 +34,8 @@ def test_validate_similarity(): def test_validate_similarity_hellinger(): validator = uq.comparison.validate.ValidateSimilarityHellinger() - assert(validator.element_name() == 'validate_similarity_hellinger') - assert(validator.element_version() == '0.1') + assert (validator.element_name() == 'validate_similarity_hellinger') + assert (validator.element_version() == '0.1') d1 = cp.Exponential(1) d2 = cp.Exponential(2) xmin = min(d1.lower[0], d2.lower[0]) @@ -50,8 +50,8 @@ def test_validate_similarity_hellinger(): def test_validate_similarity_jensen_shannon(): validator = uq.comparison.validate.ValidateSimilarityJensenShannon() - assert(validator.element_name() == 'validate_similarity_jensen_shannon') - assert(validator.element_version() == '0.1') + assert (validator.element_name() == 'validate_similarity_jensen_shannon') + assert (validator.element_version() == '0.1') d1 = cp.Normal(0, 1) d2 = cp.Normal(1, 2) xmin = min(d1.lower[0], d2.lower[0]) @@ -65,8 +65,8 @@ def test_validate_similarity_jensen_shannon(): def test_validate_similarity_wasserstein(): validator = uq.comparison.validate.ValidateSimilarityWasserstein() - assert(validator.element_name() == 'validate_similarity_wasserstein') - assert(validator.element_version() == '0.1') + assert (validator.element_name() == 'validate_similarity_wasserstein') + assert (validator.element_version() == '0.1') d1 = cp.Normal(0, 1) d2 = cp.Normal(1, 2) xmin = min(d1.lower[0], d2.lower[0]) diff --git a/tests/test_db.py b/tests/test_db.py index 072e3c596..5a52d1e0b 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -62,26 +62,26 @@ def campaign(tmp_path, app_info): def test_db_file_created(campaign): - 
assert(os.path.isfile('{}/test.sqlite'.format(campaign.tmp_path))) + assert (os.path.isfile('{}/test.sqlite'.format(campaign.tmp_path))) def test_get_and_set_status(campaign): run_ids = list(range(1, 1011)) - assert(all([campaign.get_run_status(id_) == Status.NEW for id_ in run_ids])) + assert (all([campaign.get_run_status(id_) == Status.NEW for id_ in run_ids])) campaign.set_run_statuses(run_ids, Status.ENCODED) - assert(all([campaign.get_run_status(id_) == Status.ENCODED for id_ in run_ids])) + assert (all([campaign.get_run_status(id_) == Status.ENCODED for id_ in run_ids])) def test_get_num_runs(campaign): - assert(campaign.get_num_runs() == 1010) + assert (campaign.get_num_runs() == 1010) def test_app(campaign): with pytest.raises(RuntimeError): campaign.app('test_') app_dict = campaign.app('test') - assert(app_dict['name'] == 'test') - assert(isinstance(app_dict, dict)) + assert (app_dict['name'] == 'test') + assert (isinstance(app_dict, dict)) def test_add_app(campaign, app_info): @@ -90,17 +90,17 @@ def test_add_app(campaign, app_info): def test_campaign(campaign): - assert('test' in campaign.campaigns()) + assert ('test' in campaign.campaigns()) def test_get_campaign_id(campaign): with pytest.raises(RuntimeError): campaign.get_campaign_id('test_') - assert(campaign.get_campaign_id('test') == 1) + assert (campaign.get_campaign_id('test') == 1) def test_campaign_dir(campaign): - assert(campaign.campaign_dir('test') == campaign.tmp_path) + assert (campaign.campaign_dir('test') == campaign.tmp_path) def test_version_check(campaign): @@ -126,11 +126,11 @@ def test_collation(campaign): campaign.store_results('test', results) campaign.set_run_statuses([run[0] for run in campaign.runs()], Status.COLLATED) result = campaign.get_results('test', 1) - assert(isinstance(result, pd.DataFrame)) - assert(list(result.columns) == [('run_id', 0), ('iteration', 0), - ('a', 0), ('b', 0), ('c', 0), ('c', 1)]) - assert(list(result.iloc[100].values) == [101, 0, 1, 100, 101, 102]) 
- assert(result.count()[0] == 1010) + assert (isinstance(result, pd.DataFrame)) + assert (list(result.columns) == [('run_id', 0), ('iteration', 0), + ('a', 0), ('b', 0), ('c', 0), ('c', 1)]) + assert (list(result.iloc[100].values) == [101, 0, 1, 100, 101, 102]) + assert (result.count()[0] == 1010) def test_mv_collation(tmp_path, app_info): @@ -309,5 +309,5 @@ def test_mv_collation(tmp_path, app_info): campaign.add_app(app_info) results = [(0, mv_data), (1, mv_data)] campaign.store_results('test', results) - assert(not campaign.get_results('test', 1).empty) + assert (not campaign.get_results('test', 1).empty) return campaign diff --git a/tests/test_decoder_hdf5.py b/tests/test_decoder_hdf5.py index 5fd576990..4a0698d23 100644 --- a/tests/test_decoder_hdf5.py +++ b/tests/test_decoder_hdf5.py @@ -17,7 +17,7 @@ def test_hdf5(decoder): def test_get_output_path(decoder): - assert(decoder._get_output_path( + assert (decoder._get_output_path( {'run_dir': os.path.join('tests', 'hdf5')}, 'test.hdf5') == os.path.join('tests', 'hdf5', 'test.hdf5')) with pytest.raises(RuntimeError): diff --git a/tests/test_decoders_simple_csv.py b/tests/test_decoders_simple_csv.py index 5a1a909d2..08544f2c0 100644 --- a/tests/test_decoders_simple_csv.py +++ b/tests/test_decoders_simple_csv.py @@ -192,8 +192,8 @@ def test_wrong_column_exception(mv_data_fail): def test_simple_csv(decoder): df = decoder.parse_sim_output({'run_dir': os.path.join('tests', 'simple_csv')}) - assert(df['Step'][1] == 1) - assert(df['Value'][5] == 25.950662) + assert (df['Step'][1] == 1) + assert (df['Value'][5] == 25.950662) def test_init_exceptions(): @@ -202,7 +202,7 @@ def test_init_exceptions(): def test_get_output_path(decoder): - assert(decoder._get_output_path( + assert (decoder._get_output_path( {'run_dir': os.path.join('tests', 'simple_csv')}, 'test.csv') == os.path.join('tests', 'simple_csv', 'test.csv')) with pytest.raises(RuntimeError): @@ -211,6 +211,6 @@ def test_get_output_path(decoder): def 
test_mv_data(mv_data): data = mv_data.parse_sim_output({'run_dir': 'tests/files'}) - assert(data == TEST_MV_DATA) + assert (data == TEST_MV_DATA) for key in data.keys(): - assert(len(data[key]) == 51) + assert (len(data[key]) == 51) diff --git a/tests/test_dimension_adaptive_SC.py b/tests/test_dimension_adaptive_SC.py index 6197ab586..e3d7495c4 100755 --- a/tests/test_dimension_adaptive_SC.py +++ b/tests/test_dimension_adaptive_SC.py @@ -115,10 +115,10 @@ def test_look_ahead(adaptive_campaign): # This is because the order is not preserved in the setdiff2d subroutine of the # analysis class. for idx in admissible_idx: - assert(idx in sampler.admissible_idx) + assert (idx in sampler.admissible_idx) # check if the right number of new samples were computed during the 1st 3 iterations - assert(sampler.n_new_points == [6, 2, 6]) + assert (sampler.n_new_points == [6, 2, 6]) def test_adapt_dimension(adaptive_campaign): @@ -127,7 +127,7 @@ def test_adapt_dimension(adaptive_campaign): """ _, analysis, _ = adaptive_campaign # check if the dimensions were refined in the right order - assert(np.array_equal(analysis.l_norm, np.array([[1, 1, 1], [2, 1, 1], [1, 2, 1], [1, 1, 2]]))) + assert (np.array_equal(analysis.l_norm, np.array([[1, 1, 1], [2, 1, 1], [1, 2, 1], [1, 1, 2]]))) def test_SC2PCE(adaptive_campaign): @@ -135,12 +135,12 @@ def test_SC2PCE(adaptive_campaign): Test the conversion from the SC basis to the PCE basis (analysis.SC2PCE) """ _, analysis, _ = adaptive_campaign - assert(analysis.pce_coefs['f'][(1, 1, 1)][(1, 1, 1)] == - pytest.approx(np.array([0.22204355]), abs=0.0001)) - assert(analysis.pce_coefs['f'][(2, 1, 1)][(1, 1, 1)] == - pytest.approx(np.array([0.25376406]), abs=0.0001)) - assert(analysis.pce_coefs['f'][(2, 1, 1)][(2, 1, 1)] == - pytest.approx(np.array([0.10988306]), abs=0.0001)) + assert (analysis.pce_coefs['f'][(1, 1, 1)][(1, 1, 1)] == + pytest.approx(np.array([0.22204355]), abs=0.0001)) + assert (analysis.pce_coefs['f'][(2, 1, 1)][(1, 1, 1)] == + 
pytest.approx(np.array([0.25376406]), abs=0.0001)) + assert (analysis.pce_coefs['f'][(2, 1, 1)][(2, 1, 1)] == + pytest.approx(np.array([0.10988306]), abs=0.0001)) def test_comb_coef(adaptive_campaign): @@ -150,8 +150,8 @@ def test_comb_coef(adaptive_campaign): _, analysis, _ = adaptive_campaign coefs = analysis.compute_comb_coef(l_norm=np.array([[1, 1, 1], [1, 2, 1], [1, 3, 1], [2, 1, 1], [2, 2, 1]])) - assert(coefs == {(1, 1, 1): 0.0, (1, 2, 1): -1.0, - (1, 3, 1): 1.0, (2, 1, 1): 0.0, (2, 2, 1): 1.0}) + assert (coefs == {(1, 1, 1): 0.0, (1, 2, 1): -1.0, + (1, 3, 1): 1.0, (2, 1, 1): 0.0, (2, 2, 1): 1.0}) def test_error(adaptive_campaign): @@ -159,7 +159,7 @@ def test_error(adaptive_campaign): """ _, analysis, _ = adaptive_campaign - assert(np.array_equal(analysis.adaptation_errors, np.array( + assert (np.array_equal(analysis.adaptation_errors, np.array( [0.19032304687500004, 0.0033058593749999976, 0.0033058593749999976]))) @@ -180,19 +180,19 @@ def test_results(adaptive_campaign): # check moments computed_mean = results.describe('f', 'mean') computed_std = results.describe('f', 'std') - assert(computed_mean == pytest.approx(ref_mean, 0.01)) - assert(computed_std == pytest.approx(ref_std, 0.1)) + assert (computed_mean == pytest.approx(ref_mean, 0.01)) + assert (computed_std == pytest.approx(ref_std, 0.1)) # check sobols, x_1 should be close to 1, others to 0 - assert(results._get_sobols_first('f', 'x1') == pytest.approx(1.0, abs=0.01)) - assert(results._get_sobols_first('f', 'x2') == pytest.approx(0.0, abs=0.01)) - assert(results._get_sobols_first('f', 'x3') == pytest.approx(0.0, abs=0.01)) + assert (results._get_sobols_first('f', 'x1') == pytest.approx(1.0, abs=0.01)) + assert (results._get_sobols_first('f', 'x2') == pytest.approx(0.0, abs=0.01)) + assert (results._get_sobols_first('f', 'x3') == pytest.approx(0.0, abs=0.01)) # check the quality of the polynomial surrogate x = np.array([0.2, 0.1, 0.6]) f_at_x = poly_model(x) surrogate_at_x = analysis.surrogate('f', 
x) - assert(f_at_x == pytest.approx(surrogate_at_x, abs=0.01)) + assert (f_at_x == pytest.approx(surrogate_at_x, abs=0.01)) # check uncertainty magnification factor - assert(analysis.get_uncertainty_amplification('f') == pytest.approx(0.8048, abs=1e-4)) + assert (analysis.get_uncertainty_amplification('f') == pytest.approx(0.8048, abs=1e-4)) diff --git a/tests/test_encoders_copy_encoder.py b/tests/test_encoders_copy_encoder.py index fb88e2cb6..07d032274 100644 --- a/tests/test_encoders_copy_encoder.py +++ b/tests/test_encoders_copy_encoder.py @@ -16,4 +16,4 @@ def test_copy_encoder_encode(tmp_path): with pytest.raises(RuntimeError): encoder.encode({}, 'axcd') encoder.encode({}, tmp_path) - assert(os.path.isfile(os.path.join(tmp_path, filename2))) + assert (os.path.isfile(os.path.join(tmp_path, filename2))) diff --git a/tests/test_encoders_directory_builder.py b/tests/test_encoders_directory_builder.py index e8fe9cffd..27c4a0643 100644 --- a/tests/test_encoders_directory_builder.py +++ b/tests/test_encoders_directory_builder.py @@ -9,7 +9,7 @@ def builder(): def test_init(builder): - assert(builder.tree == {'a': {'b': {'c': None, 'd': None}}, 'e': {'f': None}}) + assert (builder.tree == {'a': {'b': {'c': None, 'd': None}}, 'e': {'f': None}}) def test_encoder(builder, tmp_path): @@ -19,15 +19,15 @@ def test_encoder(builder, tmp_path): def test_create_dir_tree(builder, tmp_path): builder.create_dir_tree(builder.tree, tmp_path) - assert(os.path.isdir(os.path.join(tmp_path, 'a', 'b', 'c'))) - assert(os.path.isdir(os.path.join(tmp_path, 'a', 'b', 'd'))) - assert(os.path.isdir(os.path.join(tmp_path, 'e', 'f'))) + assert (os.path.isdir(os.path.join(tmp_path, 'a', 'b', 'c'))) + assert (os.path.isdir(os.path.join(tmp_path, 'a', 'b', 'd'))) + assert (os.path.isdir(os.path.join(tmp_path, 'e', 'f'))) def test_get_restart_dict(builder): restart_dict = builder.get_restart_dict() - assert(restart_dict['tree'] == {'a': {'b': {'c': None, 'd': None}}, 'e': {'f': None}}) + assert 
(restart_dict['tree'] == {'a': {'b': {'c': None, 'd': None}}, 'e': {'f': None}}) def test_element_version(builder): - assert(isinstance(builder.element_version(), str)) + assert (isinstance(builder.element_version(), str)) diff --git a/tests/test_ensemble_boot.py b/tests/test_ensemble_boot.py index d48daefab..fef3fb7b1 100644 --- a/tests/test_ensemble_boot.py +++ b/tests/test_ensemble_boot.py @@ -33,26 +33,26 @@ def test_confidence_interval(): stat, low, high = confidence_interval(dist, 0.0, 0.05) dist = np.array([0.0]) stat, low, high = confidence_interval(dist, 0.0, 0.05) - assert(stat == low == high == 0.0) + assert (stat == low == high == 0.0) stat, low, high = confidence_interval(dist, 0.0, 0.05, pivotal=True) - assert(stat == low == high == 0.0) + assert (stat == low == high == 0.0) stat, low, high = confidence_interval(VALUES, 0.0, 0.05) - assert(stat == pytest.approx(-0.06909454)) - assert(low == pytest.approx(-1.4859470412500002)) - assert(high == pytest.approx(1.90957202975)) + assert (stat == pytest.approx(-0.06909454)) + assert (low == pytest.approx(-1.4859470412500002)) + assert (high == pytest.approx(1.90957202975)) stat, low, high = confidence_interval(VALUES, 0.0, 0.05, pivotal=True) - assert(stat == 0.0) - assert(low == pytest.approx(-1.90957202975)) - assert(high == pytest.approx(1.4859470412500002)) + assert (stat == 0.0) + assert (low == pytest.approx(-1.90957202975)) + assert (high == pytest.approx(1.4859470412500002)) def test_bootstrap(): with pytest.raises(RuntimeError): bootstrap(pd.DataFrame({}), np.mean) stat, low, high = bootstrap(pd.DataFrame({'a': [0.0]}), np.mean) - assert(stat == 0.0) - assert(low == 0.0) - assert(high == 0.0) + assert (stat == 0.0) + assert (low == 0.0) + assert (high == 0.0) def test_ensemble_bootstrap(): @@ -60,18 +60,18 @@ def test_ensemble_bootstrap(): 'a': np.concatenate((VALUES, VALUES)), 'b': ['group1'] * VALUES.shape[0] + ['group2'] * VALUES.shape[0]}) results = ensemble_bootstrap(df, groupby=['b'], 
qoi_cols=['a']) - assert(not results.empty) - assert(results.values.shape == (2, 3)) + assert (not results.empty) + assert (results.values.shape == (2, 3)) with pytest.raises(RuntimeError): ensemble_bootstrap(df, groupby=['b'], qoi_cols=['c']) results = ensemble_bootstrap(df, qoi_cols=['a']) - assert(results.values.shape == (1, 3)) + assert (results.values.shape == (1, 3)) def test_ensemble_boot(): analysis = EnsembleBoot() - assert(analysis.element_name() == 'ensemble_boot') - assert(analysis.element_version() == '0.1') + assert (analysis.element_name() == 'ensemble_boot') + assert (analysis.element_version() == '0.1') with pytest.raises(RuntimeError): analysis.analyse() with pytest.raises(RuntimeError): @@ -81,4 +81,4 @@ def test_ensemble_boot(): 'a': np.concatenate((VALUES, VALUES)), 'b': ['group1'] * VALUES.shape[0] + ['group2'] * VALUES.shape[0]}) results = analysis.analyse(df) - assert(not results.empty) + assert (not results.empty) diff --git a/tests/test_grid_sampler.py b/tests/test_grid_sampler.py index 6ac7587b3..9977a6400 100644 --- a/tests/test_grid_sampler.py +++ b/tests/test_grid_sampler.py @@ -3,21 +3,23 @@ import easyvvuq as uq from easyvvuq.actions import CreateRunDirectory, Encode, Decode, ExecuteLocal, Actions + @pytest.fixture def sampler(): - vary = {"x1" : [0.0, 0.5, 1.0], - "x2" : [True, False]} + vary = {"x1": [0.0, 0.5, 1.0], + "x2": [True, False]} return uq.sampling.Grid_Sampler(vary) + @pytest.fixture def campaign(): - + params = {} - params["x1"] = {"type":"float", "default": 0.5} - params["x2"] = {"type":"boolean", "default": True} - + params["x1"] = {"type": "float", "default": 0.5} + params["x2"] = {"type": "boolean", "default": True} + # python file is its own template - encoder = uq.encoders.GenericEncoder('tests/grid_search/test_grid.py', + encoder = uq.encoders.GenericEncoder('tests/grid_search/test_grid.py', target_filename='test_grid.py') execute = ExecuteLocal("python3 test_grid.py") @@ -28,18 +30,18 @@ def campaign(): 
output_columns=output_columns) actions = Actions(CreateRunDirectory('/tmp'), Encode(encoder), execute, Decode(decoder)) - + campaign = uq.Campaign(name='foo', work_dir='/tmp', params=params, actions=actions) - vary = {"x1" : [0.0, 0.5, 1.0], - "x2" : [True, False]} - + vary = {"x1": [0.0, 0.5, 1.0], + "x2": [True, False]} + sampler = uq.sampling.Grid_Sampler(vary) - + campaign.set_sampler(sampler) - + campaign.execute().collate() - + return campaign @@ -52,15 +54,14 @@ def test_tensor_product(sampler): [0.5, False], [1.0, True], [1.0, False]], dtype=object)).all() - + def test_grid_search(campaign): # test if the sampling works correctly df = campaign.get_collation_result() - assert (df['f'].values == np.array([[ 0. ], - [-0. ], - [ 0.25], - [-0.25], - [ 1. ], - [-1. ]])).all() - \ No newline at end of file + assert (df['f'].values == np.array([[0.], + [-0.], + [0.25], + [-0.25], + [1.], + [-1.]])).all() diff --git a/tests/test_hierarchical_sparse_grid_sc.py b/tests/test_hierarchical_sparse_grid_sc.py index e6505c0d2..f94b03762 100755 --- a/tests/test_hierarchical_sparse_grid_sc.py +++ b/tests/test_hierarchical_sparse_grid_sc.py @@ -118,10 +118,10 @@ def test_next_level_sparse_grid(sparse_campaign): if l not in analysis.l_norm: all_in = False break - assert(all_in) + assert (all_in) # check if the grid has the right size - assert(sampler.xi_d.shape[0] == 145) + assert (sampler.xi_d.shape[0] == 145) def test_results(sparse_campaign): @@ -136,4 +136,4 @@ def test_results(sparse_campaign): computed_sobol = results._get_sobols_first('f', 'x%d' % (i + 1)) logging.debug('Exact Sobol indices x%d = %.4f' % (i + 1, ref_sobols[i])) logging.debug('Computed Sobol indices x%d = %.4f' % (i + 1, computed_sobol)) - assert(ref_sobols[i] == pytest.approx(computed_sobol, abs=0.01)) + assert (ref_sobols[i] == pytest.approx(computed_sobol, abs=0.01)) diff --git a/tests/test_integration.py b/tests/test_integration.py index f2b141ada..497a9eb25 100644 --- a/tests/test_integration.py +++ 
b/tests/test_integration.py @@ -72,7 +72,7 @@ def _campaign(work_dir, campaign_name, app_name, params, encoder, decoder, sampl reloaded_campaign.set_app(app_name) reloaded_campaign.execute(nsamples=num_samples).collate() # Draw 3 more samples, execute, and collate onto existing dataframe - #reloaded_campaign.draw_samples(num_samples=num_samples, replicas=replicas) + # reloaded_campaign.draw_samples(num_samples=num_samples, replicas=replicas) # reloaded_campaign.collate() if stats is not None: reloaded_campaign.apply_analysis(stats) diff --git a/tests/test_jinja_encoder.py b/tests/test_jinja_encoder.py index becf2a281..4d4ec4e3b 100644 --- a/tests/test_jinja_encoder.py +++ b/tests/test_jinja_encoder.py @@ -130,7 +130,7 @@ def test_jinjaencoder(tmpdir): encoder = JinjaEncoder(template_fname='tests/jinjaencoder/namoptions.template', target_filename='namoptions.001') encoder.encode(params, tmpdir) - assert(os.path.isfile(os.path.join(tmpdir, 'namoptions.001'))) + assert (os.path.isfile(os.path.join(tmpdir, 'namoptions.001'))) if __name__ == "__main__": diff --git a/tests/test_jsondecoder.py b/tests/test_jsondecoder.py index 0212cea90..3ad937c23 100644 --- a/tests/test_jsondecoder.py +++ b/tests/test_jsondecoder.py @@ -8,11 +8,11 @@ def test_jsondecoder_basic(): decoder = JSONDecoder(os.path.join('jsondecoder', 'fredrik.json'), ['cfrac', 'we', 'v']) run_info = {'run_dir': 'tests'} data = decoder.parse_sim_output(run_info) - assert(data['cfrac'] == 0.24000000131541285) - assert(data['we'] == -0.4910355508327484) - assert(len(data['v']) == 126) - assert(data['v'][:3] == [0.014841768890619278, 0.014779693447053432, 0.014733896590769291]) - assert(data['v'][-3:] == [0.0010381652973592281, 0.0010054642334580421, 0.0009733123588375747]) + assert (data['cfrac'] == 0.24000000131541285) + assert (data['we'] == -0.4910355508327484) + assert (len(data['v']) == 126) + assert (data['v'][:3] == [0.014841768890619278, 0.014779693447053432, 0.014733896590769291]) + assert 
(data['v'][-3:] == [0.0010381652973592281, 0.0010054642334580421, 0.0009733123588375747]) def test_jsondecoder_scalars_only(): @@ -20,8 +20,8 @@ def test_jsondecoder_scalars_only(): decoder = JSONDecoder(os.path.join('jsondecoder', 'fredrik.json'), ['cfrac', 'we']) run_info = {'run_dir': 'tests'} data = decoder.parse_sim_output(run_info) - assert(data['cfrac'] == 0.24000000131541285) - assert(data['we'] == -0.4910355508327484) + assert (data['cfrac'] == 0.24000000131541285) + assert (data['we'] == -0.4910355508327484) def test_json_nested(): @@ -29,9 +29,9 @@ def test_json_nested(): [['root1', 'node1', 'leaf1'], ['root1', 'leaf2'], 'leaf3']) run_info = {'run_dir': 'tests'} data = decoder.parse_sim_output(run_info) - assert(data['root1.node1.leaf1'] == 0.33) - assert(data['root1.leaf2'] == 0.32) - assert(data['leaf3'] == [0.2, 0.3]) + assert (data['root1.node1.leaf1'] == 0.33) + assert (data['root1.leaf2'] == 0.32) + assert (data['leaf3'] == [0.2, 0.3]) def test_missing_column(): @@ -42,7 +42,7 @@ def test_missing_column(): with pytest.raises(RuntimeError) as excinfo: data = decoder.parse_sim_output(run_info) # Check if the missing column is reported in the exception message - assert("['root1', 'node1', 'abcd']" in str(excinfo.value)) + assert ("['root1', 'node1', 'abcd']" in str(excinfo.value)) def test_init_exceptions(): diff --git a/tests/test_mc_analysis.py b/tests/test_mc_analysis.py index b6596d8ab..410f5eb48 100644 --- a/tests/test_mc_analysis.py +++ b/tests/test_mc_analysis.py @@ -67,27 +67,27 @@ def test_sobol_bootstrap(data): mc_sampler, df = data analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['f']) s1, s1_conf, st, st_conf = analysis.sobol_bootstrap(df['f']) - assert(s1['x1'] == pytest.approx(0.5569058947880715, 0.01)) - assert(s1['x2'] == pytest.approx(0.20727553481694053, 0.01)) - assert(st['x1'] == pytest.approx(0.8132793654841785, 0.01)) - assert(st['x2'] == pytest.approx(0.3804962894947435, 0.01)) - assert(s1_conf['x1']['low'][0] == 
pytest.approx(0.14387035, 0.01)) - assert(s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01)) - assert(s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01)) - assert(s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01)) - assert(st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01)) - assert(st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01)) - assert(st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01)) - assert(st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01)) + assert (s1['x1'] == pytest.approx(0.5569058947880715, 0.01)) + assert (s1['x2'] == pytest.approx(0.20727553481694053, 0.01)) + assert (st['x1'] == pytest.approx(0.8132793654841785, 0.01)) + assert (st['x2'] == pytest.approx(0.3804962894947435, 0.01)) + assert (s1_conf['x1']['low'][0] == pytest.approx(0.14387035, 0.01)) + assert (s1_conf['x1']['high'][0] == pytest.approx(0.89428774, 0.01)) + assert (s1_conf['x2']['low'][0] == pytest.approx(-0.11063341, 0.01)) + assert (s1_conf['x2']['high'][0] == pytest.approx(0.46752829, 0.01)) + assert (st_conf['x1']['low'][0] == pytest.approx(0.61368887, 0.01)) + assert (st_conf['x1']['high'][0] == pytest.approx(1.01858671, 0.01)) + assert (st_conf['x2']['low'][0] == pytest.approx(0.24361207, 0.01)) + assert (st_conf['x2']['high'][0] == pytest.approx(0.49214117, 0.01)) def test_separate_output_values(data): mc_sampler, df = data analysis = uq.analysis.QMCAnalysis(sampler=mc_sampler, qoi_cols=['f']) f_M2, f_M1, f_Ni = analysis._separate_output_values(df['f'], 2, 100) - assert(f_M2.shape == (100, 1)) - assert(f_M1.shape == (100, 1)) - assert(f_Ni.shape == (100, 2, 1)) + assert (f_M2.shape == (100, 1)) + assert (f_M1.shape == (100, 1)) + assert (f_Ni.shape == (100, 2, 1)) def test_get_samples(data): diff --git a/tests/test_mc_analysis_results.py b/tests/test_mc_analysis_results.py index 291b01467..cde677951 100644 --- a/tests/test_mc_analysis_results.py +++ b/tests/test_mc_analysis_results.py @@ -83,50 +83,50 @@ def 
results_vectors(data_vectors): def test_results(results): - assert(isinstance(results, QMCAnalysisResults)) + assert (isinstance(results, QMCAnalysisResults)) sobols_first_x1 = results._get_sobols_first('f', 'x1') sobols_first_x2 = results._get_sobols_first('f', 'x2') sobols_total_x1 = results._get_sobols_total('f', 'x1') sobols_total_x2 = results._get_sobols_total('f', 'x2') - assert(sobols_first_x1 == pytest.approx(0.55690589, 0.001)) - assert(sobols_first_x2 == pytest.approx(0.20727553, 0.001)) - assert(sobols_total_x1 == pytest.approx(0.81327937, 0.001)) - assert(sobols_total_x2 == pytest.approx(0.38049629, 0.001)) + assert (sobols_first_x1 == pytest.approx(0.55690589, 0.001)) + assert (sobols_first_x2 == pytest.approx(0.20727553, 0.001)) + assert (sobols_total_x1 == pytest.approx(0.81327937, 0.001)) + assert (sobols_total_x2 == pytest.approx(0.38049629, 0.001)) def test_results_conf(results): sobols_first_x1_conf = results._get_sobols_first_conf('f', 'x1') - assert(sobols_first_x1_conf[0] == pytest.approx(0.14387, 0.001)) - assert(sobols_first_x1_conf[1] == pytest.approx(0.894288, 0.001)) + assert (sobols_first_x1_conf[0] == pytest.approx(0.14387, 0.001)) + assert (sobols_first_x1_conf[1] == pytest.approx(0.894288, 0.001)) sobols_first_x2_conf = results._get_sobols_first_conf('f', 'x2') - assert(sobols_first_x2_conf[0] == pytest.approx(-0.110633, 0.001)) - assert(sobols_first_x2_conf[1] == pytest.approx(0.467528, 0.001)) + assert (sobols_first_x2_conf[0] == pytest.approx(-0.110633, 0.001)) + assert (sobols_first_x2_conf[1] == pytest.approx(0.467528, 0.001)) sobols_total_x1_conf = results._get_sobols_total_conf('f', 'x1') - assert(sobols_total_x1_conf[0] == pytest.approx(0.613689, 0.001)) - assert(sobols_total_x1_conf[1] == pytest.approx(1.018587, 0.001)) + assert (sobols_total_x1_conf[0] == pytest.approx(0.613689, 0.001)) + assert (sobols_total_x1_conf[1] == pytest.approx(1.018587, 0.001)) sobols_total_x2_conf = results._get_sobols_total_conf('f', 'x2') - 
assert(sobols_total_x2_conf[0] == pytest.approx(0.243612, 0.001)) - assert(sobols_total_x2_conf[1] == pytest.approx(0.492141, 0.001)) + assert (sobols_total_x2_conf[0] == pytest.approx(0.243612, 0.001)) + assert (sobols_total_x2_conf[1] == pytest.approx(0.492141, 0.001)) def test_full_results(results): - assert(results.sobols_first() == {'f': {'x1': 0.5569058947880715, 'x2': 0.20727553481694053}}) - assert(results.sobols_total() == {'f': {'x1': 0.8132793654841785, 'x2': 0.3804962894947435}}) + assert (results.sobols_first() == {'f': {'x1': 0.5569058947880715, 'x2': 0.20727553481694053}}) + assert (results.sobols_total() == {'f': {'x1': 0.8132793654841785, 'x2': 0.3804962894947435}}) def test_describe(results_vectors): - assert( + assert ( results_vectors.describe()[ ('g', 1)].to_dict() == { 'mean': 0.4691844466934421, 'var': 0.08534945020531205, 'std': 0.29214628220347433}) - assert( + assert ( results_vectors.describe('h')[ ('h', 1)].to_dict() == { 'mean': 0.6873389710989142, 'var': 0.07501266456861228, 'std': 0.27388440000958847}) - assert(isinstance(results_vectors.describe('h', 'std'), np.ndarray)) + assert (isinstance(results_vectors.describe('h', 'std'), np.ndarray)) diff --git a/tests/test_multiapp.py b/tests/test_multiapp.py index 383b078f9..0c0d4ba61 100644 --- a/tests/test_multiapp.py +++ b/tests/test_multiapp.py @@ -185,7 +185,7 @@ def test_multiapp(tmpdir): cooling_df = campaign.get_collation_result - assert(not cannonsim_df.equals(cooling_df)) + assert (not cannonsim_df.equals(cooling_df)) if __name__ == "__main__": diff --git a/tests/test_pce_analysis_results.py b/tests/test_pce_analysis_results.py index 62184f738..c9d0b7431 100644 --- a/tests/test_pce_analysis_results.py +++ b/tests/test_pce_analysis_results.py @@ -84,19 +84,19 @@ def results_vectors(data_vectors): def test_results(results): - assert(isinstance(results, PCEAnalysisResults)) + assert (isinstance(results, PCEAnalysisResults)) sobols_first_x1 = results._get_sobols_first('f', 'x1') 
sobols_first_x2 = results._get_sobols_first('f', 'x2') sobols_second_x1 = results._get_sobols_second('f', 'x1') sobols_second_x2 = results._get_sobols_second('f', 'x2') sobols_total_x1 = results._get_sobols_total('f', 'x1') sobols_total_x2 = results._get_sobols_total('f', 'x2') - assert(sobols_first_x1 == pytest.approx(0.62644867, 0.001)) - assert(sobols_first_x2 == pytest.approx(0.26789576, 0.001)) - assert(sobols_second_x1['x2'] == pytest.approx(0.10565556484738273, 0.001)) - assert(sobols_second_x2['x1'] == pytest.approx(0.10565556484738273, 0.001)) - assert(sobols_total_x1 == pytest.approx(0.73210424, 0.001)) - assert(sobols_total_x2 == pytest.approx(0.37355133, 0.001)) + assert (sobols_first_x1 == pytest.approx(0.62644867, 0.001)) + assert (sobols_first_x2 == pytest.approx(0.26789576, 0.001)) + assert (sobols_second_x1['x2'] == pytest.approx(0.10565556484738273, 0.001)) + assert (sobols_second_x2['x1'] == pytest.approx(0.10565556484738273, 0.001)) + assert (sobols_total_x1 == pytest.approx(0.73210424, 0.001)) + assert (sobols_total_x2 == pytest.approx(0.37355133, 0.001)) def test_full_results(results): @@ -106,22 +106,22 @@ def test_full_results(results): results.sobols_first('f', 'y') with pytest.raises(AssertionError): results.sobols_first(None, 'x1') - assert(results.sobols_first()['f']['x1'][0] == pytest.approx(0.6264486733708418)) - assert(results.sobols_first()['f']['x2'][0] == pytest.approx(0.2678957617817755)) - assert(results.sobols_first('f')['x1'][0] == pytest.approx(0.6264486733708418)) - assert(results.sobols_first('f')['x2'][0] == pytest.approx(0.2678957617817755)) - assert(results.sobols_first('f', 'x1')[0] == pytest.approx(0.6264486733708418)) - assert(results.sobols_first('f', 'x2')[0] == pytest.approx(0.2678957617817755)) + assert (results.sobols_first()['f']['x1'][0] == pytest.approx(0.6264486733708418)) + assert (results.sobols_first()['f']['x2'][0] == pytest.approx(0.2678957617817755)) + assert (results.sobols_first('f')['x1'][0] == 
pytest.approx(0.6264486733708418)) + assert (results.sobols_first('f')['x2'][0] == pytest.approx(0.2678957617817755)) + assert (results.sobols_first('f', 'x1')[0] == pytest.approx(0.6264486733708418)) + assert (results.sobols_first('f', 'x2')[0] == pytest.approx(0.2678957617817755)) def test_distribution(results): with pytest.raises(RuntimeError): results.get_distribution('z') - assert(results.get_distribution('f').pdf([0, 0]) == pytest.approx([0.44296863, 0.44296863])) + assert (results.get_distribution('f').pdf([0, 0]) == pytest.approx([0.44296863, 0.44296863])) def test_describe(results_vectors): - assert( + assert ( results_vectors.describe()[ ('g', 1)].to_dict() == { @@ -135,7 +135,7 @@ def test_describe(results_vectors): '99%': pytest.approx(0.9905999521854744, 0.001), 'min': pytest.approx(-0.775685017772766, 0.001), 'max': pytest.approx(1.775781592068878, 0.001)}) - assert( + assert ( results_vectors.describe('g').to_dict()[ ('g', 1)] == { @@ -149,4 +149,4 @@ def test_describe(results_vectors): '99%': pytest.approx(0.9905999521854744, 0.001), 'min': pytest.approx(-0.7756850177727665, 0.001), 'max': pytest.approx(1.775781592068878, 0.001)}) - assert(isinstance(results_vectors.describe('g', 'min'), np.ndarray)) + assert (isinstance(results_vectors.describe('g', 'min'), np.ndarray)) diff --git a/tests/test_quasirandom.py b/tests/test_quasirandom.py index 0ccfc357c..a4bd91ee6 100644 --- a/tests/test_quasirandom.py +++ b/tests/test_quasirandom.py @@ -6,19 +6,19 @@ def test_lhc(): vary = {'a': cp.Uniform(-5, 3), 'b': cp.Uniform(2, 10)} sampler = uq.sampling.quasirandom.LHCSampler(vary, max_num=10) for sample in sampler: - assert(sample['a'] >= -5.0) - assert(sample['a'] <= 3.0) - assert(sample['b'] >= 2.0) - assert(sample['b'] <= 10.0) - assert(sampler.n_samples() == 10) + assert (sample['a'] >= -5.0) + assert (sample['a'] <= 3.0) + assert (sample['b'] >= 2.0) + assert (sample['b'] <= 10.0) + assert (sampler.n_samples() == 10) def test_halton(): vary = {'a': 
cp.Uniform(-5, 3), 'b': cp.Uniform(2, 10)} sampler = uq.sampling.quasirandom.HaltonSampler(vary, max_num=10) for sample in sampler: - assert(sample['a'] >= -5.0) - assert(sample['a'] <= 3.0) - assert(sample['b'] >= 2.0) - assert(sample['b'] <= 10.0) - assert(sampler.n_samples() == 10) + assert (sample['a'] >= -5.0) + assert (sample['a'] <= 3.0) + assert (sample['b'] >= 2.0) + assert (sample['b'] <= 10.0) + assert (sampler.n_samples() == 10) diff --git a/tests/test_sampling_csv_sampler.py b/tests/test_sampling_csv_sampler.py index 757ea45b5..c0584f730 100644 --- a/tests/test_sampling_csv_sampler.py +++ b/tests/test_sampling_csv_sampler.py @@ -9,8 +9,8 @@ def test_csv_sampler(): counter = 0 for sample in sampler: if sample['Step'] == 5: - assert(sample['Value'] == 25.950662) + assert (sample['Value'] == 25.950662) counter += 1 - assert(counter == 10) - assert(sampler.n_samples() == 10) - assert(sampler.is_finite()) + assert (counter == 10) + assert (sampler.n_samples() == 10) + assert (sampler.is_finite()) diff --git a/tests/test_sampling_mc.py b/tests/test_sampling_mc.py index 46d1ed246..f0c61f75e 100644 --- a/tests/test_sampling_mc.py +++ b/tests/test_sampling_mc.py @@ -7,11 +7,11 @@ def test_sampling(): vary = {'a': cp.Uniform(-5, 0), 'b': cp.Uniform(2, 10)} sampler = MCSampler(vary, 100) - assert(sampler.n_samples() == 400) + assert (sampler.n_samples() == 400) for _ in range(sampler.n_samples()): sample = next(sampler) - assert(sample['a'] >= -5 and sample['a'] <= 0) - assert(sample['b'] >= 2 and sample['b'] <= 10) + assert (sample['a'] >= -5 and sample['a'] <= 0) + assert (sample['b'] >= 2 and sample['b'] <= 10) with pytest.raises(StopIteration): next(sampler) diff --git a/tests/test_sampling_qmc.py b/tests/test_sampling_qmc.py index 5a91b4117..f76eb0bbc 100644 --- a/tests/test_sampling_qmc.py +++ b/tests/test_sampling_qmc.py @@ -14,17 +14,17 @@ def test_init(): def test_is_finite(): vary = {'a': cp.Uniform(-5, 3), 'b': cp.Uniform(2, 10)} sampler = 
QMCSampler(vary, 100) - assert(sampler.is_finite()) + assert (sampler.is_finite()) def test_sampling(): vary = {'a': cp.Uniform(-5, 0), 'b': cp.Uniform(2, 10)} sampler = QMCSampler(vary, 100) - assert(sampler.n_samples == 400) + assert (sampler.n_samples == 400) for _ in range(sampler.n_samples): sample = next(sampler) - assert(sample['a'] >= -5 and sample['a'] <= 0) - assert(sample['b'] >= 2 and sample['b'] <= 10) + assert (sample['a'] >= -5 and sample['a'] <= 0) + assert (sample['b'] >= 2 and sample['b'] <= 10) with pytest.raises(StopIteration): next(sampler) diff --git a/tests/test_sampling_replica_sampler.py b/tests/test_sampling_replica_sampler.py index 52cd66345..7985b3b89 100644 --- a/tests/test_sampling_replica_sampler.py +++ b/tests/test_sampling_replica_sampler.py @@ -15,7 +15,7 @@ def test_infite_exception(): def test_is_finite(replica_sampler): - assert(not replica_sampler.is_finite()) + assert (not replica_sampler.is_finite()) def test_n_samples(replica_sampler): @@ -25,14 +25,14 @@ def test_n_samples(replica_sampler): def test_replica_sampler_ensemble(replica_sampler): params = next(replica_sampler) - assert(params == {'a': 1, 'b': 3, 'ensemble_id': 0}) + assert (params == {'a': 1, 'b': 3, 'ensemble_id': 0}) params = next(replica_sampler) - assert(params == {'a': 1, 'b': 4, 'ensemble_id': 1}) + assert (params == {'a': 1, 'b': 4, 'ensemble_id': 1}) params = next(replica_sampler) - assert(params == {'a': 2, 'b': 3, 'ensemble_id': 2}) + assert (params == {'a': 2, 'b': 3, 'ensemble_id': 2}) params = next(replica_sampler) - assert(params == {'a': 2, 'b': 4, 'ensemble_id': 3}) + assert (params == {'a': 2, 'b': 4, 'ensemble_id': 3}) params = next(replica_sampler) - assert(params == {'a': 1, 'b': 3, 'ensemble_id': 0}) + assert (params == {'a': 1, 'b': 3, 'ensemble_id': 0}) params = next(replica_sampler) - assert(params == {'a': 1, 'b': 4, 'ensemble_id': 1}) + assert (params == {'a': 1, 'b': 4, 'ensemble_id': 1}) diff --git 
a/tests/test_sampling_sampler_of_samplers.py b/tests/test_sampling_sampler_of_samplers.py index 066205234..1b2879adc 100644 --- a/tests/test_sampling_sampler_of_samplers.py +++ b/tests/test_sampling_sampler_of_samplers.py @@ -27,14 +27,14 @@ def test_init_exceptions(): def test_is_finite(multi_sampler): - assert(multi_sampler.is_finite()) + assert (multi_sampler.is_finite()) def test_n_samples(): sampler1 = BasicSweep({'a': [1, 2, 3], 'b': [4, 5, 6]}) sampler2 = BasicSweep({'a': [1, 2, 3], 'b': [4, 5, 6]}) multi = MultiSampler(sampler1, sampler2) - assert(multi.n_samples() == 81) + assert (multi.n_samples() == 81) def test_iterator(): diff --git a/tests/test_sampling_sweep.py b/tests/test_sampling_sweep.py index 23ca33f24..30538c2f5 100644 --- a/tests/test_sampling_sweep.py +++ b/tests/test_sampling_sweep.py @@ -11,16 +11,16 @@ def test_wrap_iterable(): res = [] for var_name, val in wrap_iterable('a', [1, 2, 3]): res.append((var_name, val)) - assert(res == [('a', 1), ('a', 2), ('a', 3)]) + assert (res == [('a', 1), ('a', 2), ('a', 3)]) def test_sweep(basic_sweep_sampler): res = [] for run_dict in basic_sweep_sampler: res.append(run_dict) - assert(res == [{'a': 1, 'b': 4}, {'a': 1, 'b': 5}, {'a': 1, 'b': 6}, - {'a': 2, 'b': 4}, {'a': 2, 'b': 5}, {'a': 2, 'b': 6}, - {'a': 3, 'b': 4}, {'a': 3, 'b': 5}, {'a': 3, 'b': 6}]) + assert (res == [{'a': 1, 'b': 4}, {'a': 1, 'b': 5}, {'a': 1, 'b': 6}, + {'a': 2, 'b': 4}, {'a': 2, 'b': 5}, {'a': 2, 'b': 6}, + {'a': 3, 'b': 4}, {'a': 3, 'b': 5}, {'a': 3, 'b': 6}]) def test_init(): @@ -30,12 +30,12 @@ def test_init(): def test_is_finite(basic_sweep_sampler): - assert(basic_sweep_sampler.is_finite()) + assert (basic_sweep_sampler.is_finite()) def test_n_samples(basic_sweep_sampler): - assert(basic_sweep_sampler.n_samples() == 9) + assert (basic_sweep_sampler.n_samples() == 9) def test_basic_sweep_single_list(): - assert(len(list(BasicSweep({'a': [1, 2, 3]}))) == 3) + assert (len(list(BasicSweep({'a': [1, 2, 3]}))) == 3) diff --git 
a/tests/test_sc_analysis_results.py b/tests/test_sc_analysis_results.py index 672a39eb0..159149790 100644 --- a/tests/test_sc_analysis_results.py +++ b/tests/test_sc_analysis_results.py @@ -81,11 +81,11 @@ def results_vectors(data_vectors): def test_results(results): - assert(isinstance(results, SCAnalysisResults)) + assert (isinstance(results, SCAnalysisResults)) sobols_first_x1 = results._get_sobols_first('f', 'x1') sobols_first_x2 = results._get_sobols_first('f', 'x2') - assert(sobols_first_x1 == pytest.approx(0.610242, 0.001)) - assert(sobols_first_x2 == pytest.approx(0.26096511, 0.001)) + assert (sobols_first_x1 == pytest.approx(0.610242, 0.001)) + assert (sobols_first_x2 == pytest.approx(0.26096511, 0.001)) with pytest.raises(RuntimeError): results.sobols_total() @@ -102,16 +102,16 @@ def test_full_results(results): results.sobols_first('f', 'y') with pytest.raises(AssertionError): results.sobols_first(None, 'x1') - assert(results.sobols_first()['f']['x1'][0] == pytest.approx(0.6102419965318732, 0.001)) - assert(results.sobols_first()['f']['x2'][0] == pytest.approx(0.2609651061314295, 0.001)) - assert(results.sobols_first('f')['x1'][0] == pytest.approx(0.6102419965318732, 0.001)) - assert(results.sobols_first('f')['x2'][0] == pytest.approx(0.2609651061314295, 0.001)) - assert(results.sobols_first('f', 'x1')[0] == pytest.approx(0.6102419965318732, 0.001)) - assert(results.sobols_first('f', 'x2')[0] == pytest.approx(0.2609651061314295, 0.001)) + assert (results.sobols_first()['f']['x1'][0] == pytest.approx(0.6102419965318732, 0.001)) + assert (results.sobols_first()['f']['x2'][0] == pytest.approx(0.2609651061314295, 0.001)) + assert (results.sobols_first('f')['x1'][0] == pytest.approx(0.6102419965318732, 0.001)) + assert (results.sobols_first('f')['x2'][0] == pytest.approx(0.2609651061314295, 0.001)) + assert (results.sobols_first('f', 'x1')[0] == pytest.approx(0.6102419965318732, 0.001)) + assert (results.sobols_first('f', 'x2')[0] == 
pytest.approx(0.2609651061314295, 0.001)) def test_describe(results): - assert(results.describe().to_dict()[('f', 0)] == { + assert (results.describe().to_dict()[('f', 0)] == { 'mean': pytest.approx(0.9101117102420444, 0.001), 'std': pytest.approx(0.8184617581393419, 0.001), 'var': pytest.approx(0.6698796495365424, 0.001) @@ -119,9 +119,9 @@ def test_describe(results): def test_vectors(results_vectors): - assert(results_vectors.sobols_first(('g', 0), 'x1') == pytest.approx(1.0)) - assert(results_vectors.sobols_first(('g', 0), 'x2') == pytest.approx(0.0)) - assert(results_vectors.sobols_first(('g', 1), 'x1') == pytest.approx(0.0)) - assert(results_vectors.sobols_first(('g', 1), 'x2') == pytest.approx(1.0)) - assert(results_vectors.sobols_first(('g', 2), 'x1') == pytest.approx(0.5)) - assert(results_vectors.sobols_first(('g', 2), 'x2') == pytest.approx(0.5)) + assert (results_vectors.sobols_first(('g', 0), 'x1') == pytest.approx(1.0)) + assert (results_vectors.sobols_first(('g', 0), 'x2') == pytest.approx(0.0)) + assert (results_vectors.sobols_first(('g', 1), 'x1') == pytest.approx(0.0)) + assert (results_vectors.sobols_first(('g', 1), 'x2') == pytest.approx(1.0)) + assert (results_vectors.sobols_first(('g', 2), 'x1') == pytest.approx(0.5)) + assert (results_vectors.sobols_first(('g', 2), 'x2') == pytest.approx(0.5)) diff --git a/tests/test_stochastic_collocation.py b/tests/test_stochastic_collocation.py index 1baddd16f..d0956caed 100644 --- a/tests/test_stochastic_collocation.py +++ b/tests/test_stochastic_collocation.py @@ -16,8 +16,8 @@ def test_lagrange_poly(): - assert(uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 0) == -3.5) - assert(uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 1) == 2.0999999999999996) - assert(uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 2) == 2.4000000000000004) + assert (uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 0) == -3.5) + assert (uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 1) == 
2.0999999999999996) + assert (uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 2) == 2.4000000000000004) with pytest.raises(IndexError): uq.analysis.sc_analysis.lagrange_poly(2.0, [8, 4, 9], 3) diff --git a/tests/test_surrogate_workflow.py b/tests/test_surrogate_workflow.py index fc6832e2a..249166585 100644 --- a/tests/test_surrogate_workflow.py +++ b/tests/test_surrogate_workflow.py @@ -67,7 +67,7 @@ def test_surrogate_workflow(tmpdir, sampler): for index, row in df.iterrows(): surrogate_y = surrogate({'Pe': row['Pe'][0], 'f': row['f'][0]})['u'] model_y = row['u'].values - #assert(pytest.approx(surrogate_y == model_y)) + # assert(pytest.approx(surrogate_y == model_y)) assert np.max(np.abs(surrogate_y - model_y)) < 1e-6 # Attempt callibration with MCMC @@ -75,7 +75,7 @@ def test_surrogate_workflow(tmpdir, sampler): db_location = campaign.db_location campaign = None reloaded_campaign = uq.Campaign('sc', db_location=db_location) - assert(reloaded_campaign._active_app_name == 'surrogate') + assert (reloaded_campaign._active_app_name == 'surrogate') u = np.array([0., 0.00333333, 0.00666667, 0.01, 0.01333333, 0.01666667, 0.02, 0.02333333, 0.02666667, 0.03, 0.03333333, 0.03666667, 0.04, 0.04333333, 0.04666667, @@ -150,6 +150,6 @@ def loglikelihood(x): for _ in range(100): next(iterator).collate() df = reloaded_campaign.get_collation_result() - assert(len(df) > 0) - assert(len(df) <= 100) + assert (len(df) > 0) + assert (len(df) <= 100) results = reloaded_campaign.analyse() diff --git a/tests/test_utils.py b/tests/test_utils.py index 803ad00cf..a6ed78ff0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -15,12 +15,12 @@ def test_multi_index_tuple_parser_exceptions(): def test_multi_index_parser_corner_cases(): - assert(multi_index_tuple_parser(["a"]) == (["a"], False)) - assert(multi_index_tuple_parser(['a', '("b", 1)']) == (["a", ("b", 1)], False)) + assert (multi_index_tuple_parser(["a"]) == (["a"], False)) + assert (multi_index_tuple_parser(['a', 
'("b", 1)']) == (["a", ("b", 1)], False)) def test_multi_index_parser(): - assert(multi_index_tuple_parser(['("a", 1)', '("b", 1)']) == ([("a", 1), ("b", 1)], True)) + assert (multi_index_tuple_parser(['("a", 1)', '("b", 1)']) == ([("a", 1), ("b", 1)], True)) def test_remove_start_of_file(tmp_path): @@ -38,4 +38,4 @@ def test_remove_start_of_file(tmp_path): fd.write(data) remove_start_of_file(os.path.join(tmp_path, 'test.txt'), 'START') with open(os.path.join(tmp_path, 'test.txt'), 'r') as fd: - assert(fd.read() == trimmed) + assert (fd.read() == trimmed) diff --git a/tests/test_vector.py b/tests/test_vector.py index 7191dc8a9..7c7ffea63 100644 --- a/tests/test_vector.py +++ b/tests/test_vector.py @@ -126,7 +126,7 @@ def test_gauss_vector_pce(tmpdir): encoder = uq.encoders.GenericEncoder(template_fname='tests/gauss/gauss.template', target_filename='gauss_in.json') - #decoder = JSONDecoder(target_filename='output.csv.json', output_columns=['numbers']) + # decoder = JSONDecoder(target_filename='output.csv.json', output_columns=['numbers']) decoder = uq.decoders.SimpleCSV(target_filename="output.csv", output_columns=["numbers"]) execute = uq.actions.ExecuteLocal(os.path.abspath( diff --git a/tests/test_yamldecoder.py b/tests/test_yamldecoder.py index ab82a6304..3a686bb9a 100644 --- a/tests/test_yamldecoder.py +++ b/tests/test_yamldecoder.py @@ -17,7 +17,7 @@ def test_yamldecoder_data(keys, vals): data = decoder.parse_sim_output(run_info) for k, v in zip(keys, vals): - assert((data[k] == np.array([v])).all().all()) + assert ((data[k] == np.array([v])).all().all()) def test_yaml_nested(): @@ -25,9 +25,9 @@ def test_yaml_nested(): [['root1', 'node1', 'leaf1'], ['root1', 'leaf2'], 'leaf3']) run_info = {'run_dir': 'tests'} data = decoder.parse_sim_output(run_info) - assert((data['root1.node1.leaf1'] == np.array([0.33])).all().all()) - assert((data['root1.leaf2'] == np.array([0.32])).all().all()) - assert((data['leaf3'] == np.array([0.2, 0.3])).all().all()) + assert 
((data['root1.node1.leaf1'] == np.array([0.33])).all().all()) + assert ((data['root1.leaf2'] == np.array([0.32])).all().all()) + assert ((data['leaf3'] == np.array([0.2, 0.3])).all().all()) def test_init_exception(): diff --git a/tutorials/fabsim3_cmd_api.py b/tutorials/fabsim3_cmd_api.py new file mode 100644 index 000000000..98ed0d9cb --- /dev/null +++ b/tutorials/fabsim3_cmd_api.py @@ -0,0 +1,364 @@ +""" +FabSim3 Commands Python API + +This file maps command-line instructions for FabSim3 to Python functions. +NOTE: No effort is made to map output back to FabSim, as this complicates +the implementation greatly. + +This file can be included in any code base. +It has no dependencies, but does require a working FabSim3 installation. +""" + +import os +import sys +import time +import subprocess +from fabsim.base.fab import * + +add_local_paths("FabUQCampaign") + +def fabsim(command, arguments, machine = 'localhost'): + """ + Generic function for running any FabSim3 command. + + Parameters + ---------- + - command (string): the FanSim3 command to execute + - arguments (string): a list of arguments, starting with the config ID, + followed by keyword arguments "config,arg1=....,arg2=...." + - machine (string): the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + None + """ + if arguments == "" or arguments is None: + cmd = "fabsim {} {}".format(machine, command) + print('Executing', cmd) + # os.system("fabsim {} {}".format(machine, command)) + # os.popen also works in Jupyter notebooks, os.system hangs + os.popen(cmd).read() + else: + cmd = "fabsim {} {}:{}".format(machine, command, arguments) + print('Executing', cmd) + # os.system("fabsim {} {}:{}".format(machine, command, arguments)) + + os.popen(cmd).read() + +def fetch_results(machine='localhost'): + """ + Retrieves the results from the remote machine, and stores it in the FabSim3 + results directory. 
+ + Parameters + ---------- + - machine (string): the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + Boolean flag, indicating success (True) or failure (False) + + """ + #Q: will this catch errors in fetch_results?? + try: + fabsim("fetch_results", "", machine) + return True + except: + return False + +def status(machine='localhost'): + """ + Prints the status of the jobs running on the remote machine. + + Parameters + ---------- + - machine (string): the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + None + + """ + fabsim("stat", "", machine) + +def wait(machine='localhost', sleep=1): + """ + Subroutine which returns when all jobs on the remote machine have finished. + Checks the status of the jobs every minutes. The method works in + the same way a human would, by examining the output of fab stat. + + Parameters + ---------- + - machine (string): the name of the remote machine as indicated in + machines_user.yml + - sleep (int, default=1): time interval in minutes between checks + + Returns + ------- + finished (boolean) : if False, something went wrong + + """ + if machine == 'localhost': return True + + #TODO: this must be modified with the new FabSim3 output + #number of header lines in fab stat + header = 2 + finished = False + + while not finished: + #get the output lines of fab stat + try: + out = subprocess.run(['fabsim', machine, 'stat'], stdout=subprocess.PIPE) + except: + print('wait subroutine failed') + return finished + + out = out.stdout.decode('utf-8').split("\n") + #number of uncompleted runs + n_uncompleted = 0 + print('Checking job status...') + for i in range(header, len(out)): + #remove all spaces from current line + line = out[i].split() + + #line = '' means no Job ID, and if the number of uncompleted runs + #is zero, we are done + if len(line) == 0 and n_uncompleted == 0: + print('All runs have completed') + finished = True + return finished + #If the first entry 
is a number, we have found a running/pending or + #completing job ID + elif len(line) > 0 and line[0].isnumeric(): + print('Job %s is %s' % (line[0], line[1])) + n_uncompleted += 1 + + #no more jobs + if n_uncompleted == 0: + finished = True + return finished + #still active jobs, sleep + else: + time.sleep(sleep * 60) + +def verify_last_ensemble(config, campaign_dir, target_filename, machine): + """ + Execute the FamSim3 command with the same name. Checks if the output file + for each run in the SWEEP directory is present in + the corresponding FabSim3 results directory. + + Parameters + ---------- + - config (string): the config ID, i.e. the name in /config_files/ + - campaign_dir (string): the EasyVVUQ work directory + - target_filename (string): the name of the filename to check the existence of. + (stored in campaign._active_decoder.target_filename) + - machine (string): the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + all_good (boolean): True if all output files are present, False otherwise. + + """ + + fetch_good = fetch_results(machine=machine) + n_fetch = 1 + max_fetch = 10 + #if an exception occured in fetch_results, try max_fetch times at most + if not fetch_good: + fetch_good = fetch_results(machine=machine) + n_fetch += 1 + if n_fetch > max_fetch and not fetch_good: + print('Error in fetching results after trying %d times' % max_fetch) + sys.exit() + + #filename might contain '=', which fabsim interprets as an argument + target_filename = target_filename.replace('=', 'replace_equal') + + #Run FabSim3 verify_last_ensemble command + arguments = "{},campaign_dir={},target_filename={},machine={}".format(config, + campaign_dir, + target_filename, + machine) + + fabsim("verify_last_ensemble", arguments, machine='localhost') + #FabSim3 verify_last_ensemble command writes a flag to the check.dat file + #in the EasuVVUQ work dir. Read to see if all files were present. 
+ with open(os.path.join(campaign_dir, 'check.dat'), 'r') as file: + all_good = bool(int(file.read())) + return all_good + +def verify(config, campaign_dir, target_filename, machine, max_wait=10): + """ + This will execute the verify_last_ensemble subroutine to see if the output file + for each run in the SWEEP directory is present in + the corresponding FabSim3 results directory. + + Parameters + ---------- + - config (string) : the config ID, i.e. the name in /config_files/ + - campaign_dir (string) : the EasyVVUQ work directory + - target_filename (string): the name of the filename to check the existence of. + (strored in campaign._active_decoder.target_filename) + - machine (string) : the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + True or False + + """ + + #wait for all jobs to finish + finished = wait(machine=machine) + + #sometimes the wait subroutine fails, e.g. due to some ssh connection issue, + #retry max_wait times at most + n_wait = 1 + while not finished: + print("Wait subroutine failed, executing again") + finished = wait(machine=machine) + n_wait += 1 + if n_wait > max_wait and finished is False: + print('fabsim3_cmd_api.wait failed %d times, exiting.' % max_wait) + sys.exit() + + #check if the last ensemble returned all output files + all_good = verify_last_ensemble(config, campaign_dir, + target_filename, machine=machine) + + return all_good + + +def resubmit_previous_ensemble(config, script, command='uq_ensemble', + machine='localhost', PJ=False): + """ + Resubmits all jobs in the SWEEP directory: /config_files//SWEEP + + Parameters + ---------- + - config (string): the config ID, i.e. the name in /config_files/ + - script (string): the FabSim3 script to execute + - command : The default is 'uq_ensemble'. + - machine (string): the name of the remote machine as indicated in + machines_user.yml + - PJ (boolean): Use the QCG PilotJob framework to execute the ensemble. + Must be installed. 
If False, jobs are execute via the Slurm workload manager. + + Returns + ------- + None. + + """ + arguments = "{},script={},PJ={}".format(config, script, PJ) + fabsim(command, arguments, machine) + +def remove_succesful_runs(config, campaign_dir): + """ + This command clears the succesful runs from the SWEEP directory. Which runs are not + succesful is determined by executing the verify(...) command of this API. After the succesful + runs are cleared, execute resubmit_previous_ensemble(...) of this API to submit the failed + jobs again. + + Parameters + ---------- + config : string + The config ID, i.e. the name in /config_files/. + campaign_dir : string + The EasyVVUQ campaign directory. + + Returns + ------- + None. + + """ + #Run FabSim3 remove_succesful_runs command + arguments = "{},campaign_dir={}".format(config, campaign_dir) + fabsim("remove_succesful_runs", arguments, machine='localhost') + +def run_uq_ensemble(config, campaign_dir, script, machine='localhost', skip=0, PJ = False): + """ + Launches a EasyVVUQ UQ ensemble + + Parameters + ---------- + - config (string): the config ID, i.e. the name in /config_files/ + - campaign_dir (string): the EasyVVUQ work directory + - script (string): the FabSim3 script to execute + - machine (string): the name of the remote machine as indicated in + machines_user.yml + - skip (int): if > 0, the first runs are not executed. Required in + an adaptive setting to avoid recomputing already executed runs. + - PJ (boolean): Use the QCG PilotJob framework to execute the ensemble. + Must be installed. If False, jobs are execute via the Slurm workload manager. 
+ + Returns + ------- + None + + """ + # sim_ID = campaign_dir.split('/')[-1] + arguments = "{},campaign_dir={},script={},skip={},PJ={}".format(config, campaign_dir, + script, skip, PJ) + fabsim("run_uq_ensemble", arguments, machine=machine) + +def get_uq_samples(config, campaign_dir, number_of_samples, skip=0, machine = 'localhost'): + """ + Copies the samples from the FabSim results directory to the EasyVVUQ campaign directory. + + Parameters + ---------- + - config (string): the config ID, i.e. the name in /config_files/ + - campaign_dir (string): the EasyVVUQ work directory + - number_of_samples (int): the total number of EasyVVUQ code samples + - skip (int): if > 0, the first runs are not executed. Required in + an adaptive setting to avoid recomputing already executed runs. + - machine (string): the name of the remote machine as indicated in + machines_user.yml + + Returns + ------- + None + + """ + + # sim_ID = campaign_dir.split('/')[-1] + arguments = "{},campaign_dir={},number_of_samples={},skip={}".format(config, + campaign_dir, + number_of_samples, + skip) + fabsim("get_uq_samples", arguments, machine=machine) + + #If the same FabSim3 config name was used before, the statement above + #might have copied more runs than currently are used by EasyVVUQ. + #This removes all runs in the EasyVVUQ campaign dir (not the Fabsim results dir) + #for which Run_X with X > number of current samples. + dirs = os.listdir(os.path.join(campaign_dir, 'runs')) + for dir_i in dirs: + run_id = int(dir_i.split('_')[-1]) + if run_id > number_of_samples: + local('rm -r %s/runs/Run_%d' % (campaign_dir, run_id)) + print('Removing Run %d from %s/runs' % (run_id, campaign_dir)) + +def clear_results(machine, name_results_dir): + """ + Clears a FabSim result directory on machine. + + Parameters + ---------- + machine : String + Machine name. + + name_results_dir : String + The name of the results directory + + Returns + ------- + None. 
+ + """ + fabsim("clear_results", name_results_dir, machine=machine) diff --git a/tutorials/hyperparameter_tuning_tutorial.ipynb b/tutorials/hyperparameter_tuning_tutorial.ipynb new file mode 100644 index 000000000..5577fc0bb --- /dev/null +++ b/tutorials/hyperparameter_tuning_tutorial.ipynb @@ -0,0 +1,786 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05c43ea9", + "metadata": {}, + "source": [ + "## Tuning the hyperparameters of a neural network using EasyVVUQ and FabSim3\n", + "\n", + "In this tutorial we will use the EasyVVUQ `Grid_Sampler` to perform a grid search on the hyperparameters of a simple Keras neural network model, trained to recognize hand-written digits. This is the famous MNIST data set, of which 4 input features (of size 28 x 28) are shown below. These are fed into a standard feed-forward neural network, which will predict the label 0-9.\n", + "\n", + "The (Keras) neural network script is located in `mnist/keras_mnist.template`, which will form the input template for the EasyVVUQ encoder. We will assume you are familiar with the basic EasyVVUQ building blocks. If not, you can look at the [basic tutorial](https://github.com/UCL-CCS/EasyVVUQ/blob/dev/tutorials/basic_tutorial.ipynb)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "00f7ba08", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install easyvvuq\n", + "# !pip install tensorflow\n", + "# !pip install tensorflow_datasets" + ] + }, + { + "cell_type": "markdown", + "id": "8d5c7fde", + "metadata": {}, + "source": [ + "### FabSim3\n", + "\n", + "While running on the localhost, we will use the [FabSim3](https://github.com/djgroen/FabSim3) automation toolkit for the data processing workflow, i.e. to move the UQ ensemble to/from the localhost. To connect EasyVVUQ with FabSim3, the [FabUQCampaign](https://github.com/wedeling/FabUQCampaign) plugin must be installed.\n", + "\n", + "The advantage of this construction is that we could offload the ensemble to a remote supercomputer using this same script by simply changing the `MACHINE='localhost'` flag, provided that FabSim3 is set up on the remote resource.\n", + "\n", + "For an example **without FabSim3**, see XXX.\n", + "\n", + "For now, import the required libraries below. `fabsim3_cmd_api` is an interface with FabSim3 such that the command-line FabSim3 commands can be executed in a Python script. It is stored locally in `fabsim3_cmd_api.py`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7347053", + "metadata": {}, + "outputs": [], + "source": [ + "import easyvvuq as uq\n", + "import os\n", + "import numpy as np\n", + "\n", + "############################################\n", + "# Import the FabSim3 commandline interface #\n", + "############################################\n", + "import fabsim3_cmd_api as fab" + ] + }, + { + "cell_type": "markdown", + "id": "22672c4c", + "metadata": {}, + "source": [ + "We now set some flags:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "52064503", + "metadata": {}, + "outputs": [], + "source": [ + "# Work directory, where the EasyVVUQ directory will be placed\n", + "WORK_DIR = '/tmp'\n", + "# machine to run ensemble on\n", + "MACHINE = \"localhost\"\n", + "# target output filename generated by the code\n", + "TARGET_FILENAME = 'output.csv'\n", + "# EasyVVUQ campaign name\n", + "CAMPAIGN_NAME = 'grid_test'\n", + "\n", + "# FabSim3 config name\n", + "CONFIG = 'grid_search'\n", + "# Use QCG PilotJob or not\n", + "PILOT_JOB = False" + ] + }, + { + "cell_type": "markdown", + "id": "898a0d57", + "metadata": {}, + "source": [ + "Most of these are self-explanatory. Here, `CONFIG` is the name of the script that gets executed for each sample, in this case `grid_search`, which is located in `FabUQCampaign/templates/grid_search`. Its contents essentially just run our Python code `hyper_param_tune.py`:" + ] + }, + { + "cell_type": "markdown", + "id": "a229e7b9", + "metadata": {}, + "source": [ + "```\n", + "cd $job_results\n", + "$run_prefix\n", + "\n", + "/usr/bin/env > env.log\n", + "\n", + "python3 hyper_param_tune.py\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5c0c006f", + "metadata": {}, + "source": [ + "Here, `hyper_param_tune.py` is generated by the EasyVVUQ encoder, see below. The flag `PILOT_JOB` regulates the use of the QCG PilotJob mechanism.
If `True`, FabSim will submit the ensemble to the (remote) host as a QCG PilotJob, which essentially means that all individual jobs of the ensemble will get packaged into a single job allocation, thereby circumventing the limit on the maximum number of simultaneous jobs that is present on many supercomputers. For more info on the QCG PilotJob click [here](https://github.com/vecma-project/QCG-PilotJob). In this example we'll run the samples on the localhost (see `MACHINE`), and hence we set `PILOT_JOB=False`.\n", + "\n", + "As is standard in EasyVVUQ, we now define the parameter space. In this case these are 4 hyperparameters. There is one hidden layer with `n_neurons` neurons, a Dropout layer after the input and hidden layer, with dropout probability `dropout_prob_in` and `dropout_prob_hidden` respectively. We made the `learning_rate` tuneable as well." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6a3a8a82", + "metadata": {}, + "outputs": [], + "source": [ + "params = {}\n", + "params[\"n_neurons\"] = {\"type\":\"integer\", \"default\": 32}\n", + "params[\"dropout_prob_in\"] = {\"type\":\"float\", \"default\": 0.0}\n", + "params[\"dropout_prob_hidden\"] = {\"type\":\"float\", \"default\": 0.0}\n", + "params[\"learning_rate\"] = {\"type\":\"float\", \"default\": 0.001}" + ] + }, + { + "cell_type": "markdown", + "id": "7b41214c", + "metadata": {}, + "source": [ + "These 4 hyperparameters appear as flags in the input template `mnist/keras_mnist.template`. Typically this is generated from an input file used by some simulation code. In this case however, `mnist/keras_mnist.template` is directly our Python script, with the hyperparameters replaced by flags.
For instance:\n", + "\n", + "```python\n", + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dropout($dropout_prob_in),\n", + " tf.keras.layers.Dense($n_neurons, activation='relu'),\n", + " tf.keras.layers.Dropout($dropout_prob_hidden),\n", + " tf.keras.layers.Dense(10)\n", + "])\n", + "```\n", + "\n", + "is simply the neural network construction part with flags for the dropout probabilities and the number of neurons in the hidden layer. The encoder reads the flags and replaces them with numeric values, and it subsequently writes the corresponding `target_filename=hyper_param_tune.py`:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3ed08818", + "metadata": {}, + "outputs": [], + "source": [ + "encoder = uq.encoders.GenericEncoder('./mnist/keras_mnist.template', target_filename='hyper_param_tune.py')" + ] + }, + { + "cell_type": "markdown", + "id": "02644574", + "metadata": {}, + "source": [ + "Now we create the first set of EasyVVUQ `actions` to create separate run directories and to encode the template:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a10d571c", + "metadata": {}, + "outputs": [], + "source": [ + "# actions: create directories and encode input template, placing 1 hyper_param_tune.py file in each directory.\n", + "actions = uq.actions.Actions(\n", + " uq.actions.CreateRunDirectory(root=WORK_DIR, flatten=True),\n", + " uq.actions.Encode(encoder),\n", + ")\n", + "\n", + "# create the EasyVVUQ main campaign object\n", + "campaign = uq.Campaign(\n", + " name=CAMPAIGN_NAME,\n", + " work_dir=WORK_DIR,\n", + ")\n", + "\n", + "# add the param definitions and actions to the campaign\n", + "campaign.add_app(\n", + " name=CAMPAIGN_NAME,\n", + " params=params,\n", + " actions=actions\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bbbba5f8", + "metadata": {}, + "source": [ + "As with the uncertainty-quantification (UQ) samplers, the `vary` 
is used to select which of the `params` we actually vary. Unlike the UQ samplers we do not specify an input probability distribution. This being a grid search, we simply specify a list of values for each hyperparameter. Parameters not in `vary`, but with a flag in the template, will be given the default value specified in `params`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a3247048", + "metadata": {}, + "outputs": [], + "source": [ + "vary = {\"n_neurons\": [64, 128], \"learning_rate\": [0.005, 0.01, 0.015]}" + ] + }, + { + "cell_type": "markdown", + "id": "612e912c", + "metadata": {}, + "source": [ + "**Note:** we are mixing integers and floats in the `vary` dict. Other data types (string, boolean) can also be used.\n", + "\n", + "The `vary` dict is passed to the `Grid_Sampler`. As can be seen, it creates a tensor product of all 1D points specified in `vary`. If a single tensor product is not useful (e.g. because it creates combinations of parameters that do not make sense), you can also pass a list of different `vary` dicts. For even more flexibility you can also write the required parameter combinations to a CSV file, and pass it to the `CSVSampler` instead."
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29e62d09", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 6 points:\n" + ] + }, + { + "data": { + "text/plain": [ + "[array([[64, 0.005],\n", + " [64, 0.01],\n", + " [64, 0.015],\n", + " [128, 0.005],\n", + " [128, 0.01],\n", + " [128, 0.015]], dtype=object)]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an instance of the Grid Sampler\n", + "sampler = uq.sampling.Grid_Sampler(vary)\n", + "\n", + "# Associate the sampler with the campaign\n", + "campaign.set_sampler(sampler)\n", + "\n", + "# print the points\n", + "print(\"There are %d points:\" % (sampler.n_samples()))\n", + "sampler.points" + ] + }, + { + "cell_type": "markdown", + "id": "99c39b4b", + "metadata": {}, + "source": [ + "Run the `actions` (create directories with `hyper_param_tune.py` files in it)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2095968a", + "metadata": {}, + "outputs": [], + "source": [ + "###############################\n", + "# execute the defined actions #\n", + "###############################\n", + "\n", + "campaign.execute().collate()" + ] + }, + { + "cell_type": "markdown", + "id": "b149dd32", + "metadata": {}, + "source": [ + "To run the ensemble, execute:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7b9cb1b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost run_uq_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,script=grid_search,skip=0,PJ=False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 15:38:16.535325: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in 
performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:16.686716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:16.686745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:17.563526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:17.563594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:17.563603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:19.173964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:19.173991: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:19.174006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:19.174233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:30.534364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:30.678101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:30.678126: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:31.523915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:31.523979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:31.523987: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:33.109661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:33.109685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:33.109702: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:33.109951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:46.244240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + 
"2023-02-08 15:38:46.391594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:46.391621: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:47.261374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:47.261439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:47.261448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:48.916947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:48.916972: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:48.916988: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:48.917225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:04.143172: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:04.290000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:04.290025: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:05.206811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:05.206880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:05.206889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 15:39:06.943849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:06.943870: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:06.943886: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:06.944141: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:20.118667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other 
operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:20.263838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:20.263863: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:21.105383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:21.105447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:21.105456: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:39:22.695205: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:22.695230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:22.695246: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:22.695475: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:37.156612: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:37.302893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:37.302914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:38.146639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:38.146704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:38.146713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:39:39.743663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:39.743685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:39.743701: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:39.743931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "###################################################\n", + "# run the UQ ensemble using the FabSim3 interface #\n", + "###################################################\n", + "\n", + 
"fab.run_uq_ensemble(CONFIG, campaign.campaign_dir, script='grid_search',\n", + " machine=MACHINE, PJ=PILOT_JOB)\n", + "\n", + "# wait for job to complete\n", + "fab.wait(machine=MACHINE)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9d2c0ddb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost fetch_results\n", + "Executing fabsim localhost verify_last_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,target_filename=output.csv,machine=localhost\n" + ] + } + ], + "source": [ + "# check if all output files are retrieved from the remote machine, returns a Boolean flag\n", + "all_good = fab.verify(CONFIG, campaign.campaign_dir, TARGET_FILENAME, machine=MACHINE)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c2b9838b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost get_uq_samples:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,number_of_samples=6,skip=0\n" + ] + } + ], + "source": [ + "if all_good:\n", + " # copy the results from the FabSim results dir to the EasyVVUQ results dir\n", + " fab.get_uq_samples(CONFIG, campaign.campaign_dir, sampler.n_samples(), machine=MACHINE)\n", + "else:\n", + " print(\"Not all samples executed correctly\")\n", + " import sys\n", + " sys.exit()" + ] + }, + { + "cell_type": "markdown", + "id": "907c295d", + "metadata": {}, + "source": [ + "Briely:\n", + "\n", + "* `fab.run_uq_ensemble`: this command submits the ensemble to the (remote) host for execution. Under the hood it uses the FabSim3 `campaign2ensemble` subroutine to copy the run directories from `WORK_DIR` to the FabSim3 `SWEEP` directory, located in `config_files/grid_search/SWEEP`. 
From there the ensemble will be sent to the (remote) host.\n", + "* `fab.wait`: this will check every minute on the status of the jobs on the remote host, and sleep otherwise, halting further execution of the script. On the localhost this command doesn't do anything.\n", + "* `fab.verify`: this will execute the `verify_last_ensemble` subroutine to see if the output file `target_filename` for each run in the `SWEEP` directory is present in the corresponding FabSim3 results directory. Returns a boolean flag. `fab.verify` will also call the FabSim `fetch_results` method, which actually retrieves the results from the (remote) host. So, if you want to just get the results without verifying the presence of output files, call `fab.fetch_results(machine=MACHINE)` instead. However, if something went wrong on the (remote) host, this will cause an error later on since not all required output files will be transferred to the EasyVVUQ `WORK_DIR`.\n", + "* `fab.get_uq_samples`: copies the samples from the (local) FabSim results directory to the (local) EasyVVUQ campaign directory. It will not delete the results from the FabSim results directory. If you want to save space, you can delete the results on the FabSim side (see `results` directory in your FabSim home directory). You can also call `fab.clear_results(machine, name_results_dir)` to remove a specific FabSim results directory on a given machine.\n", + "\n", + "#### Error handling\n", + "\n", + "If `all_good == False` something went wrong on the (remote) host, and `sys.exit()` is called in our example, giving you the opportunity of investigating what went wrong. It can happen that a (small) number of jobs did not get executed on the remote host for some reason, whereas (most) jobs did execute successfully. 
In this case simply resubmitting the failed jobs could be an option:\n", + "\n", + "```python\n", + "fab.remove_succesful_runs(CONFIG, campaign.campaign_dir)\n", + "fab.resubmit_previous_ensemble(CONFIG, 'grid_search')\n", + "```\n", + "\n", + "The first command removes all successful run directories from the `SWEEP` dir for which the output file `TARGET_FILENAME` has been found. For this to work, `fab.verify` must have been called. Then, `fab.resubmit_previous_ensemble` simply resubmits the runs that are present in the `SWEEP` directory, which by now only contains the failed runs. After the jobs have finished, call `fab.verify` again to see if now `TARGET_FILENAME` is present in the results directory, for every run in the `SWEEP` dir.\n", + "\n", + "Once we are sure we have all required output files, the role of FabSim is over, and we proceed with decoding the output files. In this case, our Python script wrote the training and test accuracy to a CSV file, hence we use the `SimpleCSV` decoder. \n", + "\n", + "**Note**: It is also possible to use a more flexible HDF5 format, by using `uq.decoders.HDF5` instead." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0b55725a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
run_iditerationn_neuronslearning_ratedropout_prob_indropout_prob_hiddenaccuracy_trainaccuracy_test
00000000
010640.0050.00.00.9585000.9543
120640.0100.00.00.9731830.9656
230640.0150.00.00.9784500.9715
3401280.0050.00.00.9632830.9599
4501280.0100.00.00.9774670.9710
5601280.0150.00.00.9846000.9745
\n", + "
" + ], + "text/plain": [ + " run_id iteration n_neurons learning_rate dropout_prob_in \\\n", + " 0 0 0 0 0 \n", + "0 1 0 64 0.005 0.0 \n", + "1 2 0 64 0.010 0.0 \n", + "2 3 0 64 0.015 0.0 \n", + "3 4 0 128 0.005 0.0 \n", + "4 5 0 128 0.010 0.0 \n", + "5 6 0 128 0.015 0.0 \n", + "\n", + " dropout_prob_hidden accuracy_train accuracy_test \n", + " 0 0 0 \n", + "0 0.0 0.958500 0.9543 \n", + "1 0.0 0.973183 0.9656 \n", + "2 0.0 0.978450 0.9715 \n", + "3 0.0 0.963283 0.9599 \n", + "4 0.0 0.977467 0.9710 \n", + "5 0.0 0.984600 0.9745 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#############################################\n", + "# All output files are present, decode them #\n", + "#############################################\n", + "output_columns = [\"accuracy_train\", \"accuracy_test\"]\n", + "\n", + "decoder = uq.decoders.SimpleCSV(\n", + " target_filename=TARGET_FILENAME,\n", + " output_columns=output_columns)\n", + "\n", + "actions = uq.actions.Actions(\n", + " uq.actions.Decode(decoder),\n", + ")\n", + "\n", + "campaign.replace_actions(CAMPAIGN_NAME, actions)\n", + "\n", + "###########################\n", + "# Execute decoding action #\n", + "###########################\n", + "\n", + "campaign.execute().collate()\n", + "\n", + "data_frame = campaign.get_collation_result()\n", + "data_frame" + ] + }, + { + "cell_type": "markdown", + "id": "e1e62a8c", + "metadata": {}, + "source": [ + "Display the hyperparameters with the maximum test accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "99ba74e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparameters with 97.45% test accuracy:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_neuronslearning_rate
00
51280.015
\n", + "
" + ], + "text/plain": [ + " n_neurons learning_rate\n", + " 0 0\n", + "5 128 0.015" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"Best hyperparameters with %.2f%% test accuracy:\" % (data_frame['accuracy_test'].max().values * 100,))\n", + "data_frame.loc[data_frame['accuracy_test'].idxmax()][vary.keys()]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a9647a6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/mnist/keras_mnist.template b/tutorials/mnist/keras_mnist.template new file mode 100644 index 000000000..4c943a1ee --- /dev/null +++ b/tutorials/mnist/keras_mnist.template @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[]: + +import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds + +# In[2]: Load MNIST data + +(ds_train, ds_test), ds_info = tfds.load( + 'mnist', + split=['train', 'test'], + shuffle_files=True, + as_supervised=True, + with_info=True, +) + +# In[]: Normalize training features to values within [0, 1] + +def normalize_img(image, label): + """Normalizes images: `uint8` -> `float32`.""" + return tf.cast(image, tf.float32) / 255., label + +ds_train = ds_train.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) +ds_train = ds_train.cache() +ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples) +ds_train = ds_train.batch(128) +ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) + + +# In[]: Normalize testing features to 
values within [0, 1] + +ds_test = ds_test.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) +ds_test = ds_test.batch(128) +ds_test = ds_test.cache() +ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) + +# In[]: Train an ANN + +model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dropout($dropout_prob_in), + tf.keras.layers.Dense($n_neurons, activation='relu'), + tf.keras.layers.Dropout($dropout_prob_hidden), + tf.keras.layers.Dense(10) +]) + +model.compile( + optimizer=tf.keras.optimizers.SGD(learning_rate=$learning_rate, momentum=0.95), + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], +) + +model.fit( + ds_train, + epochs=6, + validation_data=ds_test, +) + +# In[] Store training and test accuracy + +_, accuracy_train = model.evaluate(ds_train) +_, accuracy_test = model.evaluate(ds_test) + +np.savetxt('output.csv', np.array([accuracy_train, accuracy_test]).reshape([1, 2]), + header = "accuracy_train,accuracy_test", delimiter=",", + comments="") diff --git a/tutorials/mnist/mnist_feats.png b/tutorials/mnist/mnist_feats.png new file mode 100644 index 0000000000000000000000000000000000000000..dac3e36081e88f296ca46b84643a9f802eddf87b GIT binary patch literal 13225 zcmb_@c{r2{`~O2yX*-q5u0;vicS3cFBxK*08nR6GeeCUN5EaUnDBD|0?}NVXZ- zvt}k_-}&9cIp^)Y&ilTd>-v5FsIJR+o_Uu0zCYXD{mW`fI~iCQ5Cqw&eBqoXg3tya z2u<%+diaTWW9I<;Bkg)#-&M=unybeZXG=u&imT&w2iNO1R}Z;cI=k37*oz8?3y7XP zWbNwe=prL1X!qAI2sk)f3D)x!Ho-}@IbJYuL6F^7sDEfu6q0QaMD&32xii|HF{54n zj#@KuYZH11D?^@>caE~?WBaUpV|GV_g~LJ1Wlae#kzr;brq@a;y4(genVt$z+G-09 z4CV#7FcbF__H`OLW*L8Zy5B;S!5) zyCbn};S{H+)zDChOL+d>X+ak-w`?l z$=*Tp8-i4b{QrH>%-6b;tAmnNo~dW*b>6BWuJ!qun{S*4F9tJZa%m21jDqLS5XF?lzT5|14doXkLJ7<^eqG|ocA^lRhRqw-& zio1ThoMBojlRFmYJ=WNgs->f)H5J>j<>r|#ry6z!I@*u*^`oD31K;drxUH?PpYq_K z;K%-nq@q}CHI{a7C6;^&twxiN$B%qY_#HF*#VU~)84+PS-d)O$!J?D3on2g*Gqd+5 
z?=Rjh%241{mlE>&I1(r8p{AE<97du98;;LlTHD%~T%sMNaE?-Jh3pKsi||Uw>{UgY z3&$z)JYN09n`)e}<4m@;wgVHMM^vs{xgugnXD)n=F~B1GI5JBismpoKUY}DdN=;4e zk{v_y^6~B8L4Q=`&~FzM(~*-^SlWk|h;$V=+{vA+5|`ZE+%zZ@GFi;FB_*n(plj^C zmCgL6odIJTzvn}LIh>c5ce-kyy7l~zub3Zix8SeUv&72#8cq$>tGJ<&3OJvb?&gXb z4)06D4e_Myf+f#iTQ%7J=zZIsYxy83C`hx51rbU_xPo;^G$jTJmoIRHD!ny-sKuYn zkhNm8X-zGAf7Z&U*F&NKP+#-({}(m&i}Jjbr7a1CTfbQE;D^GM&Jc}+N#kE$b0-WJ zVP?{Dh>1#9Yjjq>a<0DB=i}uKDqL+;;uN=U&Ufg!wHP2LMGJjl&P;m{qdQZG8< zFhYwU`gf3@KEM_3xTK@=BtpQ11^J#&(I{*dDL&5l@+P{nk~OFtp#)8cxy&4J+zE4vD>qMCu+2Gu3IHVS6iFW zWu@+^XmAG?CUwhm(I_o;N&8MI)M%|>`SK(9nm*G!t zxE#uIZTYfFa(-A+eY7MiS}BB0nCR^6ycZ*%#0~8%Xj-B_Gc#jrZT(q^(>LjcuD2iQi z%l3dVw+_4fV{c4MO-<`&LKRjEoT9ao)tJ$(t*wOi2ZCkRxTgY7k10z_OA}a{_9^`% z(1;8?bI}KGfzaOcQsq(GeKD`efh2iO#Fa1{@6X|~(&12U*LfV;p>JWSe9apbtWs)m zAms0@_j6AkN743XniPv!eq>x&@hdbIuxo$AlQt$5l9rY>Ft_w1Ha26TzlPG5gG#kI z5a0%7-=s`1rhoPF<@w+5>}c#PbP-1puTfAKqfgrXl@I=vTFBu?qg(1@#g9Mp(T6Qog?^(S5Yt5_gQ; zSN%Ykd=D8BC)1cYiem74XxQ{(8n&ZCE%Pe zOzPVPyw<7G=+%64{kX<1n$Xv&zrF{CYo z*=_-8EzZ(`5{U$5Sdv1rlcT3>|3#x2{5WuSesH{Mc?-$ zj_c<6udQn52Z+_|ISt4Cigi_5nm^p@J^NjmJSDgMqms~Wm){jprG{{k{a1(mvG}fH z504QS7@P$C+S*!CqTfQT84FrIzCjc;>s4TH%?|4lv&2v5|z@$f50AdxD2X}TbzksW6Z`}7oYSz74|YaTj)OfrtED{f+Ci6 zJiX(SZK%-mt8=|?Z;JcOJHlm-tjzV0rbn7l_FB-Nz_Z?uUOrJ3u=z8!-ormULp&32 z+@PiYV%7b9gr$Nm`@*gTW29VMH0iJXI{_*W;AbmfM`^|t3RF)`Zs9Ti7l=!|f|FrT03@8@7&{8734QQJ=B913_b zH_?xt3T3)~7b2GkKHKOj+6Da=YHx8g4>~iW`w79K#QEdL0<@PFDOthzh=>Cy zyVf+FSiek#rPm6gM4%zvY6=`8yMIxJmH7c-d)Mat*CNRWkVC1&OcOM2WqDX(ylEKI zA1sL#FYoDj*>KzFA0yyf{%WTf?R=!d8O9``9grdz?-?1Wd=9u|&g}c6p{^q?ho>BW z0o}}d5VWnW*ww17g98I66co$=n_B_r636=g1M=~ZnF8#9pfo8f0`irWnGOyPvIt+j zC16&DJEjtm^5%_pywBu)&{2HKhGjm5x;Xs=Jep+NTM=lOcm3&c)n`flHKBam+}zb` zo4=iithK~>m3DKv)4Hl1|CE}pm&JlM%DXO1G&D5C;%%B=F{9-$QK1G^l&e8^6k4=+=}z^bQcyDWL&>JB!sZ@ z)i_aABELcIx#nc`NUjW{d^OW7Jqjs{q@^zRBq)zY{}zy}Sc@v2Ue?Vw&nRm`33H_; zB%Fcq#>>wi+*vc~js}2fFZOV_h@mr=9AXU6{&??b0Lg_-3HpXb4)cb7H^)h3+w0dK zx2!TIkSMLd!ms#%F#AiJC7_VJku+%#e-Bfzva|)}Z)w@(Fxus7E8`ul_RHAY{ZdW} 
z9vXi@EW`Hg#U*CQ>|aMP1Z+Pj&@xmXjYrk&ni=m=$nBMZ-LxT>nEQ@PaTe_`*O z*scl|6X|to{$vl$XD2p({p`~KshKb|GyD4bQZPJ5OVbIR8XW(<-OSA{98zAH>Od6O7j+{ih@eb|D;c zwm6vWU9ReqR7Dhw2Qn)M&=-0AzDH{0Sd`i6+<(T$9hsSus+!dm>~O$jKj_Ok_6Fp* zk98JC+iVL691#%`wfZDxzb}ja(9D0mhvP?S%~e%{6|qbSF)?g|!^3EDODa%V&lcaI zx~E5h_~B3esMz$=Sn%0i@V~|aYhlm8hetX_My!RgfQ~`#hf~$#&^Drh%_=F2nzOPR)@uZ?HF51CEzYrla{s3Bu_nNvk+LrVC%FN(2(^Q({^&2@UVATUG~~cO9K$hG~VxC9GrZY=DBDQN{vdjd5?x55s{G; zkMgBhVwPDJyjyVz2{0ZsGBPs4li!w6*>Cn1@WtiU+9}(KklZ zMt!U>E+bC^g{qsuf{hbXOO*|)kSv$BFHHJX}7Agp#PEDI@jA*a1Jx>Sv36M{VN|5=^=n`82e4?f(B zzo8O=Rv2VtxUJc_?V&Nrbc*>$^H=6z=FLXJi87%`+qU}p`tX*ooY%I9sJghkKE%8-Xrbc?AyEa1`Bny%+IRL2acm{M410C%wA=&8D)c@8q%ZsU}EpXA#o74j@P zdT?&3HE~Nko3b9M0U+&(M%LTRiq(4&%1bwyTfNw5zTe`I!=wO959f-kJ*Et{VCmqR zW6|EF#L`P&xJ%O2{7DTd6w8n3?DTAf7nUDa%@y5t2{Zrfm+l*IFP;JKrFPUkx#-E8 zidE}S-y7Qf;!MFSdxBT4NS4uot7OjnKNRlq-Wiga9!!1A+*rny$~!i5V_m8CJT{;+ zU-D?ZdOto_JLn9EtE%Bv?*3Sl`m^)CGI(@c!HZ-383|v zj9Xd=rvir?XUFWercEwE(#M1o%@vXFN#GSyMV4W%#r<8J@;G4>Aq9uKl&l`F=7v^o zY-)OO`1d>C@Fdd*24;US1d^$@>Xuz>4;zM?lawDFDc5*rL#9x4OieSP3DPq&)glGW zbZu->ffXE?%dv|;G!vy&mx_+k5OV&^GuPu^F6z)-lH%sk>f$NsE8_Hyg$*75xp(3=PvjAiqjUpKkp8`8k0=NLhiF5(mc!J*zIn^q321kn+{| zLkm$_6JJ&(KVaX9(^aTRd5pzm8W$b`>5T9G!`Cx;8H!%2Ag%zp0<%;MGi|+-wC9AhLHp^h9Q@}}T&b&sEKSTcU>_G!z zr4suNhIiYxmBksYhBQa*6nZ0E*4D1wX*$8{?FjWZ1S-e@sbtG1%_aEUTV9zopC z>gq<^*~$8gve_AC0Btj0{z?p5*KuB6gf%-$ky43mWOVJjJZ*OT;#A<73Byh)ho~^vQn{;!xe2O@l_zk4TGf4;2 zG$WND7%d|J0AU?FckYyN9=JC-IVomdf&5n%aofZresa>8>SK(T&c%V%c4Yj897kyG z>(_(Mg}4sx3vxq@E@YN#j9AT?nSP@$X*%gKFJ2ts=HshnmT|QnpiE1EV{`XtYL^J8 zhZ?7{!la6+28Ho?A9)~kvZ$I6H+e8NTO_)Iua0h4Rs$|QAfGq7FKG*$_oEZkmXv_ge#WrS`6YL5ux3c-Z2fOSzx6-jGpFBwv2t55fdhOyU0ekgIVc~7&RwscPCpLqt z1MkCFWL#5pvrO3yJR0QYtbq!EZe92^dh_k`v4Z|xfHs3eL+)1TrwzB=KFfRZWJ~pf zgM$;EnqWx+le~XL(Jf9qIiRDx-Syk&!*WaCUu#fQR8;UD|Hsat4RD}F6pz5i zJ(l|7#aZ=J6SwZ{=F9}^=`xy#YaD4#7Kd@9ollQ|5LHI$AU?nYTmg+sf4ti+<2{Q5 zrGVo%%1cQ}InkOygTN&>>ASI4MDJf`6^uLR3#({x@6c<+it>HetI_3CxCLt2A!TQf 
z|FbCV7qQ2$;yOr$($aVfjWbiKho@c^@gw)B#4W-!)&9^^`kgFE3d>)2hjjo8x^zVn z`OE>dTtt6A;ypvIF1A5zf`L6SgQI;?nOiO9A(Se>5{Lh?lke)naH{<_uAA_H z!Cp_z@5ZQX+On%9es!7~p4z2E#o(qKRLykhxUb-l#8*fCuZ~jAvWyJJ(oK*9j`;w?f zgX!TGr;{9zMap+DqvjGaPJOo~3h?aKC}9+K&S~A% zRQ}G89|YykissYB2@!!tS>kpw3H4qxJWIbwJ+w#~W~eApF_a_rGb+>IH9^04X{zE=x=33^r8Rl z8_rYV?Mz3{Dp9|9Wws+24;E>swmKry;ObkYXXe{l08-}Sx579KnFm6Gm!{Ob2Dg(< zN+`iJ!1KR~iGQPT-~<-c0}Btvzj}2XWe4M|t1+YzPQKq|1zf1$4!WCXPK$m223i#vs5KzuzoG- zW&%%F6sTUgl8_@|@%}f|&eL;J0BAYH_I4JR74Q`-JBJ+BW*vg@rloajq=g!V)kggX zl6S#V8GZOYsaf-y}A_EGagwx zXge3o`=9w5f~y3lK}}>xM2MfCpGCyt-q5GmbY{_OZ_)+L%8a2bKeyx2;*Pzisa^Hx zxM~yV?1lCvRha4b3*{SC7?(-@E0H-8KJkv+uSGGNIQXQRzr1%bZhRn^q?n z#>@N4ta!fnsY{uDZQXcey&m?u)v$Xop(g}3O-1@IL<*C*Wl+WaS)8Htn{H=7ifCal%79j=m=F^aOMy_7{I1GA z-v|u3=I&B&tv2TMsMw6aMuSZT%pGMt*rgNv)zj245zit)^ zle|xH^h-FDujJ*-f{n7|3jekP9AWhkaM-wxmgDG&{)(wktMo*i&MfBrTXHKL>jR&icK z%=o-KTlnT8$FDaOVg7*ULiD8F5=rsD{Zerjk%`O99pXsU($Sfd)~`}o#%NPxqh zpF%wFRq220;_E)93;Gbca&@lUp_4Ejoe~bAods%CVYOcYi(g#CphgRa<3x#IB|=bQ zXP4o`Dgft<+`Rn&6MD;T4s4-(_e&JK4TqxSPxC-sg7N1BP~t8_bX${mUuq+6W-wLqN{1dD; zrl;QnNq{JEb-qS{1-*%W$Gt+o?h-G_E-foId7n0Yw4I$e)fQuR@rBJ0hazs|Lyqf+ zCTbK&v@pq`E&FXjJ#}1e;09#4p*eZpbM~lFs*~T&?$No&WrfwDFaZe$936-1GAMoD|`V7R3 zo`yzPxBr?iq^ol4wA0DobSqNC02d)*;KneIfgl1DUC`3fgMViV5z`@eR<|{?0<sVR6OkK|e3M{8?v$fzQ3Gi?J6)eAo9> zsMhGU`e&9nNW8j$YS^nz2+-_@<%|2Pic?r-Q7m3vVWmZ%Rm9@L*@i7umA}Y3fAE_p z#b%YSw(CcWTB)54Exi6Em#&z`JW+P6;An#3DQceiV2;1nQ%@M#Tm6fZZR}i7gH!ji zMKT`>JhFQIrS7>hE8NWYFH^5Dv;iU3_VyA)FxTy=YZvPTPO^31Z`*&732m_OOVlg{ z3_+?tFTeC%6jCvCONs_&VK8#0l5>8d zaS+<6ImTy{u8s~fx)ftsrNW*sH1wFg&jkExuso~uDUj>F7)2DBo#?VR{8=hJ1pD6= zIL?!Zh_sh4k2%NxvATd6#gklnD`*yc| zneh8vG57EKV5~mj)3f~$e(agNZxNSGSCQK!rWA;q?>nU!mw`hOS)eW<1whHVi+Su9y8z@pmkyZpsypQVQ22(lIns zfLoFCAC@ZhDh&%96C~_AQ@}D!D=3gkGc`6o8Q+jxR3t;ijPN92e61uBWiZlA?R+hf zny9yf_p%pLSH4OqS9`<$yhAwodoRSJ2gWD$PUl*lLrw|`l7JG;bbF8UKx{VY#r#jA zmrdC;E*nuu0cOTlxe21sL?VeuwEYw#9iC(r=cNcm{7k{$Z-6$vBhOAtO-(Jlc(eIY 
zkOi*cduW8Z56__?DTnZ+Z`F_vaXq;Hfm07HJ%XI}`8hm&v?rdXDr0zZuy&iB$LJ(e z&h%K9Y@<8!T*U`=8VQD>9BeLU7JFNN+IRb}mIrk^;A(9I|28{GkFidHF*>B^6D+4_ zblL*X~VyVnquominfZC&D(<|ftJ^HdxL3?Fs`ucyt7)1JvDj@A9o3s{J4?h4uWMsoYvccMtjQ5s0%MX3%Rh6)7sRu*R32# z9p(F%*I~dp_FEjC}_L#DOUDLe0&yc4v;@^$78)&G{b=#Z|oVEB=G%_JqRE&A&Egj48a_7h7h@j z-~<6OAsfjU?Z~I-l`lTo#U}L}qE=qEU99Wr1xF}%F5`g%$NLp%PT!$y#F{4}`=!*< zqQ6}PB8maz$HR(lffqpq8b^J&d}1f-AJ95?%hlwH3tq%lLiVYD{Q_l@XwSFnn03FfPsM_{BuGq z_>94L*sxqvyUKl<49$62Q}Zeg9*Q6=a7|cNSs}0`GgI8Nq6Yb>9&8@7u(DFOa-}zW zjWiwOhjl=Z`ON^`Z-ApvY1N4(dn<$V$*&~lORBp>=^XA4D?;#*zFxsHNm42cPs8+q<#;H4&5uMVww zM3ykGz}^^*(qJEUzE{icK10@tm3C2tojx!lb>TJB1|kZlG90>nb0Fbpl6PWLf&gZ6 zX@%T)Q#~XponrO4cJN~g$z0Fe|6Fa-3T(!m3V|;Q<&bNE&QcDq9Sr9JbFq8UU&`eE zN~7tk@B6xSlFKPpEi)p6;%z!@oQ)!u{z8c|ccXUvymGq@k~|F9N`vmjT3r_n&Hdlh z$e&4@V8Ier?-5qBSeKR~dh!(tjF0L1Jom2bS~#*G#zJxtJ_%$?t&kK+N+oT};sBvaG>|}re5Vq8or~VLA$1~I- zUUz~?`rG5B@$#OhV9U1@6_1y7c)_&RYSAEHa0dKhD)fan2s?Yyf~C`%7F}S=40@j8 z7$%jkx-rOHDg(<#DO}6UY+P#B5?$)sn8gDwyey&&BuBdJeXvbHwReG2C}x0*(n{6p zEjo+V>pO?2mR0b_SS0OZ#Fx04c3cbx|DP*I{h<#vLs)|h;VTQRId{z8w%ns$WH!~{ zea$JWP&q#>T-^7)F@SR-L0_!UD<=3px+6{gf^_kNi5c<(ujqwx*g%DQ9mx3?d(g}wE5$PO0g#yJff zrsU7q3MKcwo^c3gzRkJ^kv+`ErvhsLS|%nYEsKEg!7Spu=ON#LRcTnFPFZOt5??JY zE@HbBLi(*Czo8-tb&+fT!GlVzcAeZ8AZ_x=TTvBBG=Im_t41X_`_xY*e674odAo(P z1q8e6;7`pLV@Vs$UCLTb7@*X^wiofziP2fXvQqrznGB>f+)= z#&y_fQUi_i=*g489qck Date: Thu, 9 Feb 2023 12:51:43 +0100 Subject: [PATCH 4/7] added tutorial without FabSim --- .../hyperparameter_tuning_tutorial.ipynb | 597 +++++++------ ...arameter_tuning_tutorial_with_fabsim.ipynb | 836 ++++++++++++++++++ 2 files changed, 1149 insertions(+), 284 deletions(-) create mode 100644 tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb diff --git a/tutorials/hyperparameter_tuning_tutorial.ipynb b/tutorials/hyperparameter_tuning_tutorial.ipynb index 5577fc0bb..081a99391 100644 --- a/tutorials/hyperparameter_tuning_tutorial.ipynb +++ b/tutorials/hyperparameter_tuning_tutorial.ipynb @@ -5,10 +5,12 @@ "id": 
"05c43ea9", "metadata": {}, "source": [ - "## Tuning the hyperparameters of a neural network using EasyVVUQ and FabSim3\n", + "## Tuning the hyperparameters of a neural network using EasyVVUQ\n", "\n", "In this tutorial we will use the EasyVVUQ `GridSampler` to perform a grid search on the hyperparameters of a simple Keras neural network model, trained to recognize hand-written digits. This is the famous MNIST data set, of which 4 input features (of size 28 x 28) are show below. These are fed into a standard feed-forward neural network, which will predict the label 0-9.\n", "\n", + "**Note**: This tutorial always runs on the localhost. One possibility of performing the grid search on a remote supercomputer involves the use of FabSim, see the `hyperparameter_tuning_tutorial_with_fabsim.ipynb` notebook tutorial.\n", + "\n", "The (Keras) neural network script is located in `mnist/keras_mnist.template`, which will form the input template for the EasyVVUQ encoder. We will assume you are familiar with the basic EasyVVUQ building blocks. If not, you can look at the [basic tutorial](https://github.com/UCL-CCS/EasyVVUQ/blob/dev/tutorials/basic_tutorial.ipynb)." ] }, @@ -30,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "id": "00f7ba08", "metadata": {}, "outputs": [], @@ -40,25 +42,9 @@ "# !pip install tensorflow_datasets" ] }, - { - "cell_type": "markdown", - "id": "8d5c7fde", - "metadata": {}, - "source": [ - "### FabSim3\n", - "\n", - "While running on the localhost, we will use the [FabSim3](https://github.com/djgroen/FabSim3) automation toolkit for the data processing workflow, i.e. to move the UQ ensemble to/from the localhost. 
To connect EasyVVUQ with FabSim3, the [FabUQCampaign](https://github.com/wedeling/FabUQCampaign) plugin must be installed.\n", - "\n", - "The advantage of this construction is that we could offload the ensemble to a remote supercomputer using this same script by simply changing the `MACHINE='localhost'` flag, provided that FabSIm3 is set up on the remote resource.\n", - "\n", - "For an example **without FabSim3**, see XXX.\n", - "\n", - "For now, import the required libraries below. `fabsim3_cmd_api` is an interface with fabSim3 such that the command-line FabSim3 commands can be executed in a Python script. It is stored locally in `fabsim3_cmd_api.py`." - ] - }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "id": "e7347053", "metadata": {}, "outputs": [], @@ -66,11 +52,7 @@ "import easyvvuq as uq\n", "import os\n", "import numpy as np\n", - "\n", - "############################################\n", - "# Import the FabSim3 commandline interface #\n", - "############################################\n", - "import fabsim3_cmd_api as fab" + "from easyvvuq.actions import CreateRunDirectory, Encode, Decode, ExecuteLocal, Actions" ] }, { @@ -83,62 +65,30 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "id": "52064503", "metadata": {}, "outputs": [], "source": [ "# Work directory, where the EasyVVUQ directory will be placed\n", "WORK_DIR = '/tmp'\n", - "# machine to run ensemble on\n", - "MACHINE = \"localhost\"\n", "# target output filename generated by the code\n", "TARGET_FILENAME = 'output.csv'\n", "# EasyVVUQ campaign name\n", - "CAMPAIGN_NAME = 'grid_test'\n", - "\n", - "# FabSim3 config name\n", - "CONFIG = 'grid_search'\n", - "# Use QCG PilotJob or not\n", - "PILOT_JOB = False" - ] - }, - { - "cell_type": "markdown", - "id": "898a0d57", - "metadata": {}, - "source": [ - "Most of these are self explanatory. 
Here, `CONFIG` is the name of the script that gets executed for each sample, in this case `grid_search`, which is located in `FabUQCampaign/templates/grid_search`. Its contents are essentially just runs our Python code `hyper_param_tune.py`:" - ] - }, - { - "cell_type": "markdown", - "id": "a229e7b9", - "metadata": {}, - "source": [ - "```\n", - "cd $job_results\n", - "$run_prefix\n", - "\n", - "/usr/bin/env > env.log\n", - "\n", - "python3 hyper_param_tune.py\n", - "```" + "CAMPAIGN_NAME = 'grid_test'" ] }, { "cell_type": "markdown", - "id": "5c0c006f", + "id": "3cae997f", "metadata": {}, "source": [ - "Here, `hyper_param_tune` is generated by the EasyVVUQ encoder, see below. The flag `PILOT_JOB` regulates the use of the QCG PilotJob mechanism. If `True`, FabSim will submit the ensemble to the (remote) host as a QCG PilotJob, which essentially means that all invididual jobs of the ensemble will get packaged into a single job allocation, thereby circumventing the limit on the maximum number of simultaneous jobs that is present on many supercomputers. For more info on the QCG PilotJob click [here](https://github.com/vecma-project/QCG-PilotJob). In this example we'll run the samples on the localhost (see `MACHINE`), and hence we set `PILOT_JOB=False`.\n", - "\n", "As is standard in EasyVVUQ, we now define the parameter space. In this case these are 4 hyperparameters. There is one hidden layer with `n_neurons` neurons, a Dropout layer after the input and hidden layer, with dropout probability `dropout_prob_in` and `dropout_prob_hidden` respectively. We made the `learning_rate` tuneable as well." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "id": "6a3a8a82", "metadata": {}, "outputs": [], @@ -172,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "id": "3ed08818", "metadata": {}, "outputs": [], @@ -185,21 +135,25 @@ "id": "02644574", "metadata": {}, "source": [ - "Now we create the first set of EasyVVUQ `actions` to create separate run directories and to encode the template:" + "What follows are standard steps in setting up an EasyVVUQ Campaign" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "id": "a10d571c", "metadata": {}, "outputs": [], "source": [ - "# actions: create directories and encode input template, placing 1 hyper_param_tune.py file in each directory.\n", - "actions = uq.actions.Actions(\n", - " uq.actions.CreateRunDirectory(root=WORK_DIR, flatten=True),\n", - " uq.actions.Encode(encoder),\n", - ")\n", + "# execute the runs locally\n", + "execute = ExecuteLocal('python3 hyper_param_tune.py')\n", + "\n", + "# decode the output CSV files, with stored training and test accuracy values\n", + "output_columns = [\"accuracy_train\", \"accuracy_test\"]\n", + "decoder = uq.decoders.SimpleCSV(target_filename=TARGET_FILENAME, output_columns=output_columns)\n", + "\n", + "# actions are 1) create run dirs, 2) encode input template, 3) execute runs, 4) decode output files\n", + "actions = Actions(CreateRunDirectory(root='/tmp', flatten=True), Encode(encoder), execute, Decode(decoder))\n", "\n", "# create the EasyVVUQ main campaign object\n", "campaign = uq.Campaign(\n", @@ -225,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "id": "a3247048", "metadata": {}, "outputs": [], @@ -245,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "id": "29e62d09", "metadata": {}, "outputs": [ @@ -267,7 +221,7 @@ " [128, 0.015]], dtype=object)]" ] }, - "execution_count": 8, + "execution_count": 19, 
"metadata": {}, "output_type": "execute_result" } @@ -294,221 +248,317 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "id": "2095968a", "metadata": {}, - "outputs": [], - "source": [ - "###############################\n", - "# execute the defined actions #\n", - "###############################\n", - "\n", - "campaign.execute().collate()" - ] - }, - { - "cell_type": "markdown", - "id": "b149dd32", - "metadata": {}, - "source": [ - "To run the ensemble, execute:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7b9cb1b8", - "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Executing fabsim localhost run_uq_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,script=grid_search,skip=0,PJ=False\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 15:38:16.535325: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:16.686716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:16.686745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:17.563526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:17.563594: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:17.563603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:19.173964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:19.173991: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:19.174006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:19.174233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:44.946331: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:30.534364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 
16:03:44.955703: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:30.678101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:30.678126: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:31.523915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:31.523979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:31.523987: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:33.109661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:33.109685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:33.109702: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:33.109951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:44.992926: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:46.244240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:45.009895: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:46.391594: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:46.391621: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:47.261374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:47.261439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:47.261448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:48.916947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:48.916972: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:48.916988: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:48.917225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:45.015426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:04.143172: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:45.026090: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:04.290000: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:04.290025: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:05.206811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:05.206880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:05.206889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + "2023-02-08 16:03:45.152547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.152724: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:45.185859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.185998: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:45.191458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.191592: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:45.207372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.207499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:45.223128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.223246: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:45.239120: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:45.239235: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 16:03:46.712789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.713075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.713108: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 16:03:46.947109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.947414: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.947445: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 16:03:46.980504: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.980720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:46.980789: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 16:03:47.009215: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.009456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.009536: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 16:03:47.062563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.062813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.062891: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 16:03:47.067269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.067493: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:47.067554: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 15:39:06.943849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:06.943870: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:06.943886: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:06.944141: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:51.501659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; 
dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:51.506190: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:51.506230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:51.506261: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 16:03:51.506664: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:20.118667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:51.516118: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:51.516247: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 16:03:51.516744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with 
the appropriate compiler flags.\n", - "2023-02-08 15:39:20.263838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:20.263863: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:21.105383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:21.105447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:21.105456: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:39:22.695205: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:22.695230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:22.695246: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:22.695475: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:51.915109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:51.915697: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:51.915799: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 16:03:51.916374: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 
15:39:37.156612: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:52.248903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:52.249430: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:52.249657: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 16:03:52.250282: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/6\n", + "Epoch 1/6\n", + "Epoch 1/6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:03:52.631532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:52.632048: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:52.632157: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version 
does not exist\n", + "2023-02-08 16:03:52.632559: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:37.302893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:37.302914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:38.146639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:38.146704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:38.146713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:39:39.743663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:39.743685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:39.743701: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:39.743931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-02-08 16:03:52.949574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 16:03:52.950163: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 16:03:52.950295: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 16:03:52.950896: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, { - "data": { - 
"text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "###################################################\n", - "# run the UQ ensemble using the FabSim3 interface #\n", - "###################################################\n", - "\n", - "fab.run_uq_ensemble(CONFIG, campaign.campaign_dir, script='grid_search',\n", - " machine=MACHINE, PJ=PILOT_JOB)\n", - "\n", - "# wait for job to complete\n", - "fab.wait(machine=MACHINE)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "9d2c0ddb", - "metadata": {}, - "outputs": [ + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/6\n", + "Epoch 1/6\n", + "Epoch 1/6\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:03.585261: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 43442 of 60000\n", + "2023-02-08 16:04:04.100835: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 27025 of 60000\n", + "2023-02-08 16:04:04.220694: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 28169 of 60000\n", + "2023-02-08 16:04:05.258903: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 36992 of 60000\n", + "2023-02-08 16:04:05.267869: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 23851 of 60000\n", + "2023-02-08 16:04:07.095526: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 20192 of 60000\n", + "2023-02-08 16:04:07.898318: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Executing fabsim localhost fetch_results\n", - "Executing 
fabsim localhost verify_last_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,target_filename=output.csv,machine=localhost\n" + "183/469 [==========>...................] - ETA: 2s - loss: 0.6070 - sparse_categorical_accuracy: 0.8185" ] - } - ], - "source": [ - "# check if all output files are retrieved from the remote machine, returns a Boolean flag\n", - "all_good = fab.verify(CONFIG, campaign.campaign_dir, TARGET_FILENAME, machine=MACHINE)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c2b9838b", - "metadata": {}, - "outputs": [ + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:09.827426: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "340/469 [====================>.........] - ETA: 1s - loss: 0.4603 - sparse_categorical_accuracy: 0.8630.085" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:11.536398: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 1/469 [..............................] - ETA: 2:36:25 - loss: 2.3016 - sparse_categorical_accuracy: 0.1094" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:12.699494: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "400/469 [========================>.....] 
- ETA: 0s - loss: 0.5790 - sparse_categorical_accuracy: 0.8365" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:15.272137: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 46812 of 60000\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "469/469 [==============================] - 24s 17ms/step - loss: 0.4012 - sparse_categorical_accuracy: 0.8810 - val_loss: 0.2117 - val_sparse_categorical_accuracy: 0.9375\n", + "372/469 [======================>.......] - ETA: 0s - loss: 0.4127 - sparse_categorical_accuracy: 0.8787" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:16.448009: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Executing fabsim localhost get_uq_samples:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,number_of_samples=6,skip=0\n" + "469/469 [==============================] - 25s 19ms/step - loss: 0.4395 - sparse_categorical_accuracy: 0.8714 - val_loss: 0.2499 - val_sparse_categorical_accuracy: 0.9277\n", + "Epoch 2/6\n", + "469/469 [==============================] - 28s 19ms/step - loss: 0.5410 - sparse_categorical_accuracy: 0.8465 - val_loss: 0.2965 - val_sparse_categorical_accuracy: 0.9148\n", + "469/469 [==============================] - 28s 17ms/step - loss: 0.3759 - sparse_categorical_accuracy: 0.8899 - val_loss: 0.2053 - val_sparse_categorical_accuracy: 0.9395\n", + "Epoch 2/6\n", + "408/469 [=========================>....] 
- ETA: 0s - loss: 0.5421 - sparse_categorical_accuracy: 0.8485" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 16:04:20.977864: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "469/469 [==============================] - 6s 12ms/step - loss: 0.2182 - sparse_categorical_accuracy: 0.9376 - val_loss: 0.1825 - val_sparse_categorical_accuracy: 0.9449\n", + "Epoch 3/6\n", + "469/469 [==============================] - 31s 18ms/step - loss: 0.5106 - sparse_categorical_accuracy: 0.8572 - val_loss: 0.2834 - val_sparse_categorical_accuracy: 0.9222\n", + "Epoch 2/6\n", + "469/469 [==============================] - 6s 12ms/step - loss: 0.1755 - sparse_categorical_accuracy: 0.9489 - val_loss: 0.1488 - val_sparse_categorical_accuracy: 0.9547\n", + "469/469 [==============================] - 34s 14ms/step - loss: 0.4380 - sparse_categorical_accuracy: 0.8723 - val_loss: 0.2433 - val_sparse_categorical_accuracy: 0.9304\n", + "469/469 [==============================] - 5s 9ms/step - loss: 0.1650 - sparse_categorical_accuracy: 0.9526 - val_loss: 0.1451 - val_sparse_categorical_accuracy: 0.9584\n", + "Epoch 4/6\n", + "469/469 [==============================] - 5s 9ms/step - loss: 0.2626 - sparse_categorical_accuracy: 0.9258 - val_loss: 0.2281 - val_sparse_categorical_accuracy: 0.9353\n", + "Epoch 3/6\n", + "137/469 [=======>......................] - ETA: 2s - loss: 0.1411 - sparse_categorical_accuracy: 0.9607Epoch 3/6\n", + "466/469 [============================>.] - ETA: 0s - loss: 0.2146 - sparse_categorical_accuracy: 0.9399Epoch 2/6\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.1325 - sparse_categorical_accuracy: 0.9621 - val_loss: 0.1244 - val_sparse_categorical_accuracy: 0.9632\n", + "Epoch 5/6\n", + " 1/469 [..............................] 
- ETA: 2:00 - loss: 0.1179 - sparse_categorical_accuracy: 0.9766Epoch 2/6\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.2146 - sparse_categorical_accuracy: 0.9399 - val_loss: 0.1891 - val_sparse_categorical_accuracy: 0.9453\n", + "Epoch 4/6\n", + " 36/469 [=>............................] - ETA: 5s - loss: 0.2037 - sparse_categorical_accuracy: 0.9418Epoch 2/6\n", + "469/469 [==============================] - 6s 11ms/step - loss: 0.1273 - sparse_categorical_accuracy: 0.9635 - val_loss: 0.1145 - val_sparse_categorical_accuracy: 0.9638\n", + "142/469 [========>.....................] - ETA: 3s - loss: 0.2129 - sparse_categorical_accuracy: 0.9404Epoch 4/6\n", + "469/469 [==============================] - 7s 15ms/step - loss: 0.1914 - sparse_categorical_accuracy: 0.9454 - val_loss: 0.1559 - val_sparse_categorical_accuracy: 0.9544\n", + "Epoch 3/6\n", + "469/469 [==============================] - 7s 14ms/step - loss: 0.2780 - sparse_categorical_accuracy: 0.9215 - val_loss: 0.2372 - val_sparse_categorical_accuracy: 0.9317\n", + "469/469 [==============================] - 8s 16ms/step - loss: 0.1106 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.1105 - val_sparse_categorical_accuracy: 0.9675\n", + "430/469 [==========================>...] - ETA: 0s - loss: 0.1022 - sparse_categorical_accuracy: 0.9698Epoch 6/6\n", + "469/469 [==============================] - 8s 15ms/step - loss: 0.1801 - sparse_categorical_accuracy: 0.9493 - val_loss: 0.1693 - val_sparse_categorical_accuracy: 0.9522\n", + " 35/469 [=>............................] 
- ETA: 2s - loss: 0.1041 - sparse_categorical_accuracy: 0.9690Epoch 5/6\n", + "469/469 [==============================] - 7s 14ms/step - loss: 0.2172 - sparse_categorical_accuracy: 0.9384 - val_loss: 0.1806 - val_sparse_categorical_accuracy: 0.9486\n", + "Epoch 3/6\n", + "469/469 [==============================] - 7s 14ms/step - loss: 0.1021 - sparse_categorical_accuracy: 0.9697 - val_loss: 0.1086 - val_sparse_categorical_accuracy: 0.9661\n", + "Epoch 5/6\n", + "272/469 [================>.............] - ETA: 1s - loss: 0.1580 - sparse_categorical_accuracy: 0.9552Epoch 3/6\n", + "469/469 [==============================] - 4s 9ms/step - loss: 0.1461 - sparse_categorical_accuracy: 0.9574 - val_loss: 0.1325 - val_sparse_categorical_accuracy: 0.9617\n", + "331/469 [====================>.........] - ETA: 1s - loss: 0.0834 - sparse_categorical_accuracy: 0.9753Epoch 4/6\n", + "469/469 [==============================] - 4s 9ms/step - loss: 0.1660 - sparse_categorical_accuracy: 0.9528 - val_loss: 0.1494 - val_sparse_categorical_accuracy: 0.9563\n", + "Epoch 4/6\n", + "469/469 [==============================] - 5s 10ms/step - loss: 0.1565 - sparse_categorical_accuracy: 0.9557 - val_loss: 0.1514 - val_sparse_categorical_accuracy: 0.9547\n", + " 48/469 [==>...........................] - ETA: 2s - loss: 0.1362 - sparse_categorical_accuracy: 0.9627Epoch 6/6\n", + "469/469 [==============================] - 5s 10ms/step - loss: 0.0833 - sparse_categorical_accuracy: 0.9755 - val_loss: 0.0951 - val_sparse_categorical_accuracy: 0.9699\n", + "320/469 [===================>..........] 
- ETA: 1s - loss: 0.2365 - sparse_categorical_accuracy: 0.9347Epoch 6/6\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.0957 - sparse_categorical_accuracy: 0.9730 - val_loss: 0.1015 - val_sparse_categorical_accuracy: 0.9689\n", + "469/469 [==============================] - 5s 9ms/step - loss: 0.2308 - sparse_categorical_accuracy: 0.9354 - val_loss: 0.2063 - val_sparse_categorical_accuracy: 0.9425\n", + "Epoch 4/6\n", + "469/469 [==============================] - 5s 9ms/step - loss: 0.1211 - sparse_categorical_accuracy: 0.9646 - val_loss: 0.1182 - val_sparse_categorical_accuracy: 0.9655\n", + "Epoch 5/6\n", + "469/469 [==============================] - 4s 8ms/step - loss: 0.0853 - sparse_categorical_accuracy: 0.9763\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.1353 - sparse_categorical_accuracy: 0.9609 - val_loss: 0.1248 - val_sparse_categorical_accuracy: 0.9627\n", + "173/469 [==========>...................] - ETA: 2s - loss: 0.1068 - sparse_categorical_accuracy: 0.9694Epoch 5/6\n", + "469/469 [==============================] - 6s 12ms/step - loss: 0.1389 - sparse_categorical_accuracy: 0.9605 - val_loss: 0.1331 - val_sparse_categorical_accuracy: 0.9609\n", + "469/469 [==============================] - 6s 11ms/step - loss: 0.0712 - sparse_categorical_accuracy: 0.9793 - val_loss: 0.0885 - val_sparse_categorical_accuracy: 0.9712\n", + "79/79 [==============================] - 1s 6ms/step - loss: 0.1015 - sparse_categorical_accuracy: 0.9689\n", + "469/469 [==============================] - 5s 10ms/step - loss: 0.1973 - sparse_categorical_accuracy: 0.9447 - val_loss: 0.1837 - val_sparse_categorical_accuracy: 0.9467\n", + "217/469 [============>.................] 
- ETA: 1s - loss: 0.1289 - sparse_categorical_accuracy: 0.9644Epoch 5/6\n", + "469/469 [==============================] - 5s 9ms/step - loss: 0.1045 - sparse_categorical_accuracy: 0.9696 - val_loss: 0.1058 - val_sparse_categorical_accuracy: 0.9687\n", + "Epoch 6/6\n", + "469/469 [==============================] - 4s 8ms/step - loss: 0.1281 - sparse_categorical_accuracy: 0.9639\n", + "469/469 [==============================] - 4s 7ms/step - loss: 0.0626 - sparse_categorical_accuracy: 0.9826\n", + "469/469 [==============================] - 5s 10ms/step - loss: 0.1167 - sparse_categorical_accuracy: 0.9655 - val_loss: 0.1198 - val_sparse_categorical_accuracy: 0.9660\n", + "216/469 [============>.................] - ETA: 2s - loss: 0.1815 - sparse_categorical_accuracy: 0.9489Epoch 6/6\n", + "79/79 [==============================] - 1s 8ms/step - loss: 0.0885 - sparse_categorical_accuracy: 0.9712\n", + "79/79 [==============================] - 1s 9ms/step - loss: 0.1331 - sparse_categorical_accuracy: 0.9609\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.1751 - sparse_categorical_accuracy: 0.9509 - val_loss: 0.1667 - val_sparse_categorical_accuracy: 0.9519\n", + "269/469 [================>.............] 
- ETA: 2s - loss: 0.1039 - sparse_categorical_accuracy: 0.9699Epoch 6/6\n", + "469/469 [==============================] - 5s 11ms/step - loss: 0.0917 - sparse_categorical_accuracy: 0.9735 - val_loss: 0.1031 - val_sparse_categorical_accuracy: 0.9693\n", + "469/469 [==============================] - 4s 9ms/step - loss: 0.1036 - sparse_categorical_accuracy: 0.9693 - val_loss: 0.1052 - val_sparse_categorical_accuracy: 0.9679\n", + "469/469 [==============================] - 2s 5ms/step - loss: 0.1571 - sparse_categorical_accuracy: 0.9559 - val_loss: 0.1545 - val_sparse_categorical_accuracy: 0.9562\n", + "469/469 [==============================] - 2s 3ms/step - loss: 0.0900 - sparse_categorical_accuracy: 0.9747\n", + "79/79 [==============================] - 0s 2ms/step - loss: 0.1052 - sparse_categorical_accuracy: 0.9679\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "469/469 [==============================] - 1s 2ms/step - loss: 0.1467 - sparse_categorical_accuracy: 0.9588\n", + "79/79 [==============================] - 0s 2ms/step - loss: 0.1545 - sparse_categorical_accuracy: 0.9562\n", + "469/469 [==============================] - 1s 1ms/step - loss: 0.0818 - sparse_categorical_accuracy: 0.9765\n", + "79/79 [==============================] - 0s 1ms/step - loss: 0.1031 - sparse_categorical_accuracy: 0.9693\n" ] } ], "source": [ - "if all_good:\n", - " # copy the results from the FabSim results dir to the EasyVVUQ results dir\n", - " fab.get_uq_samples(CONFIG, campaign.campaign_dir, sampler.n_samples(), machine=MACHINE)\n", - "else:\n", - " print(\"Not all samples executed correctly\")\n", - " import sys\n", - " sys.exit()" - ] - }, - { - "cell_type": "markdown", - "id": "907c295d", - "metadata": {}, - "source": [ - "Briely:\n", - "\n", - "* `fab.run_uq_ensemble`: this command submits the ensemble to the (remote) host for execution. 
Under the hood it uses the FabSim3 `campaign2ensemble` subroutine to copy the run directories from `WORK_DIR` to the FabSim3 `SWEEP` directory, located in `config_files/grid_search/SWEEP`. From there the ensemble will be sent to the (remote) host.\n", - "* `fab.wait`: this will check every minute on the status of the jobs on the remote host, and sleep otherwise, halting further execution of the script. On the localhost this command doesn't do anything.\n", - "* `fab.verify`: this will execute the `verify_last_ensemble` subroutine to see if the output file `target_filename` for each run in the `SWEEP` directory is present in the corresponding FabSim3 results directory. Returns a boolean flag. `fab.verify` will also call the FabSim `fetch_results` method, which actually retreives the results from the (remote) host. So, if you want to just get the results without verifying the presence of output files, call `fab.fetch_results(machine=MACHINE)` instead. However, if something went wrong on the (remote) host, this will cause an error later on since not all required output files will be transfered on the EasyVVUQ `WORK_DIR`.\n", - "* `fab.get_uq_samples`: copies the samples from the (local) FabSim results directory to the (local) EasyVVUQ campaign directory. It will not delete the results from the FabSim results directory. If you want to save space, you can delete the results on the FabSim side (see `results` directory in your FabSim home directory). You can also call `fab.clear_results(machine, name_results_dir)` to remove a specific FabSim results directory on a given machine.\n", - "\n", - "#### Error handling\n", - "\n", - "If `all_good == False` something went wrong on the (remote) host, and `sys.exit()` is called in our example, giving you the opportunity of investigating what went wrong. It can happen that a (small) number of jobs did not get executed on the remote host for some reason, whereas (most) jobs did execute succesfully. 
In this case simply resubmitting the failed jobs could be an option:\n", - "\n", - "```python\n", - "fab.remove_succesful_runs(CONFIG, campaign.campaign_dir)\n", - "fab.resubmit_previous_ensemble(CONFIG, 'grid_search')\n", - "```\n", - "\n", - "The first command removes all succesful run directories from the `SWEEP` dir for which the output file `TARGET_FILENAME` has been found. For this to work, `fab.verify` must have been called. Then, `fab.resubmit_previous_ensemble` simply resubmits the runs that are present in the `SWEEP` directory, which by now only contains the failed runs. After the jobs have finished, call `fab.verify` again to see if now `TARGET_FILENAME` is present in the results directory, for every run in the `SWEEP` dir.\n", - "\n", - "Once we are sure we have all required output files, the role of FabSim is over, and we proceed with decoding the output files. In this case, our Python script wrote the training and test accuracy to a CSV file, hence we use the `SimpleCSV` decoder. \n", + "###############################\n", + "# execute the defined actions #\n", + "###############################\n", "\n", - "**Note**: It is also possible to use a more flexible HDF5 format, by using `uq.decoders.HDF5` instead." 
+ "campaign.execute().collate()" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 21, "id": "0b55725a", "metadata": {}, "outputs": [ @@ -563,8 +613,8 @@ " 0.005\n", " 0.0\n", " 0.0\n", - " 0.958500\n", - " 0.9543\n", + " 0.958767\n", + " 0.9562\n", " \n", " \n", " 1\n", @@ -574,8 +624,8 @@ " 0.010\n", " 0.0\n", " 0.0\n", - " 0.973183\n", - " 0.9656\n", + " 0.974700\n", + " 0.9679\n", " \n", " \n", " 2\n", @@ -585,8 +635,8 @@ " 0.015\n", " 0.0\n", " 0.0\n", - " 0.978450\n", - " 0.9715\n", + " 0.976450\n", + " 0.9693\n", " \n", " \n", " 3\n", @@ -596,8 +646,8 @@ " 0.005\n", " 0.0\n", " 0.0\n", - " 0.963283\n", - " 0.9599\n", + " 0.963883\n", + " 0.9609\n", " \n", " \n", " 4\n", @@ -607,8 +657,8 @@ " 0.010\n", " 0.0\n", " 0.0\n", - " 0.977467\n", - " 0.9710\n", + " 0.976283\n", + " 0.9689\n", " \n", " \n", " 5\n", @@ -618,8 +668,8 @@ " 0.015\n", " 0.0\n", " 0.0\n", - " 0.984600\n", - " 0.9745\n", + " 0.982617\n", + " 0.9712\n", " \n", " \n", "\n", @@ -637,41 +687,20 @@ "\n", " dropout_prob_hidden accuracy_train accuracy_test \n", " 0 0 0 \n", - "0 0.0 0.958500 0.9543 \n", - "1 0.0 0.973183 0.9656 \n", - "2 0.0 0.978450 0.9715 \n", - "3 0.0 0.963283 0.9599 \n", - "4 0.0 0.977467 0.9710 \n", - "5 0.0 0.984600 0.9745 " + "0 0.0 0.958767 0.9562 \n", + "1 0.0 0.974700 0.9679 \n", + "2 0.0 0.976450 0.9693 \n", + "3 0.0 0.963883 0.9609 \n", + "4 0.0 0.976283 0.9689 \n", + "5 0.0 0.982617 0.9712 " ] }, - "execution_count": 13, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#############################################\n", - "# All output files are present, decode them #\n", - "#############################################\n", - "output_columns = [\"accuracy_train\", \"accuracy_test\"]\n", - "\n", - "decoder = uq.decoders.SimpleCSV(\n", - " target_filename=TARGET_FILENAME,\n", - " output_columns=output_columns)\n", - "\n", - "actions = uq.actions.Actions(\n", - " uq.actions.Decode(decoder),\n", - ")\n", - 
"\n", - "campaign.replace_actions(CAMPAIGN_NAME, actions)\n", - "\n", - "###########################\n", - "# Execute decoding action #\n", - "###########################\n", - "\n", - "campaign.execute().collate()\n", - "\n", "data_frame = campaign.get_collation_result()\n", "data_frame" ] @@ -686,7 +715,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 22, "id": "99ba74e2", "metadata": {}, "outputs": [ @@ -694,7 +723,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Best hyperparameters with 97.45% test accuracy:\n" + "Best hyperparameters with 97.12% test accuracy:\n" ] }, { @@ -743,7 +772,7 @@ "5 128 0.015" ] }, - "execution_count": 14, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } diff --git a/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb new file mode 100644 index 000000000..737a1ba0c --- /dev/null +++ b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb @@ -0,0 +1,836 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05c43ea9", + "metadata": {}, + "source": [ + "## Tuning the hyperparameters of a neural network using EasyVVUQ and FabSim3\n", + "\n", + "In this tutorial we will use the EasyVVUQ `GridSampler` to perform a grid search on the hyperparameters of a simple Keras neural network model, trained to recognize hand-written digits. This is the famous MNIST data set, of which 4 input features (of size 28 x 28) are show below. These are fed into a standard feed-forward neural network, which will predict the label 0-9.\n", + "\n", + "The (Keras) neural network script is located in `mnist/keras_mnist.template`, which will form the input template for the EasyVVUQ encoder. We will assume you are familiar with the basic EasyVVUQ building blocks. If not, you can look at the [basic tutorial](https://github.com/UCL-CCS/EasyVVUQ/blob/dev/tutorials/basic_tutorial.ipynb)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "83545e38", + "metadata": {}, + "source": [ + "![](mnist/mnist_feats.png)" + ] + }, + { + "cell_type": "markdown", + "id": "bf467821", + "metadata": {}, + "source": [ + "We need EasyVVUQ, TensorFlow and the TensorFlow data sets to execute this tutorial. If you need to install these, uncomment the corresponding line below." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "00f7ba08", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install easyvvuq\n", + "# !pip install tensorflow\n", + "# !pip install tensorflow_datasets" + ] + }, + { + "cell_type": "markdown", + "id": "8d5c7fde", + "metadata": {}, + "source": [ + "### FabSim3\n", + "\n", + "While running on the localhost, we will use the [FabSim3](https://github.com/djgroen/FabSim3) automation toolkit for the data processing workflow, i.e. to move the UQ ensemble to/from the localhost. To connect EasyVVUQ with FabSim3, the [FabUQCampaign](https://github.com/wedeling/FabUQCampaign) plugin must be installed.\n", + "\n", + "The advantage of this construction is that we could offload the ensemble to a remote supercomputer using this same script by simply changing the `MACHINE='localhost'` flag, provided that FabSim3 is set up on the remote resource.\n", + "\n", + "For an example **without FabSim3**, see XXX.\n", + "\n", + "For now, import the required libraries below. `fabsim3_cmd_api` is an interface with FabSim3 such that the command-line FabSim3 commands can be executed in a Python script. It is stored locally in `fabsim3_cmd_api.py`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7347053", + "metadata": {}, + "outputs": [], + "source": [ + "import easyvvuq as uq\n", + "import os\n", + "import numpy as np\n", + "\n", + "############################################\n", + "# Import the FabSim3 commandline interface #\n", + "############################################\n", + "import fabsim3_cmd_api as fab" + ] + }, + { + "cell_type": "markdown", + "id": "22672c4c", + "metadata": {}, + "source": [ + "We now set some flags:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "52064503", + "metadata": {}, + "outputs": [], + "source": [ + "# Work directory, where the EasyVVUQ directory will be placed\n", + "WORK_DIR = '/tmp'\n", + "# machine to run ensemble on\n", + "MACHINE = \"localhost\"\n", + "# target output filename generated by the code\n", + "TARGET_FILENAME = 'output.csv'\n", + "# EasyVVUQ campaign name\n", + "CAMPAIGN_NAME = 'grid_test'\n", + "\n", + "# FabSim3 config name\n", + "CONFIG = 'grid_search'\n", + "# Use QCG PilotJob or not\n", + "PILOT_JOB = False" + ] + }, + { + "cell_type": "markdown", + "id": "898a0d57", + "metadata": {}, + "source": [ + "Most of these are self-explanatory. Here, `CONFIG` is the name of the script that gets executed for each sample, in this case `grid_search`, which is located in `FabUQCampaign/templates/grid_search`. Its contents essentially just run our Python code `hyper_param_tune.py`:" + ] + }, + { + "cell_type": "markdown", + "id": "a229e7b9", + "metadata": {}, + "source": [ + "```\n", + "cd $job_results\n", + "$run_prefix\n", + "\n", + "/usr/bin/env > env.log\n", + "\n", + "python3 hyper_param_tune.py\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5c0c006f", + "metadata": {}, + "source": [ + "Here, `hyper_param_tune.py` is generated by the EasyVVUQ encoder, see below. The flag `PILOT_JOB` regulates the use of the QCG PilotJob mechanism. 
If `True`, FabSim will submit the ensemble to the (remote) host as a QCG PilotJob, which essentially means that all individual jobs of the ensemble will get packaged into a single job allocation, thereby circumventing the limit on the maximum number of simultaneous jobs that is present on many supercomputers. For more info on the QCG PilotJob click [here](https://github.com/vecma-project/QCG-PilotJob). In this example we'll run the samples on the localhost (see `MACHINE`), and hence we set `PILOT_JOB=False`.\n", + "\n", + "As is standard in EasyVVUQ, we now define the parameter space. In this case these are 4 hyperparameters. There is one hidden layer with `n_neurons` neurons, a Dropout layer after the input and hidden layer, with dropout probability `dropout_prob_in` and `dropout_prob_hidden` respectively. We made the `learning_rate` tuneable as well." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6a3a8a82", + "metadata": {}, + "outputs": [], + "source": [ + "params = {}\n", + "params[\"n_neurons\"] = {\"type\":\"integer\", \"default\": 32}\n", + "params[\"dropout_prob_in\"] = {\"type\":\"float\", \"default\": 0.0}\n", + "params[\"dropout_prob_hidden\"] = {\"type\":\"float\", \"default\": 0.0}\n", + "params[\"learning_rate\"] = {\"type\":\"float\", \"default\": 0.001}" + ] + }, + { + "cell_type": "markdown", + "id": "7b41214c", + "metadata": {}, + "source": [ + "These 4 hyperparameters appear as flags in the input template `mnist/keras_mnist.template`. Typically this is generated from an input file used by some simulation code. In this case however, `mnist/keras_mnist.template` is directly our Python script, with the hyperparameters replaced by flags. 
For instance:\n", + "\n", + "```python\n", + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dropout($dropout_prob_in),\n", + " tf.keras.layers.Dense($n_neurons, activation='relu'),\n", + " tf.keras.layers.Dropout($dropout_prob_hidden),\n", + " tf.keras.layers.Dense(10)\n", + "])\n", + "```\n", + "\n", + "is simply the neural network construction part with flags for the dropout probabilities and the number of neurons in the hidden layer. The encoder reads the flags and replaces them with numeric values, and it subsequently writes the corresponding `target_filename=hyper_param_tune.py`:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3ed08818", + "metadata": {}, + "outputs": [], + "source": [ + "encoder = uq.encoders.GenericEncoder('./mnist/keras_mnist.template', target_filename='hyper_param_tune.py')" + ] + }, + { + "cell_type": "markdown", + "id": "02644574", + "metadata": {}, + "source": [ + "Now we create the first set of EasyVVUQ `actions` to create separate run directories and to encode the template:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a10d571c", + "metadata": {}, + "outputs": [], + "source": [ + "# actions: create directories and encode input template, placing 1 hyper_param_tune.py file in each directory.\n", + "actions = uq.actions.Actions(\n", + " uq.actions.CreateRunDirectory(root=WORK_DIR, flatten=True),\n", + " uq.actions.Encode(encoder),\n", + ")\n", + "\n", + "# create the EasyVVUQ main campaign object\n", + "campaign = uq.Campaign(\n", + " name=CAMPAIGN_NAME,\n", + " work_dir=WORK_DIR,\n", + ")\n", + "\n", + "# add the param definitions and actions to the campaign\n", + "campaign.add_app(\n", + " name=CAMPAIGN_NAME,\n", + " params=params,\n", + " actions=actions\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "bbbba5f8", + "metadata": {}, + "source": [ + "As with the uncertainty-quantification (UQ) samplers, the `vary` 
is used to select which of the `params` we actually vary. Unlike the UQ samplers we do not specify an input probability distribution. This being a grid search, we simply specify a list of values for each hyperparameter. Parameters not in `vary`, but with a flag in the template, will be given the default value specified in `params`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "a3247048", + "metadata": {}, + "outputs": [], + "source": [ + "vary = {\"n_neurons\": [64, 128], \"learning_rate\": [0.005, 0.01, 0.015]}" + ] + }, + { + "cell_type": "markdown", + "id": "612e912c", + "metadata": {}, + "source": [ + "**Note:** we are mixing integers and floats in the `vary` dict. Other data types (string, boolean) can also be used.\n", + "\n", + "The `vary` dict is passed to the `Grid_Sampler`. As can be seen, it created a tensor product of all 1D points specified in `vary`. If a single tensor product is not useful (e.g. because it creates combinations of parameters that do not make sense), you can also pass a list of different `vary` dicts. For even more flexibility you can also write the required parameter combinations to a CSV file, and pass it to the `CSVSampler` instead." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "29e62d09", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 6 points:\n" + ] + }, + { + "data": { + "text/plain": [ + "[array([[64, 0.005],\n", + " [64, 0.01],\n", + " [64, 0.015],\n", + " [128, 0.005],\n", + " [128, 0.01],\n", + " [128, 0.015]], dtype=object)]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an instance of the Grid Sampler\n", + "sampler = uq.sampling.Grid_Sampler(vary)\n", + "\n", + "# Associate the sampler with the campaign\n", + "campaign.set_sampler(sampler)\n", + "\n", + "# print the points\n", + "print(\"There are %d points:\" % (sampler.n_samples()))\n", + "sampler.points" + ] + }, + { + "cell_type": "markdown", + "id": "99c39b4b", + "metadata": {}, + "source": [ + "Run the `actions` (create directories with `hyper_param_tune.py` files in it)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2095968a", + "metadata": {}, + "outputs": [], + "source": [ + "###############################\n", + "# execute the defined actions #\n", + "###############################\n", + "\n", + "campaign.execute().collate()" + ] + }, + { + "cell_type": "markdown", + "id": "b149dd32", + "metadata": {}, + "source": [ + "To run the ensemble, execute:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7b9cb1b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost run_uq_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,script=grid_search,skip=0,PJ=False\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 15:38:16.535325: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in 
performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:16.686716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:16.686745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:17.563526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:17.563594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:17.563603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:19.173964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:19.173991: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:19.174006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:19.174233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:30.534364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:30.678101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:30.678126: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:31.523915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:31.523979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:31.523987: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:33.109661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:33.109685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:33.109702: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:33.109951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:38:46.244240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + 
"2023-02-08 15:38:46.391594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:46.391621: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:38:47.261374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:47.261439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:47.261448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:38:48.916947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:38:48.916972: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:38:48.916988: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:38:48.917225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:04.143172: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:04.290000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:04.290025: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:05.206811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:05.206880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:05.206889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-02-08 15:39:06.943849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:06.943870: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:06.943886: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:06.944141: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:20.118667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other 
operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:20.263838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:20.263863: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:21.105383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:21.105447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:21.105456: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:39:22.695205: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:22.695230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:22.695246: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:22.695475: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:37.156612: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-02-08 15:39:37.302893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:37.302914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-02-08 15:39:38.146639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 
not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:38.146704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:38.146713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-02-08 15:39:39.743663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-02-08 15:39:39.743685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-02-08 15:39:39.743701: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-02-08 15:39:39.743931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "###################################################\n", + "# run the UQ ensemble using the FabSim3 interface #\n", + "###################################################\n", + "\n", + 
"fab.run_uq_ensemble(CONFIG, campaign.campaign_dir, script='grid_search',\n", + " machine=MACHINE, PJ=PILOT_JOB)\n", + "\n", + "# wait for job to complete\n", + "fab.wait(machine=MACHINE)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9d2c0ddb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost fetch_results\n", + "Executing fabsim localhost verify_last_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,target_filename=output.csv,machine=localhost\n" + ] + } + ], + "source": [ + "# check if all output files are retrieved from the remote machine, returns a Boolean flag\n", + "all_good = fab.verify(CONFIG, campaign.campaign_dir, TARGET_FILENAME, machine=MACHINE)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c2b9838b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost get_uq_samples:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,number_of_samples=6,skip=0\n" + ] + } + ], + "source": [ + "if all_good:\n", + " # copy the results from the FabSim results dir to the EasyVVUQ results dir\n", + " fab.get_uq_samples(CONFIG, campaign.campaign_dir, sampler.n_samples(), machine=MACHINE)\n", + "else:\n", + " print(\"Not all samples executed correctly\")\n", + " import sys\n", + " sys.exit()" + ] + }, + { + "cell_type": "markdown", + "id": "907c295d", + "metadata": {}, + "source": [ + "Briely:\n", + "\n", + "* `fab.run_uq_ensemble`: this command submits the ensemble to the (remote) host for execution. Under the hood it uses the FabSim3 `campaign2ensemble` subroutine to copy the run directories from `WORK_DIR` to the FabSim3 `SWEEP` directory, located in `config_files/grid_search/SWEEP`. 
From there the ensemble will be sent to the (remote) host.\n", + "* `fab.wait`: this will check every minute on the status of the jobs on the remote host, and sleep otherwise, halting further execution of the script. On the localhost this command doesn't do anything.\n", + "* `fab.verify`: this will execute the `verify_last_ensemble` subroutine to see if the output file `target_filename` for each run in the `SWEEP` directory is present in the corresponding FabSim3 results directory. Returns a boolean flag. `fab.verify` will also call the FabSim `fetch_results` method, which actually retrieves the results from the (remote) host. So, if you want to just get the results without verifying the presence of output files, call `fab.fetch_results(machine=MACHINE)` instead. However, if something went wrong on the (remote) host, this will cause an error later on since not all required output files will be transferred to the EasyVVUQ `WORK_DIR`.\n", + "* `fab.get_uq_samples`: copies the samples from the (local) FabSim results directory to the (local) EasyVVUQ campaign directory. It will not delete the results from the FabSim results directory. If you want to save space, you can delete the results on the FabSim side (see `results` directory in your FabSim home directory). You can also call `fab.clear_results(machine, name_results_dir)` to remove a specific FabSim results directory on a given machine.\n", + "\n", + "#### Error handling\n", + "\n", + "If `all_good == False` something went wrong on the (remote) host, and `sys.exit()` is called in our example, giving you the opportunity of investigating what went wrong. It can happen that a (small) number of jobs did not get executed on the remote host for some reason, whereas (most) jobs did execute successfully. 
In this case simply resubmitting the failed jobs could be an option:\n", + "\n", + "```python\n", + "fab.remove_succesful_runs(CONFIG, campaign.campaign_dir)\n", + "fab.resubmit_previous_ensemble(CONFIG, 'grid_search')\n", + "```\n", + "\n", + "The first command removes all successful run directories from the `SWEEP` dir for which the output file `TARGET_FILENAME` has been found. For this to work, `fab.verify` must have been called. Then, `fab.resubmit_previous_ensemble` simply resubmits the runs that are present in the `SWEEP` directory, which by now only contains the failed runs. After the jobs have finished, call `fab.verify` again to see if now `TARGET_FILENAME` is present in the results directory, for every run in the `SWEEP` dir.\n", + "\n", + "Once we are sure we have all required output files, the role of FabSim is over, and we proceed with decoding the output files. In this case, our Python script wrote the training and test accuracy to a CSV file, hence we use the `SimpleCSV` decoder. \n", + "\n", + "**Note**: It is also possible to use a more flexible HDF5 format, by using `uq.decoders.HDF5` instead." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0b55725a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
run_iditerationn_neuronslearning_ratedropout_prob_indropout_prob_hiddenaccuracy_trainaccuracy_test
00000000
010640.0050.00.00.9585000.9543
120640.0100.00.00.9731830.9656
230640.0150.00.00.9784500.9715
3401280.0050.00.00.9632830.9599
4501280.0100.00.00.9774670.9710
5601280.0150.00.00.9846000.9745
\n", + "
" + ], + "text/plain": [ + " run_id iteration n_neurons learning_rate dropout_prob_in \\\n", + " 0 0 0 0 0 \n", + "0 1 0 64 0.005 0.0 \n", + "1 2 0 64 0.010 0.0 \n", + "2 3 0 64 0.015 0.0 \n", + "3 4 0 128 0.005 0.0 \n", + "4 5 0 128 0.010 0.0 \n", + "5 6 0 128 0.015 0.0 \n", + "\n", + " dropout_prob_hidden accuracy_train accuracy_test \n", + " 0 0 0 \n", + "0 0.0 0.958500 0.9543 \n", + "1 0.0 0.973183 0.9656 \n", + "2 0.0 0.978450 0.9715 \n", + "3 0.0 0.963283 0.9599 \n", + "4 0.0 0.977467 0.9710 \n", + "5 0.0 0.984600 0.9745 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#############################################\n", + "# All output files are present, decode them #\n", + "#############################################\n", + "output_columns = [\"accuracy_train\", \"accuracy_test\"]\n", + "\n", + "decoder = uq.decoders.SimpleCSV(\n", + " target_filename=TARGET_FILENAME,\n", + " output_columns=output_columns)\n", + "\n", + "actions = uq.actions.Actions(\n", + " uq.actions.Decode(decoder),\n", + ")\n", + "\n", + "campaign.replace_actions(CAMPAIGN_NAME, actions)\n", + "\n", + "###########################\n", + "# Execute decoding action #\n", + "###########################\n", + "\n", + "campaign.execute().collate()\n", + "\n", + "data_frame = campaign.get_collation_result()\n", + "data_frame" + ] + }, + { + "cell_type": "markdown", + "id": "e1e62a8c", + "metadata": {}, + "source": [ + "Display the hyperparameters with the maximum test accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "99ba74e2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparameters with 97.45% test accuracy:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_neuronslearning_rate
00
51280.015
\n", + "
" + ], + "text/plain": [ + " n_neurons learning_rate\n", + " 0 0\n", + "5 128 0.015" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"Best hyperparameters with %.2f%% test accuracy:\" % (data_frame['accuracy_test'].max().values * 100,))\n", + "data_frame.loc[data_frame['accuracy_test'].idxmax()][vary.keys()]" + ] + }, + { + "cell_type": "markdown", + "id": "5d3a07de", + "metadata": {}, + "source": [ + "## Executing a grid search on a remote host\n", + "\n", + "To run the example script on a remote host, a number of changes must be made. Ensure the remote host is defined in `machines.yml` in your FabSim3 directory, as well as the user login information. Assuming we'll run the ensemble on the Eagle super computer at the Poznan Supercomputing and Networking Center , the entry in `machines_user.yml` could look similar to the following:\n", + "\n", + "```\n", + "eagle_vecma:\n", + " username: \"\"\n", + " home_path_template: \"/tmp/lustre/\"\n", + " budget: \"plgvecma2021\"\n", + " cores: 1\n", + " # job wall time for each job, format Days-Hours:Minutes:Seconds\n", + " job_wall_time : \"0-0:59:00\" # job wall time for each single job without PJ\n", + " PJ_size : \"1\" # number of requested nodes for PJ\n", + " PJ_wall_time : \"0-00:59:00\" # job wall time for PJ\n", + " modules:\n", + " loaded: [\"python/3.7.3\"] \n", + " unloaded: [] \n", + "```\n", + " Here:\n", + " \n", + " * `home_path_template`: the remote root directory for FabSim3, such that for instance the results on the remote machine will be stored in `home_path_template/FabSim3/results`.\n", + " * `budget`: the name of the computational budget that you are allowed to use.\n", + " * `cores`: the number of cores to use *per run*. 
Our simple Keras script just needs a single core, but applications which already have some built-in parallelism will require more cores.\n", + " * `job_wall_time`: a time limit *per run*, and *without* the use of the QCG PilotJob framework.\n", + " * `PJ_size`: the number of *nodes*, in the case *with* the use of the QCG PilotJob framework. \n", + " * `PJ_wall_time`: a *total* time limit, and *with* the use of the QCG PilotJob framework.\n", + "\n", + "To automatically set up the SSH keys, and avoid having to log in manually for every sample, run the following from the command line:\n", + "\n", + "```\n", + "fabsim eagle_vecma setup_ssh_keys\n", + "```\n", + "\n", + "Once the remote machine is properly set up, we can just set:\n", + "\n", + "```python\n", + "# Use QCG PilotJob or not\n", + "PILOT_JOB = False\n", + "# machine to run ensemble on\n", + "MACHINE = \"eagle_vecma\"\n", + "```\n", + "\n", + "If you now re-run the example script, the ensemble will execute on the remote host, submitting each run as a separate job. By setting `PILOT_JOB=True`, all runs will be packaged in a single job."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e57104b3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a3e43c4a5cca42e577bcaf4a9e494d1230057f86 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Fri, 17 Feb 2023 16:22:35 +0100 Subject: [PATCH 5/7] added get_param_names subroutine --- easyvvuq/sampling/grid_sampler.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/easyvvuq/sampling/grid_sampler.py b/easyvvuq/sampling/grid_sampler.py index cc60743fa..f7c7ef72d 100644 --- a/easyvvuq/sampling/grid_sampler.py +++ b/easyvvuq/sampling/grid_sampler.py @@ -96,6 +96,23 @@ def n_samples(self): # return self.points.shape[0] return self.cumul_sizes[-1] + def get_param_names(self): + """ + Get the names of all parameters that were varied. + + Returns + ------- + param_names : list + List of parameter names. + + """ + param_names = [] + for _vary in self.vary: + for name in _vary.keys(): + if not name in param_names: + param_names.append(name) + return param_names + def __next__(self): """ Return the next sample from the input distributions. 
From 629f7eb5b84968b67841f6200c2fadb9c722d85c Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 2 Mar 2023 11:38:52 +0100 Subject: [PATCH 6/7] changed template in tutorial --- .../hyperparameter_tuning_tutorial.ipynb | 597 ++++-------------- ...arameter_tuning_tutorial_with_fabsim.ipynb | 357 ++--------- 2 files changed, 200 insertions(+), 754 deletions(-) diff --git a/tutorials/hyperparameter_tuning_tutorial.ipynb b/tutorials/hyperparameter_tuning_tutorial.ipynb index 081a99391..ee7bab40c 100644 --- a/tutorials/hyperparameter_tuning_tutorial.ipynb +++ b/tutorials/hyperparameter_tuning_tutorial.ipynb @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "id": "00f7ba08", "metadata": {}, "outputs": [], @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "id": "e7347053", "metadata": {}, "outputs": [], @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "id": "52064503", "metadata": {}, "outputs": [], @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "id": "6a3a8a82", "metadata": {}, "outputs": [], @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "id": "3ed08818", "metadata": {}, "outputs": [], @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "id": "a10d571c", "metadata": {}, "outputs": [], @@ -179,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "id": "a3247048", "metadata": {}, "outputs": [], @@ -199,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "id": "29e62d09", "metadata": {}, "outputs": [ @@ -221,7 +221,7 @@ " [128, 0.015]], dtype=object)]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -248,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 20, + 
"execution_count": null, "id": "2095968a", "metadata": {}, "outputs": [ @@ -256,73 +256,83 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 16:03:44.946331: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.627885: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:44.955703: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.628054: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:44.992926: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.630651: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 
16:03:45.009895: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.632733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:45.015426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.635383: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:45.026090: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:46.637862: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:45.152547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: 
libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.152724: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 16:03:45.185859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.185998: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 16:03:45.191458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.191592: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 16:03:45.207372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.207499: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 16:03:45.223128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.223246: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 
16:03:45.239120: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:45.239235: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 16:03:46.712789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.713075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.713108: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 16:03:46.947109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.947414: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.947445: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 16:03:46.980504: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.980720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:46.980789: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 16:03:47.009215: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.009456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.009536: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 16:03:47.062563: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.062813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.062891: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 16:03:47.067269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.067493: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:47.067554: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + "2023-03-02 11:34:47.056973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:47.056973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:47.057003: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:47.057002: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:47.057004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:47.057004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:47.057011: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:47.057011: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + 
"2023-03-02 11:34:47.057026: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:47.057026: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:47.057032: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:47.057032: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:34:48.728698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.728836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.728853: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:34:48.735720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.735854: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.735872: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:34:48.747918: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.747999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.748009: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:34:48.760968: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.761089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.761108: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:34:48.803344: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.803468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.803486: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:34:48.828321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.828443: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:48.828462: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 16:03:51.501659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:51.506190: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:51.506230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:51.506261: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 16:03:51.506664: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) 
to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:52.172878: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.173245: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.173292: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.175002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-02 11:34:52.177382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.177406: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.177426: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.177712: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the 
appropriate compiler flags.\n", + "2023-03-02 11:34:52.179983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.179983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.180013: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.180013: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.180044: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.180049: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.180416: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:51.516118: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:51.516247: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - 
"2023-02-08 16:03:51.516744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:52.180415: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:51.915109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:51.915697: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:51.915799: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 16:03:51.916374: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:52.188145: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.188183: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.188216: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to 
be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.188602: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:52.248903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:52.249430: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:52.249657: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 16:03:52.250282: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:34:52.193807: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:34:52.193845: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:34:52.193878: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:34:52.194312: I 
tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -332,219 +342,86 @@ "text": [ "Epoch 1/6\n", "Epoch 1/6\n", - "Epoch 1/6\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:03:52.631532: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:52.632048: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:52.632157: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 16:03:52.632559: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 16:03:52.949574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 16:03:52.950163: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 16:03:52.950295: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host 
(wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 16:03:52.950896: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ "Epoch 1/6\n", "Epoch 1/6\n", - "Epoch 1/6\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:03.585261: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 43442 of 60000\n", - "2023-02-08 16:04:04.100835: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 27025 of 60000\n", - "2023-02-08 16:04:04.220694: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 28169 of 60000\n", - "2023-02-08 16:04:05.258903: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 36992 of 60000\n", - "2023-02-08 16:04:05.267869: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 23851 of 60000\n", - "2023-02-08 16:04:07.095526: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 20192 of 60000\n", - "2023-02-08 16:04:07.898318: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "183/469 [==========>...................] 
- ETA: 2s - loss: 0.6070 - sparse_categorical_accuracy: 0.8185" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:09.827426: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "340/469 [====================>.........] - ETA: 1s - loss: 0.4603 - sparse_categorical_accuracy: 0.8630.085" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:11.536398: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 1/469 [..............................] - ETA: 2:36:25 - loss: 2.3016 - sparse_categorical_accuracy: 0.1094" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:12.699494: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "400/469 [========================>.....] - ETA: 0s - loss: 0.5790 - sparse_categorical_accuracy: 0.8365" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:15.272137: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 46812 of 60000\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "469/469 [==============================] - 24s 17ms/step - loss: 0.4012 - sparse_categorical_accuracy: 0.8810 - val_loss: 0.2117 - val_sparse_categorical_accuracy: 0.9375\n", - "372/469 [======================>.......] 
- ETA: 0s - loss: 0.4127 - sparse_categorical_accuracy: 0.8787" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:16.448009: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "469/469 [==============================] - 25s 19ms/step - loss: 0.4395 - sparse_categorical_accuracy: 0.8714 - val_loss: 0.2499 - val_sparse_categorical_accuracy: 0.9277\n", + "Epoch 1/6\n", + "Epoch 1/6\n", + "469/469 [==============================] - 15s 13ms/step - loss: 0.4033 - sparse_categorical_accuracy: 0.8792 - val_loss: 0.2163 - val_sparse_categorical_accuracy: 0.9375\n", "Epoch 2/6\n", - "469/469 [==============================] - 28s 19ms/step - loss: 0.5410 - sparse_categorical_accuracy: 0.8465 - val_loss: 0.2965 - val_sparse_categorical_accuracy: 0.9148\n", - "469/469 [==============================] - 28s 17ms/step - loss: 0.3759 - sparse_categorical_accuracy: 0.8899 - val_loss: 0.2053 - val_sparse_categorical_accuracy: 0.9395\n", + "469/469 [==============================] - 15s 9ms/step - loss: 0.4464 - sparse_categorical_accuracy: 0.8710 - val_loss: 0.2590 - val_sparse_categorical_accuracy: 0.9264\n", "Epoch 2/6\n", - "408/469 [=========================>....] 
- ETA: 0s - loss: 0.5421 - sparse_categorical_accuracy: 0.8485" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-02-08 16:04:20.977864: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "469/469 [==============================] - 6s 12ms/step - loss: 0.2182 - sparse_categorical_accuracy: 0.9376 - val_loss: 0.1825 - val_sparse_categorical_accuracy: 0.9449\n", - "Epoch 3/6\n", - "469/469 [==============================] - 31s 18ms/step - loss: 0.5106 - sparse_categorical_accuracy: 0.8572 - val_loss: 0.2834 - val_sparse_categorical_accuracy: 0.9222\n", + "469/469 [==============================] - 15s 13ms/step - loss: 0.5436 - sparse_categorical_accuracy: 0.8448 - val_loss: 0.2959 - val_sparse_categorical_accuracy: 0.9169\n", + "469/469 [==============================] - 16s 14ms/step - loss: 0.4242 - sparse_categorical_accuracy: 0.8773 - val_loss: 0.2251 - val_sparse_categorical_accuracy: 0.9334\n", "Epoch 2/6\n", - "469/469 [==============================] - 6s 12ms/step - loss: 0.1755 - sparse_categorical_accuracy: 0.9489 - val_loss: 0.1488 - val_sparse_categorical_accuracy: 0.9547\n", - "469/469 [==============================] - 34s 14ms/step - loss: 0.4380 - sparse_categorical_accuracy: 0.8723 - val_loss: 0.2433 - val_sparse_categorical_accuracy: 0.9304\n", - "469/469 [==============================] - 5s 9ms/step - loss: 0.1650 - sparse_categorical_accuracy: 0.9526 - val_loss: 0.1451 - val_sparse_categorical_accuracy: 0.9584\n", - "Epoch 4/6\n", - "469/469 [==============================] - 5s 9ms/step - loss: 0.2626 - sparse_categorical_accuracy: 0.9258 - val_loss: 0.2281 - val_sparse_categorical_accuracy: 0.9353\n", + "469/469 [==============================] - 16s 14ms/step - loss: 0.3825 - sparse_categorical_accuracy: 0.8884 - val_loss: 0.2041 - val_sparse_categorical_accuracy: 0.9407\n", + "Epoch 2/6\n", + "469/469 
[==============================] - 16s 14ms/step - loss: 0.5121 - sparse_categorical_accuracy: 0.8543 - val_loss: 0.2868 - val_sparse_categorical_accuracy: 0.9208\n", + "296/469 [=================>............] - ETA: 0s - loss: 0.2019 - sparse_categorical_accuracy: 0.9404Epoch 2/6\n", + "469/469 [==============================] - 2s 5ms/step - loss: 0.2190 - sparse_categorical_accuracy: 0.9385 - val_loss: 0.1947 - val_sparse_categorical_accuracy: 0.9430\n", + "243/469 [==============>...............] - ETA: 1s - loss: 0.2207 - sparse_categorical_accuracy: 0.9359Epoch 3/6\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.1926 - sparse_categorical_accuracy: 0.9432 - val_loss: 0.1692 - val_sparse_categorical_accuracy: 0.9502\n", "Epoch 3/6\n", - "137/469 [=======>......................] - ETA: 2s - loss: 0.1411 - sparse_categorical_accuracy: 0.9607Epoch 3/6\n", - "466/469 [============================>.] - ETA: 0s - loss: 0.2146 - sparse_categorical_accuracy: 0.9399Epoch 2/6\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.1325 - sparse_categorical_accuracy: 0.9621 - val_loss: 0.1244 - val_sparse_categorical_accuracy: 0.9632\n", - "Epoch 5/6\n", - " 1/469 [..............................] - ETA: 2:00 - loss: 0.1179 - sparse_categorical_accuracy: 0.9766Epoch 2/6\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.2146 - sparse_categorical_accuracy: 0.9399 - val_loss: 0.1891 - val_sparse_categorical_accuracy: 0.9453\n", - "Epoch 4/6\n", - " 36/469 [=>............................] - ETA: 5s - loss: 0.2037 - sparse_categorical_accuracy: 0.9418Epoch 2/6\n", - "469/469 [==============================] - 6s 11ms/step - loss: 0.1273 - sparse_categorical_accuracy: 0.9635 - val_loss: 0.1145 - val_sparse_categorical_accuracy: 0.9638\n", - "142/469 [========>.....................] 
- ETA: 3s - loss: 0.2129 - sparse_categorical_accuracy: 0.9404Epoch 4/6\n", - "469/469 [==============================] - 7s 15ms/step - loss: 0.1914 - sparse_categorical_accuracy: 0.9454 - val_loss: 0.1559 - val_sparse_categorical_accuracy: 0.9544\n", + "469/469 [==============================] - 4s 8ms/step - loss: 0.2056 - sparse_categorical_accuracy: 0.9409 - val_loss: 0.1634 - val_sparse_categorical_accuracy: 0.9507\n", "Epoch 3/6\n", - "469/469 [==============================] - 7s 14ms/step - loss: 0.2780 - sparse_categorical_accuracy: 0.9215 - val_loss: 0.2372 - val_sparse_categorical_accuracy: 0.9317\n", - "469/469 [==============================] - 8s 16ms/step - loss: 0.1106 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.1105 - val_sparse_categorical_accuracy: 0.9675\n", - "430/469 [==========================>...] - ETA: 0s - loss: 0.1022 - sparse_categorical_accuracy: 0.9698Epoch 6/6\n", - "469/469 [==============================] - 8s 15ms/step - loss: 0.1801 - sparse_categorical_accuracy: 0.9493 - val_loss: 0.1693 - val_sparse_categorical_accuracy: 0.9522\n", - " 35/469 [=>............................] - ETA: 2s - loss: 0.1041 - sparse_categorical_accuracy: 0.9690Epoch 5/6\n", - "469/469 [==============================] - 7s 14ms/step - loss: 0.2172 - sparse_categorical_accuracy: 0.9384 - val_loss: 0.1806 - val_sparse_categorical_accuracy: 0.9486\n", + "469/469 [==============================] - 4s 8ms/step - loss: 0.1765 - sparse_categorical_accuracy: 0.9489 - val_loss: 0.1405 - val_sparse_categorical_accuracy: 0.9589\n", + "469/469 [==============================] - 4s 9ms/step - loss: 0.2690 - sparse_categorical_accuracy: 0.9233 - val_loss: 0.2342 - val_sparse_categorical_accuracy: 0.9324\n", "Epoch 3/6\n", - "469/469 [==============================] - 7s 14ms/step - loss: 0.1021 - sparse_categorical_accuracy: 0.9697 - val_loss: 0.1086 - val_sparse_categorical_accuracy: 0.9661\n", - "Epoch 5/6\n", - "272/469 [================>.............] 
- ETA: 1s - loss: 0.1580 - sparse_categorical_accuracy: 0.9552Epoch 3/6\n", - "469/469 [==============================] - 4s 9ms/step - loss: 0.1461 - sparse_categorical_accuracy: 0.9574 - val_loss: 0.1325 - val_sparse_categorical_accuracy: 0.9617\n", - "331/469 [====================>.........] - ETA: 1s - loss: 0.0834 - sparse_categorical_accuracy: 0.9753Epoch 4/6\n", - "469/469 [==============================] - 4s 9ms/step - loss: 0.1660 - sparse_categorical_accuracy: 0.9528 - val_loss: 0.1494 - val_sparse_categorical_accuracy: 0.9563\n", - "Epoch 4/6\n", - "469/469 [==============================] - 5s 10ms/step - loss: 0.1565 - sparse_categorical_accuracy: 0.9557 - val_loss: 0.1514 - val_sparse_categorical_accuracy: 0.9547\n", - " 48/469 [==>...........................] - ETA: 2s - loss: 0.1362 - sparse_categorical_accuracy: 0.9627Epoch 6/6\n", - "469/469 [==============================] - 5s 10ms/step - loss: 0.0833 - sparse_categorical_accuracy: 0.9755 - val_loss: 0.0951 - val_sparse_categorical_accuracy: 0.9699\n", - "320/469 [===================>..........] - ETA: 1s - loss: 0.2365 - sparse_categorical_accuracy: 0.9347Epoch 6/6\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.0957 - sparse_categorical_accuracy: 0.9730 - val_loss: 0.1015 - val_sparse_categorical_accuracy: 0.9689\n", - "469/469 [==============================] - 5s 9ms/step - loss: 0.2308 - sparse_categorical_accuracy: 0.9354 - val_loss: 0.2063 - val_sparse_categorical_accuracy: 0.9425\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.1725 - sparse_categorical_accuracy: 0.9514 - val_loss: 0.1574 - val_sparse_categorical_accuracy: 0.9550\n", + " 84/469 [====>.........................] 
- ETA: 2s - loss: 0.2292 - sparse_categorical_accuracy: 0.9323Epoch 4/6\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.1440 - sparse_categorical_accuracy: 0.9579 - val_loss: 0.1253 - val_sparse_categorical_accuracy: 0.9631\n", "Epoch 4/6\n", - "469/469 [==============================] - 5s 9ms/step - loss: 0.1211 - sparse_categorical_accuracy: 0.9646 - val_loss: 0.1182 - val_sparse_categorical_accuracy: 0.9655\n", + "221/469 [=============>................] - ETA: 1s - loss: 0.1635 - sparse_categorical_accuracy: 0.9525Epoch 3/6\n", + "113/469 [======>.......................] - ETA: 1s - loss: 0.1428 - sparse_categorical_accuracy: 0.9583Epoch 2/6\n", + "469/469 [==============================] - 4s 8ms/step - loss: 0.1552 - sparse_categorical_accuracy: 0.9557 - val_loss: 0.1408 - val_sparse_categorical_accuracy: 0.9578\n", + "455/469 [============================>.] - ETA: 0s - loss: 0.1411 - sparse_categorical_accuracy: 0.9602Epoch 4/6\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.2198 - sparse_categorical_accuracy: 0.9381 - val_loss: 0.1971 - val_sparse_categorical_accuracy: 0.9432\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.1407 - sparse_categorical_accuracy: 0.9603 - val_loss: 0.1350 - val_sparse_categorical_accuracy: 0.9623\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.1177 - sparse_categorical_accuracy: 0.9659 - val_loss: 0.1115 - val_sparse_categorical_accuracy: 0.9675\n", + "437/469 [==========================>...] - ETA: 0s - loss: 0.2782 - sparse_categorical_accuracy: 0.9216Epoch 5/6\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.1296 - sparse_categorical_accuracy: 0.9632 - val_loss: 0.1169 - val_sparse_categorical_accuracy: 0.9652\n", + " 58/469 [==>...........................] 
- ETA: 1s - loss: 0.1079 - sparse_categorical_accuracy: 0.9674Epoch 4/6\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.2760 - sparse_categorical_accuracy: 0.9222 - val_loss: 0.2362 - val_sparse_categorical_accuracy: 0.9323\n", + "361/469 [======================>.......] - ETA: 0s - loss: 0.1000 - sparse_categorical_accuracy: 0.9704Epoch 4/6\n", + "469/469 [==============================] - 2s 5ms/step - loss: 0.1241 - sparse_categorical_accuracy: 0.9643 - val_loss: 0.1146 - val_sparse_categorical_accuracy: 0.9661\n", "Epoch 5/6\n", - "469/469 [==============================] - 4s 8ms/step - loss: 0.0853 - sparse_categorical_accuracy: 0.9763\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.1353 - sparse_categorical_accuracy: 0.9609 - val_loss: 0.1248 - val_sparse_categorical_accuracy: 0.9627\n", - "173/469 [==========>...................] - ETA: 2s - loss: 0.1068 - sparse_categorical_accuracy: 0.9694Epoch 5/6\n", - "469/469 [==============================] - 6s 12ms/step - loss: 0.1389 - sparse_categorical_accuracy: 0.9605 - val_loss: 0.1331 - val_sparse_categorical_accuracy: 0.9609\n", - "469/469 [==============================] - 6s 11ms/step - loss: 0.0712 - sparse_categorical_accuracy: 0.9793 - val_loss: 0.0885 - val_sparse_categorical_accuracy: 0.9712\n", - "79/79 [==============================] - 1s 6ms/step - loss: 0.1015 - sparse_categorical_accuracy: 0.9689\n", - "469/469 [==============================] - 5s 10ms/step - loss: 0.1973 - sparse_categorical_accuracy: 0.9447 - val_loss: 0.1837 - val_sparse_categorical_accuracy: 0.9467\n", - "217/469 [============>.................] - ETA: 1s - loss: 0.1289 - sparse_categorical_accuracy: 0.9644Epoch 5/6\n", - "469/469 [==============================] - 5s 9ms/step - loss: 0.1045 - sparse_categorical_accuracy: 0.9696 - val_loss: 0.1058 - val_sparse_categorical_accuracy: 0.9687\n", + "109/469 [=====>........................] 
- ETA: 1s - loss: 0.2055 - sparse_categorical_accuracy: 0.9414Epoch 5/6\n", + "469/469 [==============================] - 2s 4ms/step - loss: 0.1003 - sparse_categorical_accuracy: 0.9705 - val_loss: 0.1062 - val_sparse_categorical_accuracy: 0.9665\n", + "147/469 [========>.....................] - ETA: 1s - loss: 0.1988 - sparse_categorical_accuracy: 0.9439Epoch 6/6\n", + "119/469 [======>.......................] - ETA: 1s - loss: 0.1241 - sparse_categorical_accuracy: 0.9641Epoch 3/6\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.1030 - sparse_categorical_accuracy: 0.9703 - val_loss: 0.1046 - val_sparse_categorical_accuracy: 0.9687\n", + " 67/469 [===>..........................] - ETA: 2s - loss: 0.2337 - sparse_categorical_accuracy: 0.9356Epoch 5/6\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.1873 - sparse_categorical_accuracy: 0.9469 - val_loss: 0.1769 - val_sparse_categorical_accuracy: 0.9504\n", + "248/469 [==============>...............] 
- ETA: 1s - loss: 0.0810 - sparse_categorical_accuracy: 0.9773Epoch 5/6\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.1213 - sparse_categorical_accuracy: 0.9653 - val_loss: 0.1245 - val_sparse_categorical_accuracy: 0.9628\n", + "Epoch 6/6\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.0867 - sparse_categorical_accuracy: 0.9755 - val_loss: 0.0991 - val_sparse_categorical_accuracy: 0.9702\n", + "469/469 [==============================] - 3s 7ms/step - loss: 0.1038 - sparse_categorical_accuracy: 0.9709 - val_loss: 0.1101 - val_sparse_categorical_accuracy: 0.9670\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.2256 - sparse_categorical_accuracy: 0.9364 - val_loss: 0.2024 - val_sparse_categorical_accuracy: 0.9425\n", + "469/469 [==============================] - 3s 6ms/step - loss: 0.0848 - sparse_categorical_accuracy: 0.9758 - val_loss: 0.0951 - val_sparse_categorical_accuracy: 0.9703\n", + "469/469 [==============================] - 2s 3ms/step - loss: 0.0748 - sparse_categorical_accuracy: 0.9791\n", + "469/469 [==============================] - 2s 4ms/step - loss: 0.1070 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.1089 - val_sparse_categorical_accuracy: 0.9682\n", "Epoch 6/6\n", - "469/469 [==============================] - 4s 8ms/step - loss: 0.1281 - sparse_categorical_accuracy: 0.9639\n", - "469/469 [==============================] - 4s 7ms/step - loss: 0.0626 - sparse_categorical_accuracy: 0.9826\n", - "469/469 [==============================] - 5s 10ms/step - loss: 0.1167 - sparse_categorical_accuracy: 0.9655 - val_loss: 0.1198 - val_sparse_categorical_accuracy: 0.9660\n", - "216/469 [============>.................] 
- ETA: 2s - loss: 0.1815 - sparse_categorical_accuracy: 0.9489Epoch 6/6\n", - "79/79 [==============================] - 1s 8ms/step - loss: 0.0885 - sparse_categorical_accuracy: 0.9712\n", - "79/79 [==============================] - 1s 9ms/step - loss: 0.1331 - sparse_categorical_accuracy: 0.9609\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.1751 - sparse_categorical_accuracy: 0.9509 - val_loss: 0.1667 - val_sparse_categorical_accuracy: 0.9519\n", - "269/469 [================>.............] - ETA: 2s - loss: 0.1039 - sparse_categorical_accuracy: 0.9699Epoch 6/6\n", - "469/469 [==============================] - 5s 11ms/step - loss: 0.0917 - sparse_categorical_accuracy: 0.9735 - val_loss: 0.1031 - val_sparse_categorical_accuracy: 0.9693\n", - "469/469 [==============================] - 4s 9ms/step - loss: 0.1036 - sparse_categorical_accuracy: 0.9693 - val_loss: 0.1052 - val_sparse_categorical_accuracy: 0.9679\n", - "469/469 [==============================] - 2s 5ms/step - loss: 0.1571 - sparse_categorical_accuracy: 0.9559 - val_loss: 0.1545 - val_sparse_categorical_accuracy: 0.9562\n", - "469/469 [==============================] - 2s 3ms/step - loss: 0.0900 - sparse_categorical_accuracy: 0.9747\n", - "79/79 [==============================] - 0s 2ms/step - loss: 0.1052 - sparse_categorical_accuracy: 0.9679\n" + "469/469 [==============================] - 2s 5ms/step - loss: 0.1628 - sparse_categorical_accuracy: 0.9542 - val_loss: 0.1559 - val_sparse_categorical_accuracy: 0.9545\n", + "Epoch 6/6\n", + "311/469 [==================>...........] - ETA: 0s - loss: 0.0950 - sparse_categorical_accuracy: 0.9729Epoch 4/6\n", + "79/79 [==============================] - 0s 3ms/step - loss: 0.0991 - sparse_categorical_accuracy: 0.9702\n", + " 84/469 [====>.........................] 
- ETA: 1s - loss: 0.2051 - sparse_categorical_accuracy: 0.9418Epoch 6/6\n", + "469/469 [==============================] - 1s 3ms/step - loss: 0.0933 - sparse_categorical_accuracy: 0.9735\n", + "469/469 [==============================] - 3s 5ms/step - loss: 0.0900 - sparse_categorical_accuracy: 0.9743 - val_loss: 0.0938 - val_sparse_categorical_accuracy: 0.9720\n", + "469/469 [==============================] - 3s 5ms/step - loss: 0.1441 - sparse_categorical_accuracy: 0.9596 - val_loss: 0.1390 - val_sparse_categorical_accuracy: 0.9595\n", + "79/79 [==============================] - 0s 3ms/step - loss: 0.1089 - sparse_categorical_accuracy: 0.9682\n", + "469/469 [==============================] - 2s 5ms/step - loss: 0.1951 - sparse_categorical_accuracy: 0.9449 - val_loss: 0.1784 - val_sparse_categorical_accuracy: 0.9480\n", + "Epoch 5/6\n", + "469/469 [==============================] - 3s 5ms/step - loss: 0.0724 - sparse_categorical_accuracy: 0.9797 - val_loss: 0.0847 - val_sparse_categorical_accuracy: 0.9726\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "469/469 [==============================] - 1s 2ms/step - loss: 0.1467 - sparse_categorical_accuracy: 0.9588\n", - "79/79 [==============================] - 0s 2ms/step - loss: 0.1545 - sparse_categorical_accuracy: 0.9562\n", - "469/469 [==============================] - 1s 1ms/step - loss: 0.0818 - sparse_categorical_accuracy: 0.9765\n", - "79/79 [==============================] - 0s 1ms/step - loss: 0.1031 - sparse_categorical_accuracy: 0.9693\n" + "469/469 [==============================] - 2s 3ms/step - loss: 0.1309 - sparse_categorical_accuracy: 0.9635\n", + "79/79 [==============================] - 0s 3ms/step - loss: 0.1390 - sparse_categorical_accuracy: 0.9595\n", + "460/469 [============================>.] 
- ETA: 0s - loss: 0.1721 - sparse_categorical_accuracy: 0.9512" ] } ], @@ -558,148 +435,10 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "0b55725a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
run_iditerationn_neuronslearning_ratedropout_prob_indropout_prob_hiddenaccuracy_trainaccuracy_test
00000000
010640.0050.00.00.9587670.9562
120640.0100.00.00.9747000.9679
230640.0150.00.00.9764500.9693
3401280.0050.00.00.9638830.9609
4501280.0100.00.00.9762830.9689
5601280.0150.00.00.9826170.9712
\n", - "
" - ], - "text/plain": [ - " run_id iteration n_neurons learning_rate dropout_prob_in \\\n", - " 0 0 0 0 0 \n", - "0 1 0 64 0.005 0.0 \n", - "1 2 0 64 0.010 0.0 \n", - "2 3 0 64 0.015 0.0 \n", - "3 4 0 128 0.005 0.0 \n", - "4 5 0 128 0.010 0.0 \n", - "5 6 0 128 0.015 0.0 \n", - "\n", - " dropout_prob_hidden accuracy_train accuracy_test \n", - " 0 0 0 \n", - "0 0.0 0.958767 0.9562 \n", - "1 0.0 0.974700 0.9679 \n", - "2 0.0 0.976450 0.9693 \n", - "3 0.0 0.963883 0.9609 \n", - "4 0.0 0.976283 0.9689 \n", - "5 0.0 0.982617 0.9712 " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "data_frame = campaign.get_collation_result()\n", "data_frame" @@ -715,68 +454,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "99ba74e2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best hyperparameters with 97.12% test accuracy:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
n_neuronslearning_rate
00
51280.015
\n", - "
" - ], - "text/plain": [ - " n_neurons learning_rate\n", - " 0 0\n", - "5 128 0.015" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "print(\"Best hyperparameters with %.2f%% test accuracy:\" % (data_frame['accuracy_test'].max().values * 100,))\n", "data_frame.loc[data_frame['accuracy_test'].idxmax()][vary.keys()]" diff --git a/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb index 737a1ba0c..41825d41e 100644 --- a/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb +++ b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb @@ -51,7 +51,7 @@ "\n", "The advantage of this construction is that we could offload the ensemble to a remote supercomputer using this same script by simply changing the `MACHINE='localhost'` flag, provided that FabSIm3 is set up on the remote resource.\n", "\n", - "For an example **without FabSim3**, see XXX.\n", + "For an example **without FabSim3**, see `tutorials/hyperparameter_tuning_tutorial.ipynb`.\n", "\n", "For now, import the required libraries below. `fabsim3_cmd_api` is an interface with fabSim3 such that the command-line FabSim3 commands can be executed in a Python script. It is stored locally in `fabsim3_cmd_api.py`." 
] @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "7b9cb1b8", "metadata": {}, "outputs": [ @@ -324,102 +324,80 @@ "name": "stdout", "output_type": "stream", "text": [ - "Executing fabsim localhost run_uq_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,script=grid_search,skip=0,PJ=False\n" + "Executing fabsim localhost run_uq_ensemble:grid_search,campaign_dir=/tmp/grid_testrebm6ntq,script=grid_search,skip=0,PJ=False\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 15:38:16.535325: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:35:56.557670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:16.686716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:16.686745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:17.563526: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:17.563594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; 
dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:17.563603: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:19.173964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:19.173991: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:19.174006: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:19.174233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:35:56.725197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:35:56.725224: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:35:57.644413: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:35:57.644488: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:35:57.644497: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:35:59.393841: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:35:59.393866: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:35:59.393886: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:35:59.394178: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:30.534364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:12.314798: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them 
in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:30.678101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:30.678126: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:31.523915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:31.523979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:31.523987: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:33.109661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:33.109685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:33.109702: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:33.109951: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:12.475403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:12.475430: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:36:13.409427: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:13.409501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:13.409511: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:36:15.210445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:15.210470: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:36:15.210490: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:36:15.210784: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:46.244240: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:27.814654: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:38:46.391594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: 
libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:46.391621: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:38:47.261374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:47.261439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:47.261448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:38:48.916947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:38:48.916972: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:38:48.916988: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:38:48.917225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:27.985756: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:27.985783: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:36:28.926507: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:28.926585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:28.926596: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:36:30.685925: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:30.685950: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:36:30.685969: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:36:30.686252: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:04.143172: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:42.235332: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:04.290000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:04.290025: I 
tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:05.206811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:05.206880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:05.206889: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + "2023-03-02 11:36:42.397849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:42.397876: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:36:43.325167: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:43.325318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:43.325331: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT 
libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-02-08 15:39:06.943849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:06.943870: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:06.943886: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:06.944141: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:20.118667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:45.073851: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:45.073875: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:36:45.073894: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): 
/proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:36:45.074174: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:20.263838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:20.263863: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:21.105383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:21.105447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:21.105456: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:39:22.695205: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:22.695230: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:22.695246: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:22.695475: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:56.730036: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:37.156612: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-02-08 15:39:37.302893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:37.302914: I 
tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", - "2023-02-08 15:39:38.146639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:38.146704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:38.146713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", - "2023-02-08 15:39:39.743663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", - "2023-02-08 15:39:39.743685: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", - "2023-02-08 15:39:39.743701: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", - "2023-02-08 15:39:39.743931: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "2023-03-02 11:36:56.899197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory\n", + "2023-03-02 11:36:56.899225: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:36:57.892828: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:57.892931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:57.892948: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:36:59.710915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:36:59.710945: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:36:59.710971: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:36:59.711346: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] - }, - { - "data": { 
- "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -436,19 +414,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "9d2c0ddb", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Executing fabsim localhost fetch_results\n", - "Executing fabsim localhost verify_last_ensemble:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,target_filename=output.csv,machine=localhost\n" - ] - } - ], + "outputs": [], "source": [ "# check if all output files are retrieved from the remote machine, returns a Boolean flag\n", "all_good = fab.verify(CONFIG, campaign.campaign_dir, TARGET_FILENAME, machine=MACHINE)" @@ -456,18 +425,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "c2b9838b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Executing fabsim localhost get_uq_samples:grid_search,campaign_dir=/tmp/grid_testsov3dmzm,number_of_samples=6,skip=0\n" - ] - } - ], + "outputs": [], "source": [ "if all_good:\n", " # copy the results from the FabSim results dir to the EasyVVUQ results dir\n", @@ -508,148 +469,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "0b55725a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
run_iditerationn_neuronslearning_ratedropout_prob_indropout_prob_hiddenaccuracy_trainaccuracy_test
00000000
010640.0050.00.00.9585000.9543
120640.0100.00.00.9731830.9656
230640.0150.00.00.9784500.9715
3401280.0050.00.00.9632830.9599
4501280.0100.00.00.9774670.9710
5601280.0150.00.00.9846000.9745
\n", - "
" - ], - "text/plain": [ - " run_id iteration n_neurons learning_rate dropout_prob_in \\\n", - " 0 0 0 0 0 \n", - "0 1 0 64 0.005 0.0 \n", - "1 2 0 64 0.010 0.0 \n", - "2 3 0 64 0.015 0.0 \n", - "3 4 0 128 0.005 0.0 \n", - "4 5 0 128 0.010 0.0 \n", - "5 6 0 128 0.015 0.0 \n", - "\n", - " dropout_prob_hidden accuracy_train accuracy_test \n", - " 0 0 0 \n", - "0 0.0 0.958500 0.9543 \n", - "1 0.0 0.973183 0.9656 \n", - "2 0.0 0.978450 0.9715 \n", - "3 0.0 0.963283 0.9599 \n", - "4 0.0 0.977467 0.9710 \n", - "5 0.0 0.984600 0.9745 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#############################################\n", "# All output files are present, decode them #\n", @@ -686,68 +509,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "99ba74e2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best hyperparameters with 97.45% test accuracy:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
n_neuronslearning_rate
00
51280.015
\n", - "
" - ], - "text/plain": [ - " n_neurons learning_rate\n", - " 0 0\n", - "5 128 0.015" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "print(\"Best hyperparameters with %.2f%% test accuracy:\" % (data_frame['accuracy_test'].max().values * 100,))\n", "data_frame.loc[data_frame['accuracy_test'].idxmax()][vary.keys()]" From 4e194e07796c77cce01db4bea6ece5a070660f78 Mon Sep 17 00:00:00 2001 From: "wouteredeling@gmail.com" Date: Thu, 2 Mar 2023 13:41:12 +0100 Subject: [PATCH 7/7] changed template test --- .../{test_grid.py => test_grid.template} | 0 tests/test_grid_sampler.py | 2 +- ...arameter_tuning_tutorial_with_fabsim.ipynb | 253 +++++++++++++++++- 3 files changed, 245 insertions(+), 10 deletions(-) rename tests/grid_search/{test_grid.py => test_grid.template} (100%) diff --git a/tests/grid_search/test_grid.py b/tests/grid_search/test_grid.template similarity index 100% rename from tests/grid_search/test_grid.py rename to tests/grid_search/test_grid.template diff --git a/tests/test_grid_sampler.py b/tests/test_grid_sampler.py index 9977a6400..9c0f4c05f 100644 --- a/tests/test_grid_sampler.py +++ b/tests/test_grid_sampler.py @@ -19,7 +19,7 @@ def campaign(): params["x2"] = {"type": "boolean", "default": True} # python file is its own template - encoder = uq.encoders.GenericEncoder('tests/grid_search/test_grid.py', + encoder = uq.encoders.GenericEncoder('tests/grid_search/test_grid.template', target_filename='test_grid.py') execute = ExecuteLocal("python3 test_grid.py") diff --git a/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb index 41825d41e..84adb83dc 100644 --- a/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb +++ b/tutorials/hyperparameter_tuning_tutorial_with_fabsim.ipynb @@ -316,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "7b9cb1b8", "metadata": {}, 
"outputs": [ @@ -396,8 +396,30 @@ "2023-03-02 11:36:59.710945: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", "2023-03-02 11:36:59.710971: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", "2023-03-02 11:36:59.711346: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-02 11:37:11.878783: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-02 11:37:12.045043: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:37:12.045066: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n", + "2023-03-02 11:37:12.946743: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:37:12.946813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory\n", + "2023-03-02 11:37:12.946822: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", + "2023-03-02 11:37:14.633522: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", + "2023-03-02 11:37:14.633546: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)\n", + "2023-03-02 11:37:14.633564: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (wouter-XPS-13-7390): /proc/driver/nvidia/version does not exist\n", + "2023-03-02 11:37:14.633830: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -414,10 +436,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "9d2c0ddb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost fetch_results\n", + "Executing fabsim localhost verify_last_ensemble:grid_search,campaign_dir=/tmp/grid_testrebm6ntq,target_filename=output.csv,machine=localhost\n" + ] + } + ], "source": [ "# check if all output files are retrieved from the remote machine, returns a Boolean flag\n", "all_good 
= fab.verify(CONFIG, campaign.campaign_dir, TARGET_FILENAME, machine=MACHINE)" @@ -425,10 +456,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "c2b9838b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Executing fabsim localhost get_uq_samples:grid_search,campaign_dir=/tmp/grid_testrebm6ntq,number_of_samples=6,skip=0\n" + ] + } + ], "source": [ "if all_good:\n", " # copy the results from the FabSim results dir to the EasyVVUQ results dir\n", @@ -469,10 +508,148 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "0b55725a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
run_iditerationn_neuronslearning_ratedropout_prob_indropout_prob_hiddenaccuracy_trainaccuracy_test
00000000
010640.0050.00.00.9592670.9544
120640.0100.00.00.9741330.9653
230640.0150.00.00.9797170.9712
3401280.0050.00.00.9633330.9592
4501280.0100.00.00.9786670.9718
5601280.0150.00.00.9836500.9744
\n", + "
" + ], + "text/plain": [ + " run_id iteration n_neurons learning_rate dropout_prob_in \\\n", + " 0 0 0 0 0 \n", + "0 1 0 64 0.005 0.0 \n", + "1 2 0 64 0.010 0.0 \n", + "2 3 0 64 0.015 0.0 \n", + "3 4 0 128 0.005 0.0 \n", + "4 5 0 128 0.010 0.0 \n", + "5 6 0 128 0.015 0.0 \n", + "\n", + " dropout_prob_hidden accuracy_train accuracy_test \n", + " 0 0 0 \n", + "0 0.0 0.959267 0.9544 \n", + "1 0.0 0.974133 0.9653 \n", + "2 0.0 0.979717 0.9712 \n", + "3 0.0 0.963333 0.9592 \n", + "4 0.0 0.978667 0.9718 \n", + "5 0.0 0.983650 0.9744 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "#############################################\n", "# All output files are present, decode them #\n", @@ -509,10 +686,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "99ba74e2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparameters with 97.44% test accuracy:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_neuronslearning_rate
00
51280.015
\n", + "
" + ], + "text/plain": [ + " n_neurons learning_rate\n", + " 0 0\n", + "5 128 0.015" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "print(\"Best hyperparameters with %.2f%% test accuracy:\" % (data_frame['accuracy_test'].max().values * 100,))\n", "data_frame.loc[data_frame['accuracy_test'].idxmax()][vary.keys()]"