# Profiling of neural networks #26

This pull request adds an optional `profile` flag to `FrozenModel` and to the trainable model: when enabled, each `run` is executed with TensorFlow's `FULL_TRACE` run options and the resulting Chrome trace is written to `profile.json` in `log_dir`.
### `FrozenModel`

```diff
@@ -5,6 +5,7 @@
 import emloop as el
 import tensorflow as tf
+from tensorflow.python.client import timeline

 from .graph_tower import GraphTower
 from .model import BaseModel
```
```diff
@@ -29,16 +30,18 @@ class FrozenModel(el.AbstractModel):
     """

     def __init__(self,
-                 inputs: List[str], outputs: List[str], restore_from: str,
-                 session_config: Optional[dict]=None, n_gpus: int=0, **_):
+                 log_dir: str, inputs: List[str], outputs: List[str], restore_from: str,
+                 session_config: Optional[dict]=None, n_gpus: int=0, profile: bool=False, **_):
         """
         Initialize new :py:class:`FrozenModel` instance.

+        :param log_dir: path to the logging directory (wherein models should be saved)
         :param inputs: model input names
         :param outputs: model output names
         :param restore_from: restore model path (either a dir or a .pb file)
         :param session_config: TF session configuration dict
         :param n_gpus: number of GPUs to use (either 0 or 1)
+        :param profile: whether profile.json should be saved to log_dir
         """
         super().__init__(None, '', restore_from)
         assert 0 <= n_gpus <= 1, 'FrozenModel can be used only with n_gpus=0 or n_gpus=1'
```

> **Review comment** (on the `:param log_dir:` line): This docstring is inaccurate, as `FrozenModel` does not save any models into `log_dir`.
```diff
@@ -50,6 +53,7 @@ def __init__(self,
         self._graph = tf.Graph()
         if session_config:
             session_config = tf.ConfigProto(**session_config)
+
         self._session = tf.Session(graph=self._graph, config=session_config)

         with self._graph.as_default():
```

> **Review comment** (on the added blank line): Is this intentional?
```diff
@@ -60,6 +64,10 @@ def __init__(self,
             except KeyError:
                 self._is_training = tf.placeholder(tf.bool, [], BaseModel.TRAINING_FLAG_NAME)

+        self._profile = profile
+        self._log_dir = log_dir
+
+
     def run(self, batch: el.Batch, train: bool=False, stream: el.datasets.StreamWrapper=None) -> Mapping[str, object]:
         """
         Run the model with the given ``batch``.
```
```diff
@@ -83,7 +91,19 @@ def run(self, batch: el.Batch, train: bool=False, stream: el.datasets.StreamWrap
             for output_name in self.output_names:
                 fetches.append(self._tower[output_name])

-            outputs = self._session.run(fetches=fetches, feed_dict=feed_dict)
+            if self._profile:
+                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
+                run_metadata = tf.RunMetadata()
+
+                outputs = self._session.run(fetches=fetches, feed_dict=feed_dict,
+                                            options=run_options, run_metadata=run_metadata)
+
+                with open(path.join(self._log_dir, "profile.json"), "w") as ofile:
+                    tl = timeline.Timeline(run_metadata.step_stats)
+                    ofile.write(tl.generate_chrome_trace_format())
+
+            else:
+                outputs = self._session.run(fetches=fetches, feed_dict=feed_dict)

         return dict(zip(self.output_names, outputs))
```

> **Review comment** (on the trace-saving block): Ditto: statistics from a single call.
>
> **Reply:** Well, this is a bit tricky. Both the first (warm-up) and the last (possibly smaller batch) profiles may be inaccurate. So what do we want to actually save? I guess keeping the last one is reasonable.
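For readers unfamiliar with the mechanism: TensorFlow 1.x collects per-op timing when a session is run with `FULL_TRACE` options, and `timeline.Timeline` converts the collected `step_stats` into a Chrome trace viewable at `chrome://tracing`. Below is a minimal self-contained sketch of the same technique used in this diff; the graph and tensor names are illustrative only, not part of this PR.

```python
import numpy as np
import tensorflow as tf
from tensorflow.python.client import timeline

# A trivial graph to trace (names are illustrative, not from this PR).
x = tf.placeholder(tf.float32, [None, 128], name='x')
y = tf.layers.dense(x, 64, activation=tf.nn.relu)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())

    # Ask TensorFlow to record a full trace of this single run.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    session.run(y, feed_dict={x: np.random.rand(32, 128)},
                options=run_options, run_metadata=run_metadata)

    # Convert the collected step stats into a Chrome trace file;
    # open chrome://tracing in a browser and load it to inspect the run.
    trace = timeline.Timeline(run_metadata.step_stats)
    with open('profile.json', 'w') as ofile:
        ofile.write(trace.generate_chrome_trace_format())
```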
### `BaseModel`

```diff
@@ -8,6 +8,7 @@
 import numpy as np
 import emloop as el
 import tensorflow as tf
+from tensorflow.python.client import timeline

 from .third_party.tensorflow.freeze_graph import freeze_graph
 from .third_party.tensorflow.average_gradients import average_gradients
```
```diff
@@ -44,7 +45,7 @@ def __init__(self,  # pylint: disable=too-many-arguments
                  dataset: Optional[el.AbstractDataset], log_dir: Optional[str], inputs: List[str], outputs: List[str],
                  session_config: Optional[dict]=None, n_gpus: int=0, restore_from: Optional[str]=None,
                  optimizer=None, freeze=False, loss_name: str=DEFAULT_LOSS_NAME, monitor: Optional[str]=None,
-                 restore_fallback: Optional[str]=None, clip_gradient: Optional[float]=None,
+                 restore_fallback: Optional[str]=None, clip_gradient: Optional[float]=None, profile: bool=False,
                  **kwargs):
         """
         Create new emloop trainable TensorFlow model.
```

> **Review comment** (on the `dataset`/`log_dir` line): I would make the …
>
> **Reply:** Is this in some way related to this pull request?
```diff
@@ -82,6 +83,7 @@ def __init__(self,  # pylint: disable=too-many-arguments
         :param monitor: monitor signal mean and variance of the tensors which names contain the specified value
         :param restore_fallback: ignored arg. (allows training from configs saved by emloop where it is added)
         :param clip_gradient: limit the absolute value of the gradient; set to None for no clipping
+        :param profile: whether profile.json should be saved to log_dir
         :param kwargs: additional kwargs forwarded to :py:meth:`_create_model`
         """
         super().__init__(dataset=dataset, log_dir=log_dir, restore_from=restore_from)
```

> **Review comment** (on the `:param profile:` line): Ditto description.
```diff
@@ -91,6 +93,7 @@ def __init__(self,  # pylint: disable=too-many-arguments
         self._log_dir = log_dir
         self._freeze_graph = freeze
         self._clip_gradient = clip_gradient
+        self._profile = profile
         self._loss_name = loss_name
         self._train_ops = []
         self._graph = self._saver = None
```
```diff
@@ -223,12 +226,25 @@ def run(self, batch: el.Batch, train: bool=False, stream: el.datasets.StreamWrap
             for output_name in self.output_names:
                 fetches.append(tower[output_name])

+            run_options = None
+            run_metadata = None
+            if self._profile:
+                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
+                run_metadata = tf.RunMetadata()
+
             # run the computational graph for one batch and allow buffering in the meanwhile
             if stream is not None:
                 with stream.allow_buffering:
-                    outputs = self._session.run(fetches=fetches, feed_dict=feed_dict)
+                    outputs = self._session.run(fetches=fetches, feed_dict=feed_dict,
+                                                options=run_options, run_metadata=run_metadata)
             else:
-                outputs = self._session.run(fetches=fetches, feed_dict=feed_dict)
+                outputs = self._session.run(fetches=fetches, feed_dict=feed_dict,
+                                            options=run_options, run_metadata=run_metadata)
+
+            if self._profile:
+                with open(path.join(self._log_dir, "profile.json"), "w") as ofile:
+                    tl = timeline.Timeline(run_metadata.step_stats)
+                    ofile.write(tl.generate_chrome_trace_format())

             if train:
                 outputs = outputs[1:]
```

> **Review comment** (on the `run_options = None` line): This is more or less the same code as in `FrozenModel`.
>
> **Review comment** (on the trace-saving block): This seems to overwrite the profile file on every call to `run`.
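One way to address both concerns above (the file being overwritten on every call, and a single call's statistics being unrepresentative) would be to write one numbered trace per call and let the user decide which profiles to keep, discarding, for example, the warm-up and last-batch ones. A sketch with a hypothetical `save_profile` helper, not part of this diff:

```python
from os import path

from tensorflow.python.client import timeline


def save_profile(run_metadata, log_dir, step):
    """Save the trace of a single session.run() call as profile_<step>.json."""
    trace = timeline.Timeline(run_metadata.step_stats)
    with open(path.join(log_dir, 'profile_{}.json'.format(step)), 'w') as ofile:
        ofile.write(trace.generate_chrome_trace_format())
```

The model would then keep an internal step counter and call `save_profile(run_metadata, self._log_dir, step)` after each profiled run, instead of rewriting a single `profile.json`.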
### `FrozenModel` tests

```diff
@@ -18,24 +18,24 @@
 def test_frozen_model_restore(tmpdir):
     """Test frozen model restoration."""
     with pytest.raises(ValueError):
-        FrozenModel(inputs=[], outputs=[], restore_from=tmpdir)  # there is no .pb file yet
+        FrozenModel(log_dir="/dev/null", inputs=[], outputs=[], restore_from=tmpdir)  # there is no .pb file yet

     dummy_model = TrainableModel(dataset=None, log_dir=tmpdir, **_IO, freeze=True, optimizer=_OPTIMIZER)
     dummy_model.save('')

     # restore from directory
-    FrozenModel(**_IO, restore_from=tmpdir)
+    FrozenModel(log_dir="/dev/null", **_IO, restore_from=tmpdir)

     # restore from file
-    FrozenModel(**_IO, restore_from=path.join(tmpdir, 'model.pb'))
+    FrozenModel(log_dir="/dev/null", **_IO, restore_from=path.join(tmpdir, 'model.pb'))

     # wrong configurations
     dummy_model.save('another')
     with pytest.raises(ValueError):
-        FrozenModel(**_IO, restore_from=tmpdir)  # multiple .pb files
+        FrozenModel(log_dir="/dev/null", **_IO, restore_from=tmpdir)  # multiple .pb files

     with pytest.raises(ValueError):
-        FrozenModel(**_IO, restore_from='/something/that/does/not/exist')
+        FrozenModel(log_dir="/dev/null", **_IO, restore_from='/something/that/does/not/exist')


 def test_frozen_model_misc(tmpdir):
```
```diff
@@ -44,7 +44,7 @@ def test_frozen_model_misc(tmpdir):
     dummy_model.save('')

     # restore from directory
-    frozen_model = FrozenModel(**_IO, restore_from=tmpdir, session_config={'allow_soft_placement': True})
+    frozen_model = FrozenModel(log_dir="/dev/null", **_IO, restore_from=tmpdir, session_config={'allow_soft_placement': True})

     assert frozen_model.restore_fallback == 'emloop_tensorflow.FrozenModel'
     assert frozen_model.input_names == _IO['inputs']
```

> **Review comment** (on the new `FrozenModel(...)` line): Line too long.
```diff
@@ -63,7 +63,7 @@ def test_frozen_model_run(tmpdir):
     mainloop.run_training(None)
     model.save('')

-    frozen_model = FrozenModel(inputs=['input'], outputs=['output'], restore_from=tmpdir)
+    frozen_model = FrozenModel(log_dir="/dev/null", inputs=['input'], outputs=['output'], restore_from=tmpdir)

     with pytest.raises(AssertionError):
         frozen_model.run({}, True, None)
```
> **Review comment** (on the overall change): The `log_dir` argument should rather be optional, as it was previously, if I am not mistaken. Of course, we should sanitize the arguments similarly to this.
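A sketch of the sanitization suggested here, assuming `log_dir` reverts to an optional argument; the helper name is hypothetical:

```python
from typing import Optional


def _check_profile_args(profile: bool, log_dir: Optional[str]) -> None:
    """Profiling writes profile.json into log_dir, so the directory is required."""
    if profile and log_dir is None:
        raise ValueError('`log_dir` must be specified when `profile` is set to True')
```

With this, `FrozenModel(inputs=..., outputs=..., restore_from=...)` would keep working without a `log_dir`, while `profile=True` without a logging directory would fail fast with a clear error.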