
Commit
feat(tuner): add plot function for tuner.summary (#167)
hanxiao committed Oct 24, 2021
1 parent 1c5d00c commit 115a0aa
Showing 9 changed files with 132 additions and 54 deletions.
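
At a glance: `ScalarSummary` becomes `ScalarSequence`, `SummaryCollection` becomes `Summary`, and `Summary` gains a `plot()` method. A minimal sketch of the resulting end-to-end usage (here `model`, `train_docs`, and `eval_docs` are hypothetical placeholders, not part of this diff):

```python
import finetuner

# Placeholders: any supported Keras/Paddle/PyTorch embedding model and
# labeled DocumentArray-like data, as expected by finetuner.fit.
summary = finetuner.fit(
    model,
    train_data=train_docs,
    eval_data=eval_docs,
    epochs=10,
)

# New in this commit: visualize the recorded train/eval losses.
summary.plot('loss.png')
```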
8 changes: 4 additions & 4 deletions finetuner/__init__.py
@@ -10,7 +10,7 @@

 if TYPE_CHECKING:
     from .helper import AnyDNN, DocumentArrayLike
-    from .tuner.summary import SummaryCollection
+    from .tuner.summary import Summary


 # fit interface generated from Tuner
@@ -26,7 +26,7 @@ def fit(
     optimizer: str = 'adam',
     optimizer_kwargs: Optional[Dict] = None,
     device: str = 'cpu',
-) -> 'SummaryCollection':
+) -> 'Summary':
     ...


@@ -49,7 +49,7 @@ def fit(
    output_dim: Optional[int] = None,
    freeze: bool = False,
    device: str = 'cpu',
-) -> 'SummaryCollection':
+) -> 'Summary':
    ...


@@ -97,7 +97,7 @@

 def fit(
     model: 'AnyDNN', train_data: 'DocumentArrayLike', *args, **kwargs
-) -> Optional['SummaryCollection']:
+) -> Optional['Summary']:
     if kwargs.get('to_embedding_model', False):
         from .tailor import to_embedding_model
4 changes: 2 additions & 2 deletions finetuner/tuner/__init__.py
@@ -4,7 +4,7 @@

 if TYPE_CHECKING:
     from .base import BaseTuner
-    from .summary import SummaryCollection
+    from .summary import Summary


 def _get_tuner_class(dnn_model: AnyDNN) -> Type['BaseTuner']:
@@ -36,7 +36,7 @@ def fit(
     optimizer_kwargs: Optional[Dict] = None,
     device: str = 'cpu',
     **kwargs,
-) -> 'SummaryCollection':
+) -> 'Summary':
     """Finetune the model on the training data.

     :param embed_model: an embedding model
4 changes: 2 additions & 2 deletions finetuner/tuner/base.py
@@ -9,7 +9,7 @@
 )

 from ..helper import AnyDNN, AnyDataLoader, AnyOptimizer, DocumentArrayLike
-from .summary import SummaryCollection
+from .summary import Summary


 class BaseLoss:
@@ -101,7 +101,7 @@ def fit(
         batch_size: int = 256,
         *args,
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Fit the :py:attr:`.embed_model` on labeled data.

         Note that fitting changes the weights in :py:attr:`.embed_model` in-place. This allows one to consecutively
20 changes: 10 additions & 10 deletions finetuner/tuner/keras/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -64,10 +64,10 @@ def _get_optimizer(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -96,11 +96,11 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -130,7 +130,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -171,8 +171,8 @@ def fit(

         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         with get_device(device):
             for epoch in range(epochs):
@@ -187,7 +187,7 @@
                 le = self._eval(_eval_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
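
Across all three framework tuners, batch losses are accumulated through `ScalarSequence.__iadd__`, which accepts a scalar, a list, a squeezable one-dimensional `np.ndarray`, or another `ScalarSequence` (see the `summary.py` diff below). A framework-independent sketch of that accumulation:

```python
from finetuner.tuner.summary import ScalarSequence

train_loss = ScalarSequence('Train Loss')
train_loss += 0.91           # append a single batch loss
train_loss += [0.85, 0.80]   # extend with a list of batch losses
print(train_loss)            # -> 'Train Loss: 0.85' (mean of all recorded values)

m_train_loss = ScalarSequence('train')
m_train_loss += train_loss   # merge one epoch's sequence, as fit() does per epoch
```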
20 changes: 10 additions & 10 deletions finetuner/tuner/paddle/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -63,13 +63,13 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

         self._embed_model.eval()

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -90,12 +90,12 @@ def _eval(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

         self._embed_model.train()

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -130,7 +130,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -164,8 +164,8 @@ def fit(

         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         for epoch in range(epochs):
             _data = self._get_data_loader(
@@ -186,7 +186,7 @@
                 le = self._eval(_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
20 changes: 10 additions & 10 deletions finetuner/tuner/pytorch/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -67,13 +67,13 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

         self._embed_model.eval()

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -99,12 +99,12 @@ def _eval(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

         self._embed_model.train()

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -142,7 +142,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -179,8 +179,8 @@ def fit(
         # Get optimizer
         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         for epoch in range(epochs):
             _data = self._get_data_loader(
@@ -201,7 +201,7 @@
                 le = self._eval(_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
85 changes: 70 additions & 15 deletions finetuner/tuner/summary.py
@@ -8,43 +8,45 @@
 ]  #: The type of numerics including numpy data type


-class ScalarSummary:
-    def __init__(self, name: str = '', data: Optional[List[NumericType]] = None):
+class ScalarSequence:
+    def __init__(self, name: str):
         """Create a record for storing a list of scalar values e.g. losses/metrics

         :param name: the name of that record
-        :param data: the data record to initialize from
         """
-        self._name = name or ''
-        self._record = data or []
+        self.name = name
+        self._record = []

-    def __iadd__(self, other: Union[List[NumericType], float, 'ScalarSummary']):
+    def __iadd__(self, other: Union[List[NumericType], float, 'ScalarSequence']):
         if isinstance(other, list):
             self._record += other
-        elif isinstance(other, ScalarSummary):
+        elif isinstance(other, ScalarSequence):
             self._record += other._record
+        elif isinstance(other, np.ndarray) and np.squeeze(other).ndim == 1:
+            self._record += [v for v in np.squeeze(other)]
         else:
             self._record.append(other)
         return self

     def __str__(self):
         if self._record:
-            return (
-                f'{self._name}: {np.mean([float(loss) for loss in self._record]):.2f}'
-            )
+            return f'{self.name}: {np.mean([float(loss) for loss in self._record]):.2f}'
         else:
-            return f'{self._name} has no record'
+            return f'{self.name} has no record'

     def floats(self) -> List[NumericType]:
         """Return all numbers as a list of Python native floats"""
         return [float(v) for v in self._record]

+    def __bool__(self):
+        return bool(self._record)
+

-class SummaryCollection:
-    def __init__(self, *records: ScalarSummary):
+class Summary:
+    def __init__(self, *records: ScalarSequence):
         """Create a collection of summaries."""
-        self._records = records
+        self._records = [r for r in records if r]

     def save(self, filepath: str):
         """Store all summaries into a JSON file"""
@@ -56,4 +58,57 @@ def save(self, filepath: str):

     def dict(self) -> Dict[str, List[NumericType]]:
         """Return all summaries as a dictionary, where key is the name and value is the record"""
-        return {r._name: r.floats() for r in self._records}
+        return {r.name: r.floats() for r in self._records}
+
+    def plot(
+        self,
+        output: Optional[str] = None,
+        max_plot_points: Optional[int] = None,
+        **kwargs,
+    ):
+        """Plot all records in the summary into one figure.
+
+        .. note::
+            This function requires ``matplotlib`` to be installed.
+
+        :param output: Optional path to store the visualization. If not given, show in UI.
+        :param max_plot_points: The maximum number of points to plot. When the actual number of points
+            is larger than the given number, a linspace sampling is conducted first to reduce them to
+            the given number before plotting.
+        :param kwargs: Extra kwargs passed to ``matplotlib.pyplot.plot``.
+        """
+        import matplotlib.pyplot as plt
+
+        fig, axes = plt.subplots(
+            1,
+            len(self._records),
+            figsize=(6 * len(self._records), 6),
+            constrained_layout=True,
+        )
+        if not isinstance(axes, np.ndarray):
+            # with a single record, `plt.subplots` returns one Axes instead of an array, so wrap it
+            axes = [axes]
+
+        plt_kwargs = dict(alpha=0.8, linewidth=1)
+        plt_kwargs.update(kwargs)
+
+        for idx, record in enumerate(self._records):
+            axes[idx].plot(
+                *self._sample_points(record.floats(), max_len=max_plot_points),
+                **plt_kwargs,
+            )
+            axes[idx].set_ylabel(record.name)
+            axes[idx].set_box_aspect(1)
+            axes[idx].set_xlabel('Steps')
+
+        if output:
+            plt.savefig(output, bbox_inches='tight', pad_inches=0.1)
+        else:
+            plt.show()
+
+    @staticmethod
+    def _sample_points(arr, max_len: Optional[int]):
+        if not max_len or max_len > len(arr):
+            return list(range(0, len(arr))), arr
+        else:
+            idx = np.round(np.linspace(0, len(arr) - 1, max_len)).astype(int)
+            return idx, [arr[j] for j in idx]
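
Taken together, the new `summary.py` API can also be exercised directly. A minimal sketch, assuming `matplotlib` is installed (the file names here are arbitrary):

```python
from finetuner.tuner.summary import ScalarSequence, Summary

train = ScalarSequence('train')
train += [0.9, 0.7, 0.55, 0.43]
evals = ScalarSequence('eval')
evals += [0.8, 0.6]
unused = ScalarSequence('unused')  # empty, hence falsy via the new __bool__

summary = Summary(train, evals, unused)   # the empty sequence is filtered out
summary.save('summary.json')              # {"train": [0.9, 0.7, 0.55, 0.43], "eval": [0.8, 0.6]}
summary.plot(output='loss.png', max_plot_points=100, linewidth=2)
```

Note the design choice in `Summary.__init__`: empty sequences are dropped up front, so `plot()` never allocates an axis for a record with no data.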
