
Commit
feat(tuner): add plot function for tuner.summary (#167)
hanxiao committed Oct 24, 2021
1 parent 1c5d00c commit 115a0aa
Showing 9 changed files with 132 additions and 54 deletions.
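
At a glance: `ScalarSummary` becomes `ScalarSequence`, `SummaryCollection` becomes `Summary`, and `Summary` gains a `plot()` method. A minimal sketch of the resulting end-to-end usage (here `model`, `train_docs`, and `eval_docs` are hypothetical placeholders, not part of this diff):

```python
import finetuner

# Placeholders: any supported Keras/Paddle/PyTorch embedding model and
# labeled DocumentArray-like data, as expected by finetuner.fit.
summary = finetuner.fit(
    model,
    train_data=train_docs,
    eval_data=eval_docs,
    epochs=10,
)

# New in this commit: visualize the recorded train/eval losses.
summary.plot('loss.png')
```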
8 changes: 4 additions & 4 deletions finetuner/__init__.py
@@ -10,7 +10,7 @@

 if TYPE_CHECKING:
     from .helper import AnyDNN, DocumentArrayLike
-    from .tuner.summary import SummaryCollection
+    from .tuner.summary import Summary


 # fit interface generated from Tuner
@@ -26,7 +26,7 @@ def fit(
     optimizer: str = 'adam',
     optimizer_kwargs: Optional[Dict] = None,
     device: str = 'cpu',
-) -> 'SummaryCollection':
+) -> 'Summary':
     ...


@@ -49,7 +49,7 @@ def fit(
    output_dim: Optional[int] = None,
    freeze: bool = False,
    device: str = 'cpu',
-) -> 'SummaryCollection':
+) -> 'Summary':
    ...


@@ -97,7 +97,7 @@

 def fit(
     model: 'AnyDNN', train_data: 'DocumentArrayLike', *args, **kwargs
-) -> Optional['SummaryCollection']:
+) -> Optional['Summary']:
     if kwargs.get('to_embedding_model', False):
         from .tailor import to_embedding_model
4 changes: 2 additions & 2 deletions finetuner/tuner/__init__.py
@@ -4,7 +4,7 @@

 if TYPE_CHECKING:
     from .base import BaseTuner
-    from .summary import SummaryCollection
+    from .summary import Summary


 def _get_tuner_class(dnn_model: AnyDNN) -> Type['BaseTuner']:
@@ -36,7 +36,7 @@ def fit(
     optimizer_kwargs: Optional[Dict] = None,
     device: str = 'cpu',
     **kwargs,
-) -> 'SummaryCollection':
+) -> 'Summary':
     """Finetune the model on the training data.

     :param embed_model: an embedding model
4 changes: 2 additions & 2 deletions finetuner/tuner/base.py
@@ -9,7 +9,7 @@
 )

 from ..helper import AnyDNN, AnyDataLoader, AnyOptimizer, DocumentArrayLike
-from .summary import SummaryCollection
+from .summary import Summary


 class BaseLoss:
@@ -101,7 +101,7 @@ def fit(
         batch_size: int = 256,
         *args,
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Fit the :py:attr:`.embed_model` on labeled data.

         Note that fitting changes the weights in :py:attr:`.embed_model` in-place. This allows one to consecutively
20 changes: 10 additions & 10 deletions finetuner/tuner/keras/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -64,10 +64,10 @@ def _get_optimizer(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -96,11 +96,11 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -130,7 +130,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -171,8 +171,8 @@ def fit(

         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         with get_device(device):
             for epoch in range(epochs):
@@ -187,7 +187,7 @@
                 le = self._eval(_eval_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
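
Across all three framework tuners, batch losses are accumulated through `ScalarSequence.__iadd__`, which accepts a scalar, a list, a squeezable one-dimensional `np.ndarray`, or another `ScalarSequence` (see the `summary.py` diff below). A framework-independent sketch of that accumulation:

```python
from finetuner.tuner.summary import ScalarSequence

train_loss = ScalarSequence('Train Loss')
train_loss += 0.91           # append a single batch loss
train_loss += [0.85, 0.80]   # extend with a list of batch losses
print(train_loss)            # -> 'Train Loss: 0.85' (mean of all recorded values)

m_train_loss = ScalarSequence('train')
m_train_loss += train_loss   # merge one epoch's sequence, as fit() does per epoch
```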
20 changes: 10 additions & 10 deletions finetuner/tuner/paddle/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -63,13 +63,13 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

         self._embed_model.eval()

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -90,12 +90,12 @@ def _eval(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

         self._embed_model.train()

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -130,7 +130,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -164,8 +164,8 @@ def fit(

         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         for epoch in range(epochs):
             _data = self._get_data_loader(
@@ -186,7 +186,7 @@
                 le = self._eval(_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
20 changes: 10 additions & 10 deletions finetuner/tuner/pytorch/__init__.py
@@ -8,7 +8,7 @@
 from . import losses, datasets
 from ..base import BaseTuner, BaseLoss
 from ..dataset.helper import get_dataset
-from ..summary import ScalarSummary, SummaryCollection
+from ..summary import ScalarSequence, Summary
 from ...helper import DocumentArrayLike, AnyDataLoader


@@ -67,13 +67,13 @@ def _eval(
         self,
         data: AnyDataLoader,
         description: str = 'Evaluating',
-        train_loss: Optional[ScalarSummary] = None,
-    ) -> ScalarSummary:
+        train_loss: Optional[ScalarSequence] = None,
+    ) -> ScalarSequence:
         """Evaluate the model on given labeled data"""

         self._embed_model.eval()

-        _summary = ScalarSummary('Eval Loss')
+        _summary = ScalarSequence('Eval Loss')

         with ProgressBar(
             description,
@@ -99,12 +99,12 @@ def _eval(

     def _train(
         self, data: AnyDataLoader, optimizer: Optimizer, description: str
-    ) -> ScalarSummary:
+    ) -> ScalarSequence:
         """Train the model on given labeled data"""

         self._embed_model.train()

-        _summary = ScalarSummary('Train Loss')
+        _summary = ScalarSequence('Train Loss')
         with ProgressBar(
             description,
             message_on_done=_summary.__str__,
@@ -142,7 +142,7 @@ def fit(
         optimizer_kwargs: Optional[Dict] = None,
         device: str = 'cpu',
         **kwargs,
-    ) -> SummaryCollection:
+    ) -> Summary:
         """Finetune the model on the training data.

         :param train_data: Data on which to train the model
@@ -179,8 +179,8 @@ def fit(
         # Get optimizer
         _optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

-        m_train_loss = ScalarSummary('train')
-        m_eval_loss = ScalarSummary('eval')
+        m_train_loss = ScalarSequence('train')
+        m_eval_loss = ScalarSequence('eval')

         for epoch in range(epochs):
             _data = self._get_data_loader(
@@ -201,7 +201,7 @@
                 le = self._eval(_data, train_loss=m_train_loss)
                 m_eval_loss += le

-        return SummaryCollection(m_train_loss, m_eval_loss)
+        return Summary(m_train_loss, m_eval_loss)

     def save(self, *args, **kwargs):
         """Save the embedding model.
85 changes: 70 additions & 15 deletions finetuner/tuner/summary.py
@@ -8,43 +8,45 @@
 ]  #: The type of numerics including numpy data type


-class ScalarSummary:
-    def __init__(self, name: str = '', data: Optional[List[NumericType]] = None):
+class ScalarSequence:
+    def __init__(self, name: str):
         """Create a record for storing a list of scalar values e.g. losses/metrics

         :param name: the name of that record
-        :param data: the data record to initialize from
         """
-        self._name = name or ''
-        self._record = data or []
+        self.name = name
+        self._record = []

-    def __iadd__(self, other: Union[List[NumericType], float, 'ScalarSummary']):
+    def __iadd__(self, other: Union[List[NumericType], float, 'ScalarSequence']):
         if isinstance(other, list):
             self._record += other
-        elif isinstance(other, ScalarSummary):
+        elif isinstance(other, ScalarSequence):
             self._record += other._record
+        elif isinstance(other, np.ndarray) and np.squeeze(other).ndim == 1:
+            self._record += [v for v in np.squeeze(other)]
         else:
             self._record.append(other)
         return self

     def __str__(self):
         if self._record:
-            return (
-                f'{self._name}: {np.mean([float(loss) for loss in self._record]):.2f}'
-            )
+            return f'{self.name}: {np.mean([float(loss) for loss in self._record]):.2f}'
         else:
-            return f'{self._name} has no record'
+            return f'{self.name} has no record'

     def floats(self) -> List[NumericType]:
         """Return all numbers as a list of Python native floats"""
         return [float(v) for v in self._record]

+    def __bool__(self):
+        return bool(self._record)
+

-class SummaryCollection:
-    def __init__(self, *records: ScalarSummary):
+class Summary:
+    def __init__(self, *records: ScalarSequence):
         """Create a collection of summaries."""
-        self._records = records
+        self._records = [r for r in records if r]

     def save(self, filepath: str):
         """Store all summaries into a JSON file"""
@@ -56,4 +58,57 @@ def save(self, filepath: str):

     def dict(self) -> Dict[str, List[NumericType]]:
         """Return all summaries as a dictionary, where key is the name and value is the record"""
-        return {r._name: r.floats() for r in self._records}
+        return {r.name: r.floats() for r in self._records}
+
+    def plot(
+        self,
+        output: Optional[str] = None,
+        max_plot_points: Optional[int] = None,
+        **kwargs,
+    ):
+        """Plot all records in the summary into one figure.
+
+        .. note::
+            This function requires ``matplotlib`` to be installed.
+
+        :param output: Optional path to store the visualization. If not given, show in UI.
+        :param max_plot_points: The maximum number of points to plot. When the actual number of points
+            is larger than the given number, a linspace sampling is conducted first to reduce them to
+            the given number before plotting.
+        :param kwargs: Extra kwargs passed to ``matplotlib.pyplot.plot``.
+        """
+        import matplotlib.pyplot as plt
+
+        fig, axes = plt.subplots(
+            1,
+            len(self._records),
+            figsize=(6 * len(self._records), 6),
+            constrained_layout=True,
+        )
+        if not isinstance(axes, np.ndarray):
+            # with a single record, `plt.subplots` returns one Axes instead of an array, so wrap it
+            axes = [axes]
+
+        plt_kwargs = dict(alpha=0.8, linewidth=1)
+        plt_kwargs.update(kwargs)
+
+        for idx, record in enumerate(self._records):
+            axes[idx].plot(
+                *self._sample_points(record.floats(), max_len=max_plot_points),
+                **plt_kwargs,
+            )
+            axes[idx].set_ylabel(record.name)
+            axes[idx].set_box_aspect(1)
+            axes[idx].set_xlabel('Steps')
+
+        if output:
+            plt.savefig(output, bbox_inches='tight', pad_inches=0.1)
+        else:
+            plt.show()
+
+    @staticmethod
+    def _sample_points(arr, max_len: Optional[int]):
+        if not max_len or max_len > len(arr):
+            return list(range(0, len(arr))), arr
+        else:
+            idx = np.round(np.linspace(0, len(arr) - 1, max_len)).astype(int)
+            return idx, [arr[j] for j in idx]
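
Taken together, the new `summary.py` API can also be exercised directly. A minimal sketch, assuming `matplotlib` is installed (the file names here are arbitrary):

```python
from finetuner.tuner.summary import ScalarSequence, Summary

train = ScalarSequence('train')
train += [0.9, 0.7, 0.55, 0.43]
evals = ScalarSequence('eval')
evals += [0.8, 0.6]
unused = ScalarSequence('unused')  # empty, hence falsy via the new __bool__

summary = Summary(train, evals, unused)   # the empty sequence is filtered out
summary.save('summary.json')              # {"train": [0.9, 0.7, 0.55, 0.43], "eval": [0.8, 0.6]}
summary.plot(output='loss.png', max_plot_points=100, linewidth=2)
```

Note the design choice in `Summary.__init__`: empty sequences are dropped up front, so `plot()` never allocates an axis for a record with no data.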
