Global model supporting functions (#9901)
* Dataset utilities added.

* Global model definition

* Dataset modules added.

* Dataset modules fix.

* global features model training added

* global features fix

* Test dataset update

* PR fixes

* repo sync

* repo sync

* Syncing 2

* Syncing 2

* Added global model supporting modules

* code style fixes

* Minor style fixes
jejarosl authored Apr 16, 2021
1 parent 81a19f6 commit 79946a7
Showing 4 changed files with 434 additions and 1 deletion.
2 changes: 1 addition & 1 deletion research/delf/delf/python/pooling_layers/pooling.py
@@ -177,7 +177,7 @@ def gem(x, axis=None, power=3., eps=1e-6):
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
power: Float, power > 0 is an inverse exponent parameter (GeM power).
eps: Float, parameter for numerical stability.
Returns:
output: [B, D] A float32 Tensor.
"""
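
For reference, GeM (generalized-mean) pooling reduces a [B, H, W, D] feature
map to [B, D] by taking the power-mean of activations over the spatial axes.
A minimal NumPy sketch of the computation (an illustration, not this module's
implementation):

  import numpy as np

  def gem_reference(x, power=3., eps=1e-6):
    # Clamp activations at `eps` for numerical stability, raise them to
    # `power`, average over the spatial axes, and take the 1/power root.
    x = np.clip(x, a_min=eps, a_max=None)
    return np.power(np.mean(np.power(x, power), axis=(1, 2)), 1. / power)

  features = np.random.rand(2, 7, 7, 128).astype(np.float32)  # [B, H, W, D]
  pooled = gem_reference(features)  # shape: (2, 128)
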
236 changes: 236 additions & 0 deletions research/delf/delf/python/training/global_features_utils.py
@@ -0,0 +1,236 @@
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for the global model training."""

import os

from absl import logging

import numpy as np
from tensorboard import program
import tensorflow as tf

from delf.python.datasets.revisited_op import dataset


class AverageMeter:
"""Computes and stores the average and current value of loss."""

def __init__(self):
"""Initialization of the AverageMeter."""
self.reset()

def reset(self):
"""Resets all the values."""
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0

def update(self, val, n=1):
"""Updates values in the AverageMeter.
Args:
val: Float, loss value.
n: Integer, number of instances.
"""
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
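
# A minimal usage sketch of `AverageMeter` (illustration only, not part of
# this commit); the loss values and batch size below are made up:
#   meter = AverageMeter()
#   for batch_loss in [0.9, 0.7, 0.5]:
#     meter.update(batch_loss, n=32)  # n: number of instances in the batch.
#   # meter.avg is now the instance-weighted mean loss, here 0.7.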


def compute_metrics_and_print(dataset_name, sorted_index_ids, ground_truth,
desired_pr_ranks=None, log=True):
"""Computes and logs ground-truth metrics for Revisited datasets.
Args:
dataset_name: String, name of the dataset.
sorted_index_ids: Integer NumPy array of shape [#queries, #index_images].
For each query, contains an array denoting the most relevant index images,
sorted from most to least relevant.
ground_truth: List containing ground-truth information for dataset. Each
entry is a dict corresponding to the ground-truth information for a query.
The dict has keys 'ok' and 'junk', mapping to a NumPy array of integers.
desired_pr_ranks: List of integers containing the desired precision/recall
ranks to be reported. E.g., if precision@1/recall@1 and
precision@10/recall@10 are desired, this should be set to [1, 10]. The
      largest item should be <= #sorted_index_ids. Default: [1, 5, 10].
    log: Bool, whether to log the computed metrics. Default: True.
Returns:
    metrics: Tuple (metrics_easy, metrics_medium, metrics_hard) of metrics for
      the different levels of difficulty. Each element is a list containing:
mean_average_precision (float), mean_precisions (NumPy array of
floats, with shape [len(desired_pr_ranks)]), mean_recalls (NumPy array
of floats, with shape [len(desired_pr_ranks)]), average_precisions
(NumPy array of floats, with shape [#queries]), precisions (NumPy array of
floats, with shape [#queries, len(desired_pr_ranks)]), recalls (NumPy
array of floats, with shape [#queries, len(desired_pr_ranks)]).
Raises:
ValueError: If an unknown dataset name is provided as an argument.
"""
_DATASETS = ['roxford5k', 'rparis6k']
  if dataset_name not in _DATASETS:
    raise ValueError('Unknown dataset: {}!'.format(dataset_name))

if desired_pr_ranks is None:
desired_pr_ranks = [1, 5, 10]

(easy_ground_truth, medium_ground_truth,
hard_ground_truth) = dataset.ParseEasyMediumHardGroundTruth(ground_truth)

metrics_easy = dataset.ComputeMetrics(sorted_index_ids, easy_ground_truth,
desired_pr_ranks)
metrics_medium = dataset.ComputeMetrics(sorted_index_ids,
medium_ground_truth,
desired_pr_ranks)
metrics_hard = dataset.ComputeMetrics(sorted_index_ids, hard_ground_truth,
desired_pr_ranks)

debug_and_log(
'>> {}: mAP E: {}, M: {}, H: {}'.format(
dataset_name, np.around(metrics_easy[0] * 100, decimals=2),
np.around(metrics_medium[0] * 100, decimals=2),
np.around(metrics_hard[0] * 100, decimals=2)), log=log)

debug_and_log(
'>> {}: mP@k{} E: {}, M: {}, H: {}'.format(
dataset_name, desired_pr_ranks,
np.around(metrics_easy[1] * 100, decimals=2),
np.around(metrics_medium[1] * 100, decimals=2),
np.around(metrics_hard[1] * 100, decimals=2)), log=log)

return metrics_easy, metrics_medium, metrics_hard
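
# A hedged usage sketch (illustration only; `sorted_index_ids` and
# `ground_truth` would come from a Revisited-Oxford/Paris evaluation
# pipeline, e.g. `dataset.ReadDatasetFile`):
#   metrics_e, metrics_m, metrics_h = compute_metrics_and_print(
#       'roxford5k', sorted_index_ids, ground_truth,
#       desired_pr_ranks=[1, 5, 10])
#   # metrics_m[0] is the Medium-protocol mean average precision.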


def htime(time_difference):
"""Time formatting function.
Depending on the value of `time_difference` outputs time in an appropriate
time format.
Args:
time_difference: Float, time difference between the two events.
Returns:
time: String representing time in an appropriate time format.
"""
time_difference = round(time_difference)

days = time_difference // 86400
hours = time_difference // 3600 % 24
minutes = time_difference // 60 % 60
seconds = time_difference % 60

if days > 0:
return '{:d}d {:d}h {:d}m {:d}s'.format(days, hours, minutes, seconds)
if hours > 0:
return '{:d}h {:d}m {:d}s'.format(hours, minutes, seconds)
if minutes > 0:
return '{:d}m {:d}s'.format(minutes, seconds)
return '{:d}s'.format(seconds)
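
# Example outputs (illustration only, not part of this commit):
#   htime(42)    -> '42s'
#   htime(3723)  -> '1h 2m 3s'
#   htime(90061) -> '1d 1h 1m 1s'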


def debug_and_log(msg, debug=True, log=True, debug_on_the_same_line=False):
"""Outputs `msg` to both stdout (if in the debug mode) and the log file.
Args:
msg: String, message to be logged.
debug: Bool, if True, will print `msg` to stdout.
log: Bool, if True, will redirect `msg` to the logfile.
debug_on_the_same_line: Bool, if True, will print `msg` to stdout without
a new line. When using this mode, logging to a logfile is disabled.
"""
if debug_on_the_same_line:
print(msg, end='')
return
if debug:
print(msg)
if log:
logging.info(msg)


def launch_tensorboard(log_dir):
"""Runs tensorboard with the given `log_dir`.
Args:
log_dir: String, directory to start tensorboard in.
"""
tb = program.TensorBoard()
tb.configure(argv=[None, '--logdir', log_dir])
url = tb.launch()
debug_and_log("Launching Tensorboard: {}".format(url))


def get_standard_keras_models():
"""Gets the standard keras model names.
Returns:
model_names: List, names of the standard keras models.
"""
model_names = sorted(name for name in tf.keras.applications.__dict__
if not name.startswith("__")
and callable(tf.keras.applications.__dict__[name]))
return model_names
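
# Illustrative use (not part of this commit): the returned names are the
# callables exported by `tf.keras.applications`, e.g. 'DenseNet121' or
# 'ResNet50', and can be instantiated with:
#   model_fn = getattr(tf.keras.applications, 'ResNet50')
#   model = model_fn(weights=None, include_top=False)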


def create_model_directory(training_dataset, arch, pool, whitening,
pretrained, loss, loss_margin, optimizer, lr,
weight_decay, neg_num, query_size, pool_size,
batch_size, update_every, image_size, directory):
"""Based on the model parameters, creates the model directory.
If the model directory does not exist, the directory is created.
Args:
training_dataset: String, training dataset name.
arch: String, model architecture.
pool: String, pooling option.
whitening: Bool, whether the model is trained with global whitening.
pretrained: Bool, whether the model is initialized with the precomputed
weights.
loss: String, training loss type.
loss_margin: Float, loss margin.
    optimizer: String, optimizer name.
    lr: Float, initial learning rate.
    weight_decay: Float, weight decay.
    neg_num: Integer, number of negative images per train/val tuple.
query_size: Integer, number of queries per one training epoch.
pool_size: Integer, size of the pool for hard negative mining.
batch_size: Integer, batch size.
update_every: Integer, frequency of the model weights update.
image_size: Integer, maximum size of longer image side used for training.
directory: String, destination where trained network should be saved.
Returns:
folder: String, path to the model folder.
"""
folder = '{}_{}_{}'.format(training_dataset, arch, pool)
if whitening:
folder += '_whiten'
if not pretrained:
folder += '_notpretrained'
folder += ('_{}_m{:.2f}_{}_lr{:.1e}_wd{:.1e}_nnum{}_qsize{}_psize{}_bsize{}'
'_uevery{}_imsize{}').format(
loss, loss_margin, optimizer, lr, weight_decay, neg_num,
query_size, pool_size, batch_size, update_every, image_size)

folder = os.path.join(directory, folder)
debug_and_log(
      '>> Creating directory if it does not exist:\n>> \'{}\''.format(folder))
if not os.path.exists(folder):
os.makedirs(folder)
return folder
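
As an illustration (the parameter values below are made up, not from this
commit), a call such as:

  folder = create_model_directory(
      'retrieval-SfM-120k', 'ResNet101', 'gem', whitening=False,
      pretrained=True, loss='contrastive', loss_margin=0.85, optimizer='adam',
      lr=1e-6, weight_decay=1e-4, neg_num=5, query_size=2000, pool_size=20000,
      batch_size=5, update_every=1, image_size=1024, directory='/tmp/models')

would create and return a path like
'/tmp/models/retrieval-SfM-120k_ResNet101_gem_contrastive_m0.85_adam_lr1.0e-06_wd1.0e-04_nnum5_qsize2000_psize20000_bsize5_uevery1_imsize1024'.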
121 changes: 121 additions & 0 deletions research/delf/delf/python/whiten.py
@@ -0,0 +1,121 @@
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Whitening learning functions."""

import os

import numpy as np


def apply_whitening(descriptors, mean_descriptor_vector, projection,
output_dim=None):
"""Applies the whitening to the descriptors as a post-processing step.
Args:
descriptors: [N, D] NumPy array of L2-normalized descriptors to be
post-processed.
mean_descriptor_vector: Mean descriptor vector.
projection: Whitening projection matrix.
output_dim: Integer, parameter for the dimensionality reduction. If
`output_dim` is None, the dimensionality reduction is not performed.
Returns:
descriptors_whitened: [N, output_dim] NumPy array of L2-normalized
descriptors `descriptors` after whitening application.
"""
eps = 1e-6
if output_dim is None:
output_dim = projection.shape[0]

descriptors = np.dot(projection[:output_dim, :],
descriptors - mean_descriptor_vector)
descriptors_whitened = descriptors / (
np.linalg.norm(descriptors, ord=2, axis=0, keepdims=True) + eps)
return descriptors_whitened


def learn_whitening(descriptors, qidxs, pidxs):
"""Learning the post-processing of fine-tuned descriptor vectors.
This method of whitening learning leverages the provided labeled data and
uses linear discriminant projections. The projection is decomposed into two
parts: whitening and rotation. The whitening part is the inverse of the
square-root of the intraclass (matching pairs) covariance matrix. The
rotation part is the PCA of the interclass (non-matching pairs) covariance
matrix in the whitened space. The described approach acts as a
post-processing step, equivalently, once the fine-tuning of the CNN is
finished. For more information about the method refer to the section 3.4
of https://arxiv.org/pdf/1711.02512.pdf.
Args:
descriptors: [N, D] NumPy array of L2-normalized descriptors.
qidxs: List of query indexes.
pidxs: List of positive pairs indexes.
Returns:
mean_descriptor_vector: [N, 1] NumPy array, mean descriptor vector.
projection: [N, N] NumPy array, whitening projection matrix.
"""
# Calculating the mean descriptor vector, which is used to perform centering.
mean_descriptor_vector = descriptors[:, qidxs].mean(axis=1, keepdims=True)
  # Intraclass (matching pairs) difference.
interclass_difference = descriptors[:, qidxs] - descriptors[:, pidxs]
covariance_matrix = (np.dot(interclass_difference, interclass_difference.T) /
interclass_difference.shape[1])

# Whitening part.
projection = np.linalg.inv(cholesky(covariance_matrix))

projected_X = np.dot(projection, descriptors - mean_descriptor_vector)
non_matching_covariance_matrix = np.dot(projected_X, projected_X.T)
eigval, eigvec = np.linalg.eig(non_matching_covariance_matrix)
order = eigval.argsort()[::-1]
eigvec = eigvec[:, order]

# Rotational part.
projection = np.dot(eigvec.T, projection)
return mean_descriptor_vector, projection


def cholesky(matrix):
"""Cholesky decomposition.
Cholesky decomposition suitable for non-positive definite matrices: involves
adding a small value `alpha` on the matrix diagonal until the matrix
becomes positive definite.
Args:
matrix: [K, K] Square matrix to be decomposed.
Returns:
    decomposition: [K, K] Lower-triangular Cholesky factor of `matrix`,
      a matrix with real and positive diagonal entries.
"""
alpha = 0
while True:
try:
# If the input parameter matrix is not positive-definite,
# the decomposition fails and we iteratively add a small value `alpha` on
# the matrix diagonal.
decomposition = np.linalg.cholesky(matrix + alpha * np.eye(*matrix.shape))
return decomposition
except np.linalg.LinAlgError:
if alpha == 0:
alpha = 1e-10
else:
alpha *= 10
      print('>>>> {}::cholesky: Matrix is not positive definite, adding '
            '{:.0e} on the diagonal'.format(os.path.basename(__file__), alpha))