Source code for zoo.tfpark.zoo_optimizer

#
# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from bigdl.optim.optimizer import OptimMethod
from zoo.util.tf import process_grad


class FakeOptimMethod(OptimMethod):
    """An ``OptimMethod`` subclass that can be constructed without arguments.

    The base-class initializer is always called with ``None`` and ``"float"``.

    NOTE(review): presumably ``None`` means "no backing object supplied" and
    ``"float"`` selects the BigDL numeric type -- confirm against
    ``bigdl.optim.optimizer.OptimMethod``.
    """

    def __init__(self):
        # Fixed positional arguments forwarded unchanged to OptimMethod.
        base_init_args = (None, "float")
        super(FakeOptimMethod, self).__init__(*base_init_args)
# cannot subclass tf.train.Optimizer without importing it
import tensorflow as tf
def get_gradients_for_keras(optimizer, loss, params):
    """Build the gradients of ``loss`` with respect to ``params``.

    Each gradient is passed through ``process_grad`` and then wrapped in a
    ``tf.identity`` op named ``"zoo_identity_op_for_grad"`` that carries a
    control dependency on its parameter. If the optimizer exposes
    ``clipnorm`` and/or ``clipvalue`` attributes, the gradients are clipped
    accordingly (norm first, then value).

    :param optimizer: a Keras optimizer; when it is a ``TFOptimizer`` the
        name of the optimizer it wraps is used for the name scope.
    :param loss: the tensor to differentiate.
    :param params: a (possibly nested) structure of parameters; it is
        flattened before differentiation.
    :return: a list of gradient tensors, one per flattened parameter.
    :raises ValueError: if any parameter has no gradient.
    """
    from tensorflow.python.keras import backend
    from tensorflow.python.keras.optimizers import TFOptimizer
    from tensorflow.python.ops import clip_ops
    from tensorflow.python.ops import gradients
    from tensorflow.python.util import nest

    flat_params = nest.flatten(params)

    # A TFOptimizer keeps the optimizer it wraps (and therefore the name we
    # want for the scope) on its `.optimizer` attribute.
    named_optimizer = optimizer.optimizer if isinstance(optimizer, TFOptimizer) else optimizer
    gradient_scope = named_optimizer._name + "/gradients"

    with backend.get_graph().as_default(), backend.name_scope(gradient_scope):
        raw_grads = gradients.gradients(loss, flat_params)

        guarded_grads = []
        for variable, raw_grad in zip(flat_params, raw_grads):
            if raw_grad is None:
                raise ValueError("Variable {} has `None` for gradient. "
                                 "Please make sure that all of your ops have a "
                                 "gradient defined (i.e. are differentiable). "
                                 "Common ops without gradient: "
                                 "K.argmax, K.round, K.eval.".format(variable))
            processed_grad = process_grad(raw_grad)
            # The identity op is created under a control dependency on the
            # parameter it belongs to.
            with tf.control_dependencies([variable]):
                guarded_grads.append(
                    tf.identity(processed_grad, name="zoo_identity_op_for_grad"))

        result = guarded_grads

        if hasattr(optimizer, "clipnorm"):
            max_norm = optimizer.clipnorm
            result = [clip_ops.clip_by_norm(g, max_norm) for g in result]

        if hasattr(optimizer, "clipvalue"):
            limit = optimizer.clipvalue
            result = [clip_ops.clip_by_value(g, -limit, limit) for g in result]

    return result
class ZooOptimizer(tf.train.Optimizer):
    """An optimizer that wraps another ``tf.train.Optimizer``.

    Gradient computation is delegated to the wrapped optimizer; every
    resulting gradient is then passed through ``process_grad`` and, when it
    is not ``None``, wrapped in a ``tf.identity`` op named
    ``"zoo_identity_op_for_grad"`` with a control dependency on its variable.
    All other operations are forwarded to the wrapped optimizer unchanged.

    NOTE(review): presumably the fixed op name lets other Zoo code locate the
    gradient tensors in the graph -- confirm against the callers.
    """

    def __init__(self, optimizer, name=None):
        """Wrap ``optimizer``.

        :param optimizer: the optimizer to delegate to.
        :param name: optional name for this optimizer; defaults to ``"Zoo"``
            followed by the class name of ``optimizer``.
        """
        if name is None:
            name = "Zoo{}".format(type(optimizer).__name__)
        super(ZooOptimizer, self).__init__(name=name, use_locking=False)
        self._optimizer = optimizer

    def compute_gradients(self, *args, **kwargs):
        """Compute gradients using the wrapped optimizer and post-process them.

        All arguments are forwarded to the wrapped optimizer's
        ``compute_gradients``. See ``Optimizer.compute_gradients()`` for
        more info.

        :return: a list of ``(gradient, variable)`` pairs. Non-``None``
            gradients are replaced by an identity op (see the class
            docstring); ``None`` gradients are passed through.
        """
        grads_and_vars = self._optimizer.compute_gradients(*args, **kwargs)
        results = []
        for grad, var in grads_and_vars:
            grad = process_grad(grad)
            if grad is not None:
                # Create the identity op under a control dependency on the
                # variable the gradient belongs to.
                with tf.control_dependencies([var]):
                    grad = tf.identity(grad, name="zoo_identity_op_for_grad")
            results.append((grad, var))
        return results

    def apply_gradients(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer.apply_gradients(*args, **kwargs)

    def get_slot(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer.get_slot(*args, **kwargs)

    def get_slot_names(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer.get_slot_names(*args, **kwargs)

    def variables(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer.variables(*args, **kwargs)

    # The four hooks below forward to the wrapped optimizer and return its
    # result, so that a caller receives the update op rather than None.

    def _resource_apply_sparse(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer._resource_apply_sparse(*args, **kwargs)

    def _resource_apply_dense(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        # Must forward to the dense variant; forwarding to
        # _resource_apply_sparse would pass dense arguments to a sparse update.
        return self._optimizer._resource_apply_dense(*args, **kwargs)

    def _apply_sparse(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer._apply_sparse(*args, **kwargs)

    def _apply_dense(self, *args, **kwargs):
        """Calls this same method on the underlying optimizer."""
        return self._optimizer._apply_dense(*args, **kwargs)