"""
The FastGradientMethod attack.
"""
import warnings
import numpy as np
import tensorflow as tf
from cleverhans.attacks.attack import Attack
from cleverhans.compat import reduce_max, reduce_sum, softmax_cross_entropy_with_logits
from cleverhans import utils_tf
class FastGradientMethod(Attack):
    """
    Fast Gradient Method attack.

    This attack was originally implemented by Goodfellow et al. (2014) with
    the infinity norm (and is known as the "Fast Gradient Sign Method"). This
    implementation extends the attack to other norms, and is therefore called
    the Fast Gradient Method.
    Paper link: https://arxiv.org/abs/1412.6572

    :param model: cleverhans.model.Model
    :param sess: optional tf.Session
    :param dtypestr: dtype of the data
    :param kwargs: passed through to super constructor
    """

    def __init__(self, model, sess=None, dtypestr='float32', **kwargs):
        """
        Create a FastGradientMethod instance.

        Note: the model parameter should be an instance of the
        cleverhans.model.Model abstraction provided by CleverHans.
        """
        super(FastGradientMethod, self).__init__(model, sess, dtypestr,
                                                 **kwargs)
        # Keyword-argument names exposed to the Attack base class.
        # NOTE(review): presumably "feedable" arguments may be fed at run
        # time while "structural" ones change the graph -- confirm against
        # the Attack base class.
        self.feedable_kwargs = ('eps', 'y', 'y_target', 'clip_min', 'clip_max')
        self.structural_kwargs = ['ord', 'sanity_checks', 'clip_grad',
                                  'loss_fn']

    def generate(self, x, **kwargs):
        """
        Returns the graph for Fast Gradient Method adversarial examples.

        :param x: The model's symbolic inputs.
        :param kwargs: See `parse_params`
        :return: the tensor returned by `fgm` for the parsed parameters
        """
        # Parse and save attack-specific parameters. The call is made
        # outside of the `assert` so that the attributes are still set when
        # Python runs with -O, which strips assert statements entirely.
        params_ok = self.parse_params(**kwargs)
        assert params_ok

        # Label selection is delegated to the base-class helper; only the
        # labels are needed here, not the class count.
        labels, _nb_classes = self.get_or_guess_labels(x, kwargs)

        return fgm(
            x,
            self.model.get_logits(x),
            y=labels,
            eps=self.eps,
            ord=self.ord,
            loss_fn=self.loss_fn,
            clip_min=self.clip_min,
            clip_max=self.clip_max,
            clip_grad=self.clip_grad,
            # The attack is targeted exactly when a target label was given.
            targeted=(self.y_target is not None),
            sanity_checks=self.sanity_checks)

    def parse_params(self,
                     eps=0.3,
                     ord=np.inf,
                     loss_fn=softmax_cross_entropy_with_logits,
                     y=None,
                     y_target=None,
                     clip_min=None,
                     clip_max=None,
                     clip_grad=False,
                     sanity_checks=True,
                     **kwargs):
        """
        Takes in a dictionary of parameters and applies attack-specific
        checks after saving them as attributes.

        Attack-specific parameters:

        :param eps: (optional float) attack step size (input variation)
        :param ord: (optional) Order of the norm (mimics NumPy).
                    Possible values: np.inf, 1 or 2.
        :param loss_fn: Loss function that takes (labels, logits) as
                        arguments and returns loss
        :param y: (optional) A tensor with the true labels. Only provide
                  this parameter if you'd like to use true labels when
                  crafting adversarial samples. Otherwise, model predictions
                  are used as labels to avoid the "label leaking" effect
                  (explained in this paper:
                  https://arxiv.org/abs/1611.01236). Default is None.
                  Labels should be one-hot-encoded.
        :param y_target: (optional) A tensor with the labels to target.
                         Leave y_target=None if y is also set. Labels should
                         be one-hot-encoded.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        :param clip_grad: (optional bool) Ignore gradient components
                          at positions where the input is already at the
                          boundary of the domain, and the update step will
                          get clipped out.
        :param sanity_checks: bool, if True, include asserts
          (Turn them off to use less runtime / memory or for unit tests that
          intentionally pass strange input)
        :param kwargs: unused; a warning is issued if any are supplied
        :return: True when all checks pass
        :raises ValueError: if both `y` and `y_target` are set, if `ord` is
          not np.inf, 1 or 2, or if `clip_grad` is set without both
          `clip_min` and `clip_max`
        """
        # Save attack-specific parameters
        self.eps = eps
        self.ord = ord
        self.loss_fn = loss_fn
        self.y = y
        self.y_target = y_target
        self.clip_min = clip_min
        self.clip_max = clip_max
        self.clip_grad = clip_grad
        self.sanity_checks = sanity_checks

        if self.y is not None and self.y_target is not None:
            raise ValueError("Must not set both y and y_target")

        # Check if order of the norm is acceptable given current
        # implementation
        if self.ord not in (np.inf, 1, 2):
            raise ValueError("Norm order must be either np.inf, 1, or 2.")

        if self.clip_grad and (self.clip_min is None
                               or self.clip_max is None):
            raise ValueError(
                "Must set clip_min and clip_max if clip_grad is set")

        if kwargs:
            warnings.warn("kwargs is unused and will be removed on or after "
                          "2019-04-26.")

        return True
def fgm(x,
        logits,
        y=None,
        eps=0.3,
        ord=np.inf,
        loss_fn=softmax_cross_entropy_with_logits,
        clip_min=None,
        clip_max=None,
        clip_grad=False,
        targeted=False,
        sanity_checks=True):
    """
    TensorFlow implementation of the Fast Gradient Method.

    :param x: the input placeholder
    :param logits: output of model.get_logits
    :param y: (optional) A placeholder for the true labels. If targeted
              is true, then provide the target label. Otherwise, only provide
              this parameter if you'd like to use true labels when crafting
              adversarial samples. Otherwise, model predictions are used as
              labels to avoid the "label leaking" effect (explained in this
              paper: https://arxiv.org/abs/1611.01236). Default is None.
              Labels should be one-hot-encoded.
    :param eps: the epsilon (input variation parameter)
    :param ord: (optional) Order of the norm (mimics NumPy).
                Possible values: np.inf, 1 or 2.
    :param loss_fn: Loss function that takes (labels, logits) as arguments
                    and returns loss
    :param clip_min: Minimum float value for adversarial example components
    :param clip_max: Maximum float value for adversarial example components
    :param clip_grad: (optional bool) Ignore gradient components
                      at positions where the input is already at the boundary
                      of the domain, and the update step will get clipped out.
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect. Targeted
                     will instead try to move in the direction of being more
                     like y.
    :param sanity_checks: bool, if True the input-range assertions built
                          from clip_min / clip_max are attached to the
                          returned tensor as control dependencies.
    :return: a tensor for the adversarial example
    """
    asserts = []

    # If a data range was specified, check that the input was in that range
    if clip_min is not None:
        asserts.append(utils_tf.assert_greater_equal(
            x, tf.cast(clip_min, x.dtype)))

    if clip_max is not None:
        asserts.append(utils_tf.assert_less_equal(
            x, tf.cast(clip_max, x.dtype)))

    # Make sure the caller has not passed probs by accident
    assert logits.op.type != 'Softmax'

    if y is None:
        # Using model predictions as ground truth to avoid label leaking.
        # The mask is cast to the dtype of the logits (rather than a
        # hard-coded float32) so that the labels match non-float32 models.
        preds_max = reduce_max(logits, 1, keepdims=True)
        y = tf.cast(tf.equal(logits, preds_max), logits.dtype)
        y = tf.stop_gradient(y)
    # Normalise each row of labels to sum to one; for guessed labels this
    # splits the mass evenly between classes tied for the maximum logit.
    y = y / reduce_sum(y, 1, keepdims=True)

    # Compute loss
    loss = loss_fn(labels=y, logits=logits)
    if targeted:
        # A targeted attack descends the loss towards y instead of
        # ascending it away from y.
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    if clip_grad:
        grad = utils_tf.zero_out_clipped_grads(grad, x, clip_min, clip_max)

    optimal_perturbation = optimize_linear(grad, eps, ord)

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + optimal_perturbation

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    if sanity_checks:
        # Routing the result through an identity op under the control
        # dependencies makes the range assertions run whenever adv_x is
        # evaluated.
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    return adv_x
def optimize_linear(grad, eps, ord=np.inf):
    """
    Solves for the optimal input to a linear function under a norm
    constraint.

    Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

    :param grad: tf tensor containing a batch of gradients
    :param eps: float scalar specifying size of constraint region
    :param ord: int specifying order of norm (np.inf, 1 or 2)
    :returns:
      tf tensor containing optimal perturbation
    :raises NotImplementedError: if `ord` is not np.inf, 1 or 2
    """
    # Reduce over every axis except the first one.
    # In Python 2, the `list` call is redundant / harmless; in Python 3 it
    # is needed to turn the `range` object into a list.
    red_ind = list(range(1, len(grad.get_shape())))
    avoid_zero_div = 1e-12

    if ord == np.inf:
        # Take sign of gradient
        optimal_perturbation = tf.sign(grad)
        # The following line should not change the numerical results.
        # It applies only because `optimal_perturbation` is the output of
        # a `sign` op, which has zero derivative anyway.
        # It should not be applied for the other norms, where the
        # perturbation has a non-zero derivative.
        optimal_perturbation = tf.stop_gradient(optimal_perturbation)
    elif ord == 1:
        abs_grad = tf.abs(grad)
        sign = tf.sign(grad)
        # Use the compat reduction wrappers, like the rest of this file.
        max_abs_grad = reduce_max(abs_grad, red_ind, keepdims=True)
        # Cast the tie mask to the gradient's own dtype: a hard-coded
        # float32 mask cannot be multiplied with a non-float32 `sign`.
        tied_for_max = tf.cast(tf.equal(abs_grad, max_abs_grad), grad.dtype)
        num_ties = reduce_sum(tied_for_max, red_ind, keepdims=True)
        # Spread the unit L1 budget evenly over the components tied for
        # the largest absolute gradient.
        optimal_perturbation = sign * tied_for_max / num_ties
    elif ord == 2:
        # Floor the squared norm so an all-zero gradient does not cause a
        # division by zero.
        square = tf.maximum(avoid_zero_div,
                            reduce_sum(tf.square(grad),
                                       reduction_indices=red_ind,
                                       keepdims=True))
        optimal_perturbation = grad / tf.sqrt(square)
    else:
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Scale perturbation to be the solution for the norm=eps rather than
    # norm=1 problem
    scaled_perturbation = utils_tf.mul(eps, optimal_perturbation)
    return scaled_perturbation