# unet_model_helpers.py

import tensorflow as tf

def output_layer(inputs, depth):
    """
    turn the raw (unscaled) network scores into a label map

    Args:
        inputs (4d tensor [float]) : unscaled scores
        depth (int)                : 1 selects the sigmoid + threshold path,
                                     any other value selects the softmax + argmax path

    Returns:
        output (4d tensor [int]) : label map (uint8 on the binary path,
                                   the dtype returned by tf.argmax otherwise)
    """
    with tf.variable_scope('output_layer'):
        if depth != 1:
            # multi-class: scale the scores to probabilities over the last axis
            class_probs = tf.nn.softmax(inputs)
            # index of the most probable class for every position
            best_class = tf.argmax(class_probs, axis = -1)
            # argmax removes the last axis, so put a singleton axis back:
            # [batch_size, height, width] -> [batch_size, height, width, 1]
            output = tf.expand_dims(best_class, -1)
        else:
            # binary: scale the scores to 0 - 1 and threshold at 0.5
            foreground_prob = tf.nn.sigmoid(inputs)
            output = tf.cast(foreground_prob >= 0.5, tf.uint8)
    return output

def calc_loss(logits, labels, depth):
    """
    segmentation loss:
    loss = [intersection over union loss] + [cross entropy loss]

    logits and labels are reshaped to 2D tensors with `depth` columns before
    the two loss terms are evaluated

    Args:
        logits (tensor [float]) : unscaled output generated by the network, dims: [batch_size, height, width, depth]
        labels (tensor [float]) : ground-truth, dims: [batch_size, height, width, depth]
        depth (int) : depth of the label layer (1 for binary classification, num_of_classes for multi-label classification)

    Returns:
        loss (float) : scalar loss
    """
    with tf.variable_scope('seg_loss_layer'):
        # one row per pixel, one column per class
        flat_shape = [-1, depth]
        flat_logits = tf.reshape(logits, flat_shape)
        flat_labels = tf.reshape(labels, flat_shape)

        ce_term = calc_cross_ent_loss(flat_logits, flat_labels, depth)
        iou_term = calc_iou_loss(flat_logits, flat_labels, depth)

        loss = iou_term + ce_term

    return loss
        
def calc_iou_loss(logits_flat, labels_flat, depth):
    """
    soft intersection over union loss:
    loss = -log( sum(probs * labels) / sum(probs + labels - probs * labels) )

    the unscaled scores are first converted to probabilities: sigmoid when
    depth is 1 (binary classification), softmax otherwise

    Args:
        logits_flat (matrix [float]) : flattened version of the unscaled output generated by the network
        labels_flat (matrix [float]) : flattened version of the ground-truth
        depth (int) : depth of the label layer (1 for binary classification, num_of_classes for multi-label classification)

    Returns:
        loss (float) : scalar loss
    """
    # scores -> probabilities
    if depth == 1:
        probs_flat = tf.nn.sigmoid(logits_flat)
    else:
        probs_flat = tf.nn.softmax(logits_flat)

    with tf.variable_scope('iou_loss'):
        # element-wise product acts as the (soft) intersection
        overlap = tf.multiply(probs_flat, labels_flat)

        # soft union: probs + labels - intersection
        combined = tf.subtract(tf.add(probs_flat, labels_flat), overlap)

        # both quantities are summed over every element (not per column)
        overlap_total = tf.reduce_sum(overlap)
        combined_total = tf.reduce_sum(combined)

        # small constant added to both totals before the ratio / log is taken
        overlap_total += 1e-16
        combined_total += 1e-16

        loss = tf.multiply(tf.constant(-1.0), tf.log(tf.divide(overlap_total, combined_total)))

    return loss

def iou_single(preds,labels,depth):
    """
    intersection over union of two tensors, reduced over all elements

    Args:
        preds (tensor [float])  : predictions
        labels (tensor [float]) : ground-truth, multiplied / added element-wise with preds
        depth (int)             : not used by this function

    Returns:
        iou (float) : sum(labels * preds) / (sum(labels + preds) - sum(labels * preds) + 1e-16)
    """
    overlap = tf.reduce_sum(tf.multiply(labels,preds))
    total = tf.reduce_sum(tf.add(labels, preds))
    combined = tf.subtract(total, overlap)

    # the small constant keeps the denominator away from zero
    return tf.divide(overlap, combined + 1e-16)

def iou(preds, labels, batch_size, depth):
    """
    mean intersection over union between predicted and ground-truth label maps,
    computed with tf.metrics.mean_iou

    tf.metrics.mean_iou is a streaming metric: it returns a value tensor and an
    update op, and the value only changes when the update op is run. The update
    op is attached to the returned tensor as a control dependency, so that
    evaluating the returned tensor always runs the update as well.

    Args:
        preds (tensor [int])      : predicted label map, passed to tf.metrics.mean_iou as predictions
        labels (tensor [float])   : ground-truth; argmax is taken over its last axis
        batch_size (int)          : not used by this function
        depth (int)               : passed to tf.metrics.mean_iou as the number of classes

    Returns:
        mean_iou (float) : scalar mean IoU tensor

    NOTE(review): tf.metrics.* keep their state in local variables, so the caller
    presumably has to run tf.local_variables_initializer() first - confirm.
    NOTE(review): argmax over a last axis of size 1 is always 0, so labels are
    presumably expected to be one-hot with depth >= 2 - confirm for the binary case.
    """
    # collapse the last axis of the labels to class indices and keep a singleton last axis
    label_ids = tf.expand_dims(tf.argmax(labels, axis = -1), -1)

    mean_iou, update_op = tf.metrics.mean_iou(label_ids, preds, depth)

    # without this the update op is dropped and the confusion matrix is never accumulated
    with tf.control_dependencies([update_op]):
        mean_iou = tf.identity(mean_iou)

    return mean_iou

def calc_cross_ent_loss(logits_flat, labels_flat, depth):
    """
    mean cross entropy between the logits and the ground-truth:
    sigmoid cross entropy when depth is 1 (binary classification),
    softmax cross entropy otherwise

    Args:
        logits_flat (matrix [float]) : flattened version of the unscaled output generated by the network
        labels_flat (matrix [float]) : flattened version of the ground-truth
        depth (int) : depth of the label layer (1 for binary classification)

    Returns:
        loss (float) : scalar loss
    """
    with tf.variable_scope('cross_ent_loss'):
        if depth == 1:
            per_element = tf.nn.sigmoid_cross_entropy_with_logits(labels = labels_flat, logits = logits_flat)
        else:
            per_element = tf.nn.softmax_cross_entropy_with_logits_v2(labels = labels_flat, logits = logits_flat)

        # average the per-element losses into a single scalar
        loss = tf.reduce_mean(per_element)

    return loss

def conv_block(inputs, filters, kernel_size, strides, training, scope_name,bn=True):
    """
    2d convolution block: convolution -> (optional) batch normalization -> relu

    Args:
        inputs (4d tensor [float]) : input 4d tensor
        filters (int)              : number of output filters
        kernel_size (int)          : size of the kernel for the convolution
        strides (int)              : strides for the convolution
        training (bool)            : True = training, False = test
        scope_name (str)           : name of the block
        bn (bool)                  : True = apply batch normalization before the relu

    Returns:
        output (4d tensor [float]) : output
    """
    with tf.variable_scope(scope_name):
        pre_activation = conv2d(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, conv_name = 'conv2d')

        if bn:
            # axis = 1 is the channel axis of the 'channels_first' convolution output
            pre_activation = tf.layers.batch_normalization(pre_activation, fused = True, axis = 1, training = training)

        output = tf.nn.relu(pre_activation)

    return output

def conv2d(inputs, filters, kernel_size, strides, conv_name):
    """
    2d convolution with 'same' padding on 'channels_first' data

    Args:
        inputs (4d tensor [float]) : input 4d tensor
        filters (int)              : number of output filters
        kernel_size (int)          : size of the kernel for the convolution
        strides (int)              : strides for the convolution
        conv_name (str)            : name of the convolution operation

    Returns:
        logits (4d tensor [float]) : output of the convolution (no activation applied)
    """
    return tf.layers.conv2d(
        inputs = inputs,
        filters = filters,
        kernel_size = kernel_size,
        strides = strides,
        data_format = 'channels_first',
        padding = 'same',
        name = conv_name)
        
def upsample_concat(inputs1, inputs2, num_of_channels_reduce_factor, training, scope_name):
    """
    double height and width of inputs2 with nearest neighbour interpolation and
    concatenate the result with inputs1 along axis 1

    the number of channels of inputs2 is NOT changed here: resizing keeps the
    channels as they are

    Args:
        inputs1 (4d tensor [float])         : input that would be concatenated with inputs2
        inputs2 (4d tensor [float])         : input that would be upsampled and concatenated with inputs1
        num_of_channels_reduce_factor (int) : not used by this function, kept so existing callers still work
        training (1d tensor [bool])         : not used by this function, kept so existing callers still work
        scope_name (str)                    : name of the upsampling layer

    Returns:
        output (4d tensor [float])  : [upsampled inputs2, inputs1] concatenated along axis 1
    """
    with tf.variable_scope(scope_name):
        # move axis 1 to the end for tf.image.resize_images
        # (assumes NCHW input, like the 'channels_first' layers in this file)
        inputs2_nhwc = tf.transpose(inputs2, [0, 2, 3, 1])

        static_h, static_w = inputs2_nhwc.get_shape().as_list()[1:3]
        if static_h is None or static_w is None:
            # spatial size unknown at graph construction time: compute it at run time
            new_size = tf.shape(inputs2_nhwc)[1:3] * 2
        else:
            new_size = (static_h * 2, static_w * 2)

        upsampled = tf.image.resize_images(inputs2_nhwc, new_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        # back to the original axis order
        upsampled = tf.transpose(upsampled, [0, 3, 1, 2])

        # concat along axis 1 (the axis that was moved to the end for the resize)
        output = tf.concat([upsampled, inputs1], axis = 1)

    return output

def deconv(inputs,filters,training, scope_name,bn=True):
    """
    transposed convolution block: 2x2 transposed convolution with stride 2 ->
    (optional) batch normalization -> relu

    Args:
        inputs (4d tensor [float]) : input 4d tensor
        filters (int)              : number of output filters
        training (bool)            : forwarded to the batch normalization layer
        scope_name (str)           : name of the block
        bn (bool)                  : True = apply batch normalization before the relu

    Returns:
        upsampled (4d tensor [float]) : output
    """
    with tf.variable_scope(scope_name):
        pre_activation = tf.layers.conv2d_transpose(
            inputs = inputs,
            filters = filters,
            kernel_size = 2,
            strides = (2, 2),
            data_format = 'channels_first',
            padding = 'same',
            name = 'deconv')

        if bn:
            # axis = 1 is the channel axis of the 'channels_first' output
            pre_activation = tf.layers.batch_normalization(pre_activation, fused = True, axis = 1, training = training)

        upsampled = tf.nn.relu(pre_activation)

    return upsampled
def conv_block_sequence(inputs, filters, num_of_conv_blocks, training, scope_name ,kernel_size=3,bn=True):
    """
    chain of convolution blocks applied one after another, each with strides (1, 1)

    Args:
        inputs (4d tensor [float]) : input 4d tensor
        filters (int)              : number of output filters of every block
        num_of_conv_blocks (int)   : number of convolutional blocks in a row
        training (bool)            : True = training, False = test
        scope_name (str)           : name of the sequence
        kernel_size (int)          : size of the kernel used by every block
        bn (bool)                  : forwarded to every block

    Returns:
        outputs (4d tensor [float]) : output of the last block (the input itself when no block is applied)
    """
    unit_strides = (1, 1)
    outputs = inputs

    with tf.variable_scope(scope_name):
        # blocks are named conv_1, conv_2, ... in the order they are applied
        for block_index in range(num_of_conv_blocks):
            block_name = 'conv_' + str(block_index + 1)
            outputs = conv_block(outputs, filters, kernel_size, unit_strides, training, block_name, bn=bn)

    return outputs
    
def max_pool(inputs, scope_name):
    """
    2x2 max pooling with stride 2 on 'channels_first' data
    (reduces width and height of the input layer to half)

    Args:
        inputs (4d tensor [float]) : input 4d tensor
        scope_name (str)           : name of the pooling layer

    Returns:
        output (4d tensor [float]) : output
    """
    window = (2, 2)

    with tf.variable_scope(scope_name):
        output = tf.layers.max_pooling2d(inputs = inputs, pool_size = window, strides = window, data_format='channels_first')

    return output
    
    
def calc_loss_p(logits, labels, batch_size,depth):
    """
    pansharpening loss:
    loss = (1 - Q(labels, probs)) + (1 - psnr(labels, probs) / 50)
    where probs = sigmoid(logits)

    Args:
        logits (4d tensor [float]) : unscaled output generated by the network
        labels (4d tensor [float]) : ground-truth
        batch_size (int)           : forwarded to Q
        depth (int)                : forwarded to Q as the number of channels

    Returns:
        loss (float) : scalar loss
    """
    with tf.variable_scope('pansharpen_loss_layer'):
        probs = tf.nn.sigmoid(logits)

        # both terms shrink as the corresponding quality measure grows
        q_term = 1. - Q(labels, probs, batch_size, depth)
        psnr_term = 1. - psnr(labels, probs) / 50.

        loss = q_term + psnr_term

    return loss

def __conv(input,filter):
    """
    stride-1 2d convolution with "SAME" padding on NHWC data

    Args:
        input (4d tensor [float])  : input, forwarded to tf.nn.conv2d
        filter (4d tensor [float]) : convolution kernel, forwarded to tf.nn.conv2d

    Returns:
        output (4d tensor [float]) : result of tf.nn.conv2d
    """
    unit_strides = [1, 1, 1, 1]
    unit_dilations = [1, 1, 1, 1]
    return tf.nn.conv2d(input, filter, unit_strides, "SAME",
                        use_cudnn_on_gpu=True,
                        data_format='NHWC',
                        dilations=unit_dilations,
                        name=None)

def q_fs(img1, img2, block_size=8, patch_h=256, patch_w=256):
    """
    mean block-wise quality index between two single-channel images

    for every block_size x block_size window the following is evaluated from
    window sums (N = block_size ** 2):

        4 * (N * S12 - S1 * S2) * (S1 * S2)
        -----------------------------------------------------------
        (N * (S11 + S22) - (S1^2 + S2^2)) * (S1^2 + S2^2)

    where S1, S2, S11, S22, S12 are the window sums of img1, img2, img1^2,
    img2^2 and img1 * img2. Positions whose denominator is below 1e-7
    contribute 0. A border of block_size // 2 pixels is dropped before the mean.

    Args:
        img1 (tensor [float]) : first image, reshaped to [1, patch_h, patch_w, 1]
        img2 (tensor [float]) : second image, reshaped to [1, patch_h, patch_w, 1]
        block_size (int)      : side length of the summation window (default 8)
        patch_h (int)         : image height (default 256)
        patch_w (int)         : image width (default 256)

    Returns:
        q (float) : scalar mean of the index over the cropped map
    """
    num_pixels = block_size ** 2
    # an all-ones kernel turns the convolution into a sliding window sum
    box_filter = tf.ones((block_size, block_size, 1, 1))
    nhwc_shape = [1, patch_h, patch_w, 1]

    def window_sum(img):
        # sum of img over every block_size x block_size window
        return __conv(tf.reshape(img, nhwc_shape), box_filter)

    img1_sq = img1 * img1
    img2_sq = img2 * img2
    img12 = img1 * img2

    img1_sum = window_sum(img1)
    img2_sum = window_sum(img2)
    img1_sq_sum = window_sum(img1_sq)
    img2_sq_sum = window_sum(img2_sq)
    img12_sum = window_sum(img12)

    img12_sum_mul = img1_sum * img2_sum
    img12_sq_sum_mul = img1_sum * img1_sum + img2_sum * img2_sum
    numerator = 4 * (num_pixels * img12_sum - img12_sum_mul) * img12_sum_mul
    denominator1 = num_pixels * (img1_sq_sum + img2_sq_sum) - img12_sq_sum_mul
    denominator = denominator1 * img12_sq_sum_mul

    # divide by 1 where the denominator is too small so the division itself stays
    # finite, then overwrite those positions with 0
    too_small = tf.less(denominator, 1e-7)
    safe_denominator = tf.where(too_small, tf.ones_like(denominator), denominator)
    ratio = numerator / safe_denominator
    q_map = tf.where(too_small, tf.zeros_like(ratio), ratio)

    # drop the border (4 pixels for the default block size of 8); explicit end
    # indices are used so that a margin of 0 keeps the whole map
    margin = block_size // 2
    return tf.reduce_mean(q_map[:, margin:patch_h - margin, margin:patch_w - margin, :])


def Q(y_true,y_pred,batch_size,ms_channels):
    """
    mean of q_fs over every channel of every image in the batch

    Args:
        y_true (4d tensor [float]) : ground-truth, indexed as [image, :, :, channel]
        y_pred (4d tensor [float]) : prediction, indexed as [image, :, :, channel]
        batch_size (int)           : number of images to iterate over
        ms_channels (int)          : number of channels to iterate over

    Returns:
        q (float) : mean over images of the per-image mean over channels
    """
    per_image_scores = []
    for i in range(batch_size):
        per_band_scores = []
        for b in range(ms_channels):
            per_band_scores.append(q_fs(y_true[i,:,:,b], y_pred[i,:,:,b]))
        # average over the channels of this image
        per_image_scores.append(tf.reduce_mean(per_band_scores))

    # average over the images of the batch
    return tf.reduce_mean(per_image_scores)

def log10(x):
    """
    base-10 logarithm computed as log(x) / log(10)

    Args:
        x (tensor [float]) : input

    Returns:
        y (tensor [float]) : base-10 logarithm of x
    """
    natural_log = tf.log(x)
    # the constant is created with the same dtype as log(x) so the division type-checks
    log_of_ten = tf.log(tf.constant(10, dtype=natural_log.dtype))
    return natural_log / log_of_ten

def mse(y_true, y_pred):
    """
    mean squared error over all elements

    Args:
        y_true (tensor [float]) : ground-truth
        y_pred (tensor [float]) : prediction

    Returns:
        error (float) : scalar mean of (y_true - y_pred) ** 2
    """
    residual = y_true - y_pred
    squared = residual ** 2
    return tf.reduce_mean(squared)

def psnr(y_true, y_pred,norm=1):
    """
    peak signal-to-noise ratio computed as 10 * log10(1.0 / mse(y_true, y_pred))

    Args:
        y_true (tensor [float]) : ground-truth
        y_pred (tensor [float]) : prediction
        norm (number)           : accepted but never read, the numerator is fixed to 1.0

    Returns:
        value (float) : scalar psnr

    NOTE(review): `norm` is ignored - confirm whether it was meant to replace
    the fixed 1.0 numerator.
    """
    fixed_peak = 1.0
    error = mse(y_true, y_pred)
    return 10.0 * log10(fixed_peak / error)