Skip to content
This repository has been archived by the owner on Jun 25, 2024. It is now read-only.

Commit

Permalink
fix merge
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrey Rykov committed Mar 7, 2017
2 parents d520f37 + 60f9720 commit a560b91
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 15 deletions.
91 changes: 91 additions & 0 deletions PASCAL_VOC/get_data_from_XML.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import numpy as np
import os
from xml.etree import ElementTree

class XML_preprocessor(object):
    """Load PASCAL VOC style XML annotations into per-image numpy arrays.

    After construction, ``self.data`` maps the text of each annotation's
    ``<filename>`` element to a 2-D array with one row per ``<object>``:
    ``[xmin, ymin, xmax, ymax, <one-hot class vector>]``. Box coordinates
    are divided by the image width/height read from the ``<size>`` element.

    Args:
        data_path: Directory containing the XML annotation files. A
            trailing path separator is optional.
    """

    # The position of a label in this tuple is its one-hot index.
    _CLASS_NAMES = (
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    )

    def __init__(self, data_path):
        self.path_prefix = data_path
        self.num_classes = 20
        self.data = dict()
        self._preprocess_XML()

    def _preprocess_XML(self):
        """Parse every file in ``self.path_prefix`` and fill ``self.data``.

        Raises:
            ValueError: If an ``<object>`` element has no ``<bndbox>`` child.
        """
        for filename in os.listdir(self.path_prefix):
            # os.path.join works whether or not the prefix ends with a
            # separator; plain string concatenation required one.
            annotation_path = os.path.join(self.path_prefix, filename)
            root = ElementTree.parse(annotation_path).getroot()

            size_tree = root.find('size')
            width = float(size_tree.find('width').text)
            height = float(size_tree.find('height').text)

            bounding_boxes = []
            one_hot_classes = []
            for object_tree in root.findall('object'):
                # Use the object's own (direct child) box. Iterating over
                # all descendants would also visit boxes nested inside
                # <part> elements and pair the label with the wrong box.
                box_tree = object_tree.find('bndbox')
                if box_tree is None:
                    raise ValueError(
                        'object without bndbox in %s' % annotation_path)
                xmin = float(box_tree.find('xmin').text) / width
                ymin = float(box_tree.find('ymin').text) / height
                xmax = float(box_tree.find('xmax').text) / width
                ymax = float(box_tree.find('ymax').text) / height
                bounding_boxes.append([xmin, ymin, xmax, ymax])

                class_name = object_tree.find('name').text
                one_hot_classes.append(self._to_one_hot(class_name))

            image_name = root.find('filename').text
            bounding_boxes = np.asarray(bounding_boxes)
            one_hot_classes = np.asarray(one_hot_classes)
            self.data[image_name] = np.hstack(
                (bounding_boxes, one_hot_classes))

    def _to_one_hot(self, name):
        """Return the one-hot list of length ``self.num_classes`` for a label.

        An unrecognised label is reported on stdout and yields an all-zero
        vector rather than raising.
        """
        one_hot_vector = [0] * self.num_classes
        try:
            class_index = self._CLASS_NAMES.index(name)
        except ValueError:
            print('unknown label: %s' %name)
        else:
            one_hot_vector[class_index] = 1

        return one_hot_vector

## example on how to use it
# import pickle
# data = XML_preprocessor('VOC2007/Annotations/').data
# pickle.dump(data,open('VOC2007.p','wb'))

3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](LICENSE)
# A port of [SSD: Single Shot MultiBox Detector](https://github.com/weiliu89/caffe/tree/ssd) to [Keras](https://keras.io) framework.
For more details, please refer to [arXiv paper](http://arxiv.org/abs/1512.02325).
For a forward pass with the 300x300 model, see the examples in `SSD.ipynb`. For the training procedure for the 300x300 model, see the examples in `SSD_training.ipynb`. In addition, the `testing_utils` folder contains a useful script for testing `SSD` on video or camera input.

Weights are ported from the original models and are available [here](https://mega.nz/#F!7RowVLCL!q3cEVRK9jyOSB9el3SssIA). You need `weights_SSD300.hdf5`; `weights_300x300_old.hdf5` is for the old version of the architecture, which uses a 3x3 convolution for `pool6`.

This code was tested with `Keras` v1.2.2, `Tensorflow` v1.0.0, `OpenCV` v3.1.0-dev
12 changes: 6 additions & 6 deletions SSD_training.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
"\n",
" def lighting(self, img):\n",
" cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)\n",
" eigval, eigvec = np.linalg.eig(cov)\n",
" eigval, eigvec = np.linalg.eigh(cov)\n",
" noise = np.random.randn(3) * self.lighting_std\n",
" noise = eigvec.dot(eigval * noise) * 255\n",
" img += noise\n",
Expand Down Expand Up @@ -516,21 +516,21 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"display_name": "Python 3",
"language": "python",
"name": "conda-root-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
Expand Down
18 changes: 9 additions & 9 deletions ssd_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _l1_smooth_loss(self, y_true, y_pred):
"""
abs_loss = tf.abs(y_true - y_pred)
sq_loss = 0.5 * (y_true - y_pred)**2
l1_loss = tf.select(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
return tf.reduce_sum(l1_loss, -1)

def _softmax_loss(self, y_true, y_pred):
Expand All @@ -64,7 +64,7 @@ def _softmax_loss(self, y_true, y_pred):
"""
y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)
softmax_loss = -tf.reduce_sum(y_true * tf.log(y_pred),
reduction_indices=-1)
axis=-1)
return softmax_loss

def compute_loss(self, y_true, y_pred):
Expand Down Expand Up @@ -93,26 +93,26 @@ def compute_loss(self, y_true, y_pred):
y_pred[:, :, :4])

# get positives loss
num_pos = tf.reduce_sum(y_true[:, :, -8], reduction_indices=-1)
num_pos = tf.reduce_sum(y_true[:, :, -8], axis=-1)
pos_loc_loss = tf.reduce_sum(loc_loss * y_true[:, :, -8],
reduction_indices=1)
axis=1)
pos_conf_loss = tf.reduce_sum(conf_loss * y_true[:, :, -8],
reduction_indices=1)
axis=1)

# get negatives loss, we penalize only confidence here
num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
num_boxes - num_pos)
pos_num_neg_mask = tf.greater(num_neg, 0)
has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
num_neg = tf.concat(0, [num_neg,
num_neg = tf.concat(axis=0, values=[num_neg,
[(1 - has_min) * self.negatives_for_hard]])
num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg,
tf.greater(num_neg, 0)))
num_neg_batch = tf.to_int32(num_neg_batch)
confs_start = 4 + self.background_label_id + 1
confs_end = confs_start + self.num_classes - 1
max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
reduction_indices=2)
axis=2)
_, indices = tf.nn.top_k(max_confs * (1 - y_true[:, :, -8]),
k=num_neg_batch)
batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
Expand All @@ -126,12 +126,12 @@ def compute_loss(self, y_true, y_pred):
full_indices)
neg_conf_loss = tf.reshape(neg_conf_loss,
[batch_size, num_neg_batch])
neg_conf_loss = tf.reduce_sum(neg_conf_loss, reduction_indices=1)
neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)

# loss is sum of positives and negatives
total_loss = pos_conf_loss + neg_conf_loss
total_loss /= (num_pos + tf.to_float(num_neg_batch))
num_pos = tf.select(tf.not_equal(num_pos, 0), num_pos,
num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
tf.ones_like(num_pos))
total_loss += (self.alpha * pos_loc_loss) / num_pos
return total_loss

0 comments on commit a560b91

Please sign in to comment.