fix merge

rykov8 · Mar 7, 2017 · a560b91 · a560b91
2 parents d520f37 + 60f9720
commit a560b91
Show file tree

Hide file tree

Showing 4 changed files with 109 additions and 15 deletions.
diff --git a/PASCAL_VOC/get_data_from_XML.py b/PASCAL_VOC/get_data_from_XML.py
@@ -0,0 +1,91 @@
+import numpy as np
+import os
+from xml.etree import ElementTree
+
+class XML_preprocessor(object):
+    """Parse PASCAL VOC style XML annotations into per-image arrays.
+
+    After construction, ``self.data`` maps each annotation's ``filename``
+    text to an array with one row per object: the box corners
+    ``[xmin, ymin, xmax, ymax]`` divided by the image width/height,
+    followed by a one-hot class vector of length ``num_classes``.
+    """
+
+    # Position in this tuple is the index set in the one-hot vector.
+    class_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
+                   'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
+                   'dog', 'horse', 'motorbike', 'person', 'pottedplant',
+                   'sheep', 'sofa', 'train', 'tvmonitor')
+
+    def __init__(self, data_path):
+        """Parse every file in ``data_path`` and fill ``self.data``.
+
+        data_path: directory of annotation files; a trailing path
+            separator is optional.
+        """
+        self.path_prefix = data_path
+        self.num_classes = 20
+        self.data = dict()
+        self._preprocess_XML()
+
+    def _preprocess_XML(self):
+        """Read each annotation file and store its array in ``self.data``.
+
+        Every directory entry is parsed; a file that is not well-formed
+        XML makes ``ElementTree.parse`` raise.
+        """
+        for filename in os.listdir(self.path_prefix):
+            # os.path.join works with or without a trailing separator,
+            # unlike plain string concatenation.
+            tree = ElementTree.parse(os.path.join(self.path_prefix, filename))
+            root = tree.getroot()
+            size_tree = root.find('size')
+            width = float(size_tree.find('width').text)
+            height = float(size_tree.find('height').text)
+            bounding_boxes = []
+            one_hot_classes = []
+            for object_tree in root.findall('object'):
+                # find() only looks at direct children, so a <bndbox> nested
+                # deeper in the object (e.g. inside <part>) cannot replace
+                # the object's own box, as the last iter() match used to.
+                box_tree = object_tree.find('bndbox')
+                if box_tree is None:
+                    # Skip instead of reusing the previous object's box.
+                    print('object without bndbox in %s' % filename)
+                    continue
+                bounding_boxes.append(
+                    [float(box_tree.find('xmin').text) / width,
+                     float(box_tree.find('ymin').text) / height,
+                     float(box_tree.find('xmax').text) / width,
+                     float(box_tree.find('ymax').text) / height])
+                class_name = object_tree.find('name').text
+                one_hot_classes.append(self._to_one_hot(class_name))
+            image_name = root.find('filename').text
+            # The explicit reshape keeps images without objects 2-D
+            # (zero rows) instead of collapsing to a flat empty array.
+            boxes = np.asarray(bounding_boxes, dtype=float).reshape(-1, 4)
+            classes = np.asarray(one_hot_classes).reshape(
+                -1, self.num_classes)
+            self.data[image_name] = np.hstack((boxes, classes))
+
+    def _to_one_hot(self, name):
+        """Return the one-hot list for class ``name``.
+
+        name: label text; known labels are listed in ``class_names``.
+
+        An unknown name is reported with ``print`` and yields an all-zero
+        vector.
+        """
+        one_hot_vector = [0] * self.num_classes
+        if name in self.class_names:
+            one_hot_vector[self.class_names.index(name)] = 1
+        else:
+            print('unknown label: %s' %name)
+
+        return one_hot_vector
+
+## example on how to use it
+# import pickle
+# data = XML_preprocessor('VOC2007/Annotations/').data
+# pickle.dump(data,open('VOC2007.p','wb'))
+
diff --git a/README.md b/README.md
@@ -1,5 +1,8 @@
+[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](LICENSE)
 # A port of [SSD: Single Shot MultiBox Detector](https://github.com/weiliu89/caffe/tree/ssd) to [Keras](https://keras.io) framework.
 For more details, please refer to [arXiv paper](http://arxiv.org/abs/1512.02325).
 For forward pass for 300x300 model, please, follow `SSD.ipynb` for examples. For training procedure for 300x300 model, please, follow `SSD_training.ipynb` for examples. Moreover, in `testing_utils` folder there is a useful script to test `SSD` on video or on camera input.
 
 Weights are ported from the original models and are available [here](https://mega.nz/#F!7RowVLCL!q3cEVRK9jyOSB9el3SssIA). You need `weights_SSD300.hdf5`, `weights_300x300_old.hdf5` is for the old version of architecture with 3x3 convolution for `pool6`.
+
+This code was tested with `Keras` v1.2.2, `TensorFlow` v1.0.0, and `OpenCV` v3.1.0-dev.
diff --git a/SSD_training.ipynb b/SSD_training.ipynb
@@ -158,7 +158,7 @@
     "\n",
     "    def lighting(self, img):\n",
     "        cov = np.cov(img.reshape(-1, 3) / 255.0, rowvar=False)\n",
-    "        eigval, eigvec = np.linalg.eig(cov)\n",
+    "        eigval, eigvec = np.linalg.eigh(cov)\n",
     "        noise = np.random.randn(3) * self.lighting_std\n",
     "        noise = eigvec.dot(eigval * noise) * 255\n",
     "        img += noise\n",
@@ -516,21 +516,21 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python [conda root]",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "conda-root-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,

diff --git a/ssd_training.py b/ssd_training.py
@@ -47,7 +47,7 @@ def _l1_smooth_loss(self, y_true, y_pred):
         """
         abs_loss = tf.abs(y_true - y_pred)
         sq_loss = 0.5 * (y_true - y_pred)**2
-        l1_loss = tf.select(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
+        l1_loss = tf.where(tf.less(abs_loss, 1.0), sq_loss, abs_loss - 0.5)
         return tf.reduce_sum(l1_loss, -1)
 
     def _softmax_loss(self, y_true, y_pred):
@@ -64,7 +64,7 @@ def _softmax_loss(self, y_true, y_pred):
         """
         y_pred = tf.maximum(tf.minimum(y_pred, 1 - 1e-15), 1e-15)
         softmax_loss = -tf.reduce_sum(y_true * tf.log(y_pred),
-                                      reduction_indices=-1)
+                                      axis=-1)
         return softmax_loss
 
     def compute_loss(self, y_true, y_pred):
@@ -93,26 +93,26 @@ def compute_loss(self, y_true, y_pred):
                                         y_pred[:, :, :4])
 
         # get positives loss
-        num_pos = tf.reduce_sum(y_true[:, :, -8], reduction_indices=-1)
+        num_pos = tf.reduce_sum(y_true[:, :, -8], axis=-1)
         pos_loc_loss = tf.reduce_sum(loc_loss * y_true[:, :, -8],
-                                     reduction_indices=1)
+                                     axis=1)
         pos_conf_loss = tf.reduce_sum(conf_loss * y_true[:, :, -8],
-                                      reduction_indices=1)
+                                      axis=1)
 
         # get negatives loss, we penalize only confidence here
         num_neg = tf.minimum(self.neg_pos_ratio * num_pos,
                              num_boxes - num_pos)
         pos_num_neg_mask = tf.greater(num_neg, 0)
         has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
-        num_neg = tf.concat(0, [num_neg,
+        num_neg = tf.concat(axis=0, values=[num_neg,
                                 [(1 - has_min) * self.negatives_for_hard]])
         num_neg_batch = tf.reduce_min(tf.boolean_mask(num_neg,
                                                       tf.greater(num_neg, 0)))
         num_neg_batch = tf.to_int32(num_neg_batch)
         confs_start = 4 + self.background_label_id + 1
         confs_end = confs_start + self.num_classes - 1
         max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end],
-                                  reduction_indices=2)
+                                  axis=2)
         _, indices = tf.nn.top_k(max_confs * (1 - y_true[:, :, -8]),
                                  k=num_neg_batch)
         batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
@@ -126,12 +126,12 @@ def compute_loss(self, y_true, y_pred):
                                   full_indices)
         neg_conf_loss = tf.reshape(neg_conf_loss,
                                    [batch_size, num_neg_batch])
-        neg_conf_loss = tf.reduce_sum(neg_conf_loss, reduction_indices=1)
+        neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)
 
         # loss is sum of positives and negatives
         total_loss = pos_conf_loss + neg_conf_loss
         total_loss /= (num_pos + tf.to_float(num_neg_batch))
-        num_pos = tf.select(tf.not_equal(num_pos, 0), num_pos,
+        num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
                             tf.ones_like(num_pos))
         total_loss += (self.alpha * pos_loc_loss) / num_pos
         return total_loss