Add files via upload

This commit is contained in:
Oscar
2020-05-21 18:56:45 +08:00
committed by GitHub
parent d2fa3d6cfb
commit 1ced062962
7 changed files with 1000 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import tensorflow as tf
import yolo_v3
import yolo_v3_tiny
from utils import load_coco_names, load_weights
# Command-line flags for the Darknet-weights -> TF-checkpoint converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_string(
    'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
def main(argv=None):
    """Build the selected YOLOv3 graph, load Darknet weights, save a TF checkpoint."""
    # Pick the graph builder that matches the requested model variant.
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    # any size > 320 will work here
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes),
                           data_format=FLAGS.data_format)
        # Assign ops that copy the binary Darknet weights into graph variables.
        load_ops = load_weights(tf.global_variables(
            scope='detector'), FLAGS.weights_file)

    saver = tf.train.Saver(tf.global_variables(scope='detector'))

    with tf.Session() as sess:
        sess.run(load_ops)
        save_path = saver.save(sess, save_path=FLAGS.ckpt_file)
        print('Model saved in path: {}'.format(save_path))


if __name__ == '__main__':
    tf.app.run()

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
import yolo_v3
import yolo_v3_tiny
from PIL import Image, ImageDraw
from utils import load_weights, load_coco_names, detections_boxes, freeze_graph
# Command-line flags for the Darknet-weights -> frozen-graph (.pb) converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'output_graph', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model output path')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')
def main(argv=None):
    """Build the selected YOLOv3 graph, load Darknet weights, freeze to a .pb file."""
    # Pick the graph builder that matches the requested model variant.
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs")

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes), data_format=FLAGS.data_format)
        load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file)

    # Sets the output nodes in the current session
    boxes = detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)
        freeze_graph(sess, FLAGS.output_graph)


if __name__ == '__main__':
    tf.app.run()

109
tfyolov3/demo.py Normal file
View File

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from PIL import Image
import time
import yolo_v3
import yolo_v3_tiny
from utils import load_coco_names, draw_boxes, get_boxes_and_inputs, get_boxes_and_inputs_pb, non_max_suppression, \
load_graph, letter_box_image
# Command-line flags for the single-image detection demo.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'input_img', '2in.jpg', 'Input image')
tf.app.flags.DEFINE_string(
    'output_img', '2out.jpg', 'Output image')
tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'ckpt_file', '', 'Checkpoint file')
    #'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
tf.app.flags.DEFINE_string(
    'frozen_model', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')
tf.app.flags.DEFINE_float(
    'conf_threshold', 0.5, 'Confidence threshold')
tf.app.flags.DEFINE_float(
    'iou_threshold', 0.4, 'IoU threshold')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 1.0, 'Gpu memory fraction to use')
def main(argv=None):
    """Run YOLOv3 on a single image and save an annotated copy."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    # Letterbox to the detector's square input size, padding with grey (128).
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        # Path 1: run from a frozen .pb graph.
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time()-t0))
        #print(frozenGraph.inputs)
        #print(frozenGraph.outputs)
        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})
    else:
        # Path 2: rebuild the graph and restore weights from a checkpoint.
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time()-t0))
            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})

    # Class-wise greedy NMS on the raw detections.
    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=FLAGS.conf_threshold,
                                         iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    # Draw in original-image coordinates; input was letterboxed, hence True.
    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)


if __name__ == '__main__':
    tf.app.run()

View File

@@ -0,0 +1,86 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ================================
"""Imports a protobuf model as a graph in Tensorboard."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.core.framework import graph_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import importer
from tensorflow.python.framework import ops
from tensorflow.python.platform import app
from tensorflow.python.platform import gfile
from tensorflow.python.summary import summary
# Try importing TensorRT ops if available
# TODO(aaroey): ideally we should import everything from contrib, but currently
# tensorrt module would cause build errors when being imported in
# tensorflow/contrib/__init__.py. Fix it.
# pylint: disable=unused-import,g-import-not-at-top,wildcard-import
try:
from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import *
except ImportError:
pass
# pylint: enable=unused-import,g-import-not-at-top,wildcard-import
def import_to_tensorboard(model_dir, log_dir):
    """View an imported protobuf model (`.pb` file) as a graph in Tensorboard.

    Args:
      model_dir: The location of the protobuf (`pb`) model to visualize
      log_dir: The location for the Tensorboard log to begin visualization from.

    Usage:
      Call this function with your model location and desired log directory.
      Launch Tensorboard by pointing it to the log directory.
      View your imported `.pb` model as a graph.
    """
    with session.Session(graph=ops.Graph()) as sess:
        with gfile.GFile(model_dir, "rb") as f:
            # Deserialize the frozen GraphDef and import it into the session graph.
            graph_def = graph_pb2.GraphDef()
            graph_def.ParseFromString(f.read())
            importer.import_graph_def(graph_def)

        # Write an event file so TensorBoard can render the graph.
        pb_visual_writer = summary.FileWriter(log_dir)
        pb_visual_writer.add_graph(sess.graph)
        print("Model Imported. Visualize by running: "
              "tensorboard --logdir={}".format(log_dir))
def main(unused_args):
    # FLAGS is populated by the argument parser in the __main__ block below.
    import_to_tensorboard(FLAGS.model_dir, FLAGS.log_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="",
        required=True,
        help="The location of the protobuf (\'pb\') model to visualize.")
    parser.add_argument(
        "--log_dir",
        type=str,
        default="",
        required=True,
        help="The location for the Tensorboard log to begin visualization from.")
    FLAGS, unparsed = parser.parse_known_args()
    app.run(main=main, argv=[sys.argv[0]] + unparsed)

301
tfyolov3/utils.py Normal file
View File

@@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from PIL import ImageDraw, Image
def get_boxes_and_inputs_pb(frozen_graph):
    """
    Fetch the input placeholder and the output-boxes tensor from a frozen graph.

    :param frozen_graph: tf.Graph loaded from a frozen .pb model.
    :return: (boxes, inputs) tensors.
    """
    with frozen_graph.as_default():
        graph = tf.get_default_graph()
        inputs = graph.get_tensor_by_name("inputs:0")
        boxes = graph.get_tensor_by_name("output_boxes:0")
    return boxes, inputs
def get_boxes_and_inputs(model, num_classes, size, data_format):
    """
    Build the detector graph from scratch and return its I/O tensors.

    :param model: graph-builder callable (yolo_v3 / yolo_v3_tiny / yolo_v3_spp).
    :param num_classes: number of predicted classes.
    :param size: square input resolution fed to the detector.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: (boxes, inputs) tensors.
    """
    inputs = tf.placeholder(tf.float32, [1, size, size, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, num_classes,
                           data_format=data_format)

    # Convert centre/size detections into corner-coordinate boxes.
    boxes = detections_boxes(detections)

    return boxes, inputs
def load_graph(frozen_graph_filename):
    """
    Load a frozen GraphDef (.pb) into a fresh tf.Graph.

    :param frozen_graph_filename: path to the frozen protobuf model.
    :return: tf.Graph containing the imported graph.
    """
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        # name="" keeps the original node names (e.g. "inputs", "output_boxes").
        tf.import_graph_def(graph_def, name="")

    return graph
def freeze_graph(sess, output_graph):
    """
    Convert the session's variables to constants and write a frozen .pb file.

    :param sess: session whose variables hold the loaded weights.
    :param output_graph: output path for the frozen protobuf model.
    """
    # Node names kept as graph outputs; nodes unreachable from these are pruned.
    output_node_names = [
        "output_boxes",
        "inputs",
    ]
    output_node_names = ",".join(output_node_names)

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        tf.get_default_graph().as_graph_def(),
        output_node_names.split(",")
    )

    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())
    print("{} ops written to {}.".format(len(output_graph_def.node), output_graph))
def load_weights(var_list, weights_file):
    """
    Loads and converts pre-trained weights.
    :param var_list: list of network variables.
    :param weights_file: name of the binary file.
    :return: list of assign ops
    """
    with open(weights_file, "rb") as fp:
        # Skip the 5-int32 Darknet file header.
        _ = np.fromfile(fp, dtype=np.int32, count=5)
        # The rest of the file is one flat float32 array of all weights.
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0  # current read position within the flat weights array
    i = 0
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]
        # do something only if we process conv layer
        if 'Conv' in var1.name.split('/')[-2]:
            # check type of next layer
            if 'BatchNorm' in var2.name.split('/')[-2]:
                # load batch norm params; the file stores beta before gamma,
                # hence the reorder of the TF variables before reading
                gamma, beta, mean, var = var_list[i + 1:i + 5]
                batch_norm_vars = [beta, gamma, mean, var]
                for var in batch_norm_vars:
                    shape = var.shape.as_list()
                    num_params = np.prod(shape)
                    var_weights = weights[ptr:ptr + num_params].reshape(shape)
                    ptr += num_params
                    assign_ops.append(
                        tf.assign(var, var_weights, validate_shape=True))
                # we move the pointer by 4, because we loaded 4 variables
                i += 4
            elif 'Conv' in var2.name.split('/')[-2]:
                # load biases
                bias = var2
                bias_shape = bias.shape.as_list()
                bias_params = np.prod(bias_shape)
                bias_weights = weights[ptr:ptr +
                                       bias_params].reshape(bias_shape)
                ptr += bias_params
                assign_ops.append(
                    tf.assign(bias, bias_weights, validate_shape=True))
                # we loaded 1 variable
                i += 1
            # we can load weights of conv layer; file layout is
            # (out_ch, in_ch, h, w), TF expects (h, w, in_ch, out_ch)
            shape = var1.shape.as_list()
            num_params = np.prod(shape)
            var_weights = weights[ptr:ptr + num_params].reshape(
                (shape[3], shape[2], shape[0], shape[1]))
            # remember to transpose to column-major
            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
            ptr += num_params
            assign_ops.append(
                tf.assign(var1, var_weights, validate_shape=True))
        i += 1
    return assign_ops
def detections_boxes(detections):
    """
    Convert (center_x, center_y, width, height, ...) detections into
    corner-coordinate boxes (x0, y0, x1, y1, ...), keeping all remaining
    attributes intact.

    :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
    :return: converted detections of same shape as input
    """
    center_x, center_y, width, height, attrs = tf.split(
        detections, [1, 1, 1, 1, -1], axis=-1)

    half_w = width / 2
    half_h = height / 2
    corners = tf.concat([center_x - half_w,
                         center_y - half_h,
                         center_x + half_w,
                         center_y + half_h], axis=-1)

    # The op name "output_boxes" is relied upon by freeze_graph and
    # get_boxes_and_inputs_pb.
    detections = tf.concat([corners, attrs], axis=-1, name="output_boxes")
    return detections
def _iou(box1, box2):
"""
Computes Intersection over Union value for 2 bounding boxes
:param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2]
:param box2: same as box1
:return: IoU
"""
b1_x0, b1_y0, b1_x1, b1_y1 = box1
b2_x0, b2_y0, b2_x1, b2_y1 = box2
int_x0 = max(b1_x0, b2_x0)
int_y0 = max(b1_y0, b2_y0)
int_x1 = min(b1_x1, b2_x1)
int_y1 = min(b1_y1, b2_y1)
int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0)
b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)
# we add small epsilon of 1e-05 to avoid division by 0
iou = int_area / (b1_area + b2_area - int_area + 1e-05)
return iou
def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
"""
Applies Non-max suppression to prediction boxes.
:param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence
:param confidence_threshold: the threshold for deciding if prediction is valid
:param iou_threshold: the threshold for deciding if two boxes overlap
:return: dict: class -> [(box, score)]
"""
conf_mask = np.expand_dims(
(predictions_with_boxes[:, :, 4] > confidence_threshold), -1)
predictions = predictions_with_boxes * conf_mask
result = {}
for i, image_pred in enumerate(predictions):
shape = image_pred.shape
# non_zero_idxs = np.nonzero(image_pred)
# image_pred = image_pred[non_zero_idxs]
temp = image_pred
sum_t = np.sum(temp, axis=1)
non_zero_idx = sum_t != 0
image_pred = image_pred[non_zero_idx, :]
image_pred = image_pred.reshape(-1, shape[-1])
bbox_attrs = image_pred[:, :5]
classes = image_pred[:, 5:]
classes = np.argmax(classes, axis=-1)
unique_classes = list(set(classes.reshape(-1)))
for cls in unique_classes:
cls_mask = classes == cls
cls_boxes = bbox_attrs[np.nonzero(cls_mask)]
cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
cls_scores = cls_boxes[:, -1]
cls_boxes = cls_boxes[:, :-1]
while len(cls_boxes) > 0:
box = cls_boxes[0]
score = cls_scores[0]
if cls not in result:
result[cls] = []
result[cls].append((box, score))
cls_boxes = cls_boxes[1:]
cls_scores = cls_scores[1:]
ious = np.array([_iou(box, x) for x in cls_boxes])
iou_mask = ious < iou_threshold
cls_boxes = cls_boxes[np.nonzero(iou_mask)]
cls_scores = cls_scores[np.nonzero(iou_mask)]
return result
def load_coco_names(file_name):
    """
    Read class names, one per line, into an id -> name dict.

    :param file_name: path to a .names file (one class name per line).
    :return: dict mapping the zero-based line index to the class name.
    """
    names = {}
    with open(file_name) as f:
        for idx, name in enumerate(f):
            # Strip the trailing newline so labels render cleanly when drawn.
            names[idx] = name.strip('\n')
    return names
def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image):
    """
    Draw labelled detection rectangles onto a PIL image in place.

    :param boxes: dict class_id -> list of (box, score), as produced by
        non_max_suppression.
    :param img: PIL image to draw on (modified in place).
    :param cls_names: dict class_id -> class name.
    :param detection_size: (width, height) the detector ran at.
    :param is_letter_box_image: True when the detector input was letterboxed.
    """
    draw = ImageDraw.Draw(img)
    for cls, bboxs in boxes.items():
        # One random colour per class.
        color = tuple(np.random.randint(0, 256, 3))
        for box, score in bboxs:
            scaled = convert_to_original_size(box, np.array(detection_size),
                                              np.array(img.size),
                                              is_letter_box_image)
            draw.rectangle(scaled, outline=color)
            label = '{} {:.2f}%'.format(cls_names[cls], score * 100)
            draw.text(scaled[:2], label, fill=color)
def convert_to_original_size(box, size, original_size, is_letter_box_image):
    """
    Map a detection box from detector-input coordinates back to the
    original image's coordinate system.

    :param box: flat array of 4 values (x0, y0, x1, y1).
    :param size: (width, height) of the detector input.
    :param original_size: (width, height) of the original image.
    :param is_letter_box_image: True when the input was letterboxed.
    :return: list of 4 coordinates in original-image space.
    """
    corners = box.reshape(2, 2)
    if is_letter_box_image:
        # Undo the letterbox padding and scaling, one corner at a time.
        corners[0, :] = letter_box_pos_to_original_pos(corners[0, :], size, original_size)
        corners[1, :] = letter_box_pos_to_original_pos(corners[1, :], size, original_size)
    else:
        # Plain resize: a per-axis scale factor is enough.
        corners = corners * (original_size / size)
    return list(corners.reshape(-1))
def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value) -> np.ndarray:
    """
    Fit image with final image with output_width and output_height.
    :param image: PILLOW Image object.
    :param output_height: height of the final image.
    :param output_width: width of the final image.
    :param fill_value: fill value for empty area. Can be uint8 or np.ndarray
    :return: numpy image fit within letterbox. dtype=uint8, shape=(output_height, output_width)
    """
    # Scale preserving aspect ratio so the whole image fits inside the output.
    height_ratio = float(output_height)/image.size[1]
    width_ratio = float(output_width)/image.size[0]
    fit_ratio = min(width_ratio, height_ratio)
    fit_height = int(image.size[1] * fit_ratio)
    fit_width = int(image.size[0] * fit_ratio)
    fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR))

    if isinstance(fill_value, int):
        # Broadcast a scalar fill to one value per channel.
        fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype)

    # Canvas full of fill values, then paste the resized image centred on it.
    to_return = np.tile(fill_value, (output_height, output_width, 1))
    pad_top = int(0.5 * (output_height - fit_height))
    pad_left = int(0.5 * (output_width - fit_width))
    to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image
    return to_return
def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size) -> np.ndarray:
    """
    Map a position inside a letterboxed image back to the original image.

    Parameters should have same shape and dimension space. (Width, Height) or (Height, Width)
    :param letter_pos: The current position within letterbox image including fill value area.
    :param current_size: The size of whole image including fill value area.
    :param ori_image_size: The size of image before being letter boxed.
    :return: position in original-image coordinates, as a float array.
    """
    # np.float was removed from NumPy (1.24+); use the explicit 64-bit dtype.
    letter_pos = np.asarray(letter_pos, dtype=np.float64)
    current_size = np.asarray(current_size, dtype=np.float64)
    ori_image_size = np.asarray(ori_image_size, dtype=np.float64)

    final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1])
    # Padding added on each side by the letterbox fit.
    pad = 0.5 * (current_size - final_ratio * ori_image_size)
    pad = pad.astype(np.int32)
    to_return_pos = (letter_pos - pad) / final_ratio
    return to_return_pos

292
tfyolov3/yolo_v3.py Normal file
View File

@@ -0,0 +1,292 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
# TF-Slim is used for all conv / batch-norm layers below.
slim = tf.contrib.slim

# Batch-norm hyperparameters shared by every layer of the network.
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
# Negative slope for the leaky ReLU activations.
_LEAKY_RELU = 0.1

# The 9 anchor sizes (width, height) in input-image pixels; consumed in
# slices of three by the detection layers (see yolo_v3).
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
def darknet53(inputs):
    """
    Builds Darknet-53 model.

    Returns two intermediate feature maps (route_1, route_2), used later by
    the YOLO detection heads, together with the final feature map.
    """
    inputs = _conv2d_fixed_padding(inputs, 32, 3)
    inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2)
    inputs = _darknet53_block(inputs, 32)
    inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2)

    for i in range(2):
        inputs = _darknet53_block(inputs, 64)

    inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2)

    for i in range(8):
        inputs = _darknet53_block(inputs, 128)

    route_1 = inputs  # reused by the third detection head in yolo_v3
    inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2)

    for i in range(8):
        inputs = _darknet53_block(inputs, 256)

    route_2 = inputs  # reused by the second detection head in yolo_v3
    inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2)

    for i in range(4):
        inputs = _darknet53_block(inputs, 512)

    return route_1, route_2, inputs
def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1):
    """
    slim.conv2d wrapper that pads explicitly for strided convolutions so the
    padding amount does not depend on the input size.
    """
    if strides > 1:
        # Pad explicitly, then use VALID padding in the convolution.
        inputs = _fixed_padding(inputs, kernel_size)
    inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides,
                         padding=('SAME' if strides == 1 else 'VALID'))
    return inputs
def _darknet53_block(inputs, filters):
    """
    Darknet-53 residual block: 1x1 bottleneck then 3x3 conv, plus identity.

    :param inputs: input feature map.
    :param filters: channel count of the 1x1 bottleneck (the 3x3 uses 2x this).
    :return: feature map with the shortcut added.
    """
    shortcut = inputs
    out = _conv2d_fixed_padding(inputs, filters, 1)
    out = _conv2d_fixed_padding(out, filters * 2, 3)
    return out + shortcut
def _spp_block(inputs, data_format='NCHW'):
    """
    Spatial Pyramid Pooling block: concatenate stride-1 max-pools at three
    window sizes (13, 9, 5) with the input along the channel axis.
    """
    return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'),
                      slim.max_pool2d(inputs, 9, 1, 'SAME'),
                      slim.max_pool2d(inputs, 5, 1, 'SAME'),
                      inputs],
                     axis=1 if data_format == 'NCHW' else 3)
@tf.contrib.framework.add_arg_scope
def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs):
    """
    Pads the input along the spatial dimensions independently of input size.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
        Should be a positive integer.
      data_format: The input format ('NHWC' or 'NCHW'). Arrives through
        **kwargs (injected by slim.arg_scope in this file).
      mode: The mode for tf.pad.

    Returns:
      A tensor with the same format as the input with the data either intact
      (if kernel_size == 1) or padded (if kernel_size > 1).
    """
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    if kwargs['data_format'] == 'NCHW':
        padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                                        [pad_beg, pad_end],
                                        [pad_beg, pad_end]],
                               mode=mode)
    else:
        padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                        [pad_beg, pad_end], [0, 0]], mode=mode)
    return padded_inputs
def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False):
    """
    YOLO head block of alternating 1x1 / 3x3 convolutions, optionally with an
    SPP block inserted. Returns the pre-final feature map ('route', reused by
    the upsample path) and the block output.
    """
    inputs = _conv2d_fixed_padding(inputs, filters, 1)
    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    inputs = _conv2d_fixed_padding(inputs, filters, 1)

    if with_spp:
        inputs = _spp_block(inputs, data_format)
        inputs = _conv2d_fixed_padding(inputs, filters, 1)

    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    inputs = _conv2d_fixed_padding(inputs, filters, 1)
    route = inputs
    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    return route, inputs
def _get_size(shape, data_format):
if len(shape) == 4:
shape = shape[1:]
return shape[1:3] if data_format == 'NCHW' else shape[0:2]
def _detection_layer(inputs, num_classes, anchors, img_size, data_format):
    """
    Final 1x1 prediction conv plus decoding of raw outputs into
    (center_x, center_y, w, h, confidence, class scores) per anchor.

    :param inputs: feature map from the preceding YOLO block.
    :param num_classes: number of object classes.
    :param anchors: list of (w, h) anchor sizes in input-image pixels.
    :param img_size: spatial size of the network input.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: tensor of shape (batch, num_anchors * grid_cells, 5 + num_classes).
    """
    num_anchors = len(anchors)
    # Raw prediction conv: no batch norm, no activation, biases enabled.
    predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1,
                              stride=1, normalizer_fn=None,
                              activation_fn=None,
                              biases_initializer=tf.zeros_initializer())

    shape = predictions.get_shape().as_list()
    grid_size = _get_size(shape, data_format)
    dim = grid_size[0] * grid_size[1]
    bbox_attrs = 5 + num_classes

    if data_format == 'NCHW':
        # Bring the attribute channels last before flattening the grid.
        predictions = tf.reshape(
            predictions, [-1, num_anchors * bbox_attrs, dim])
        predictions = tf.transpose(predictions, [0, 2, 1])

    predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs])

    # Stride = how many input pixels one grid cell covers.
    stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1])

    # Convert anchors from input-image pixels to grid units.
    anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors]

    box_centers, box_sizes, confidence, classes = tf.split(
        predictions, [2, 2, 1, num_classes], axis=-1)

    box_centers = tf.nn.sigmoid(box_centers)
    confidence = tf.nn.sigmoid(confidence)

    # Per-cell (x, y) offsets, tiled once per anchor.
    grid_x = tf.range(grid_size[0], dtype=tf.float32)
    grid_y = tf.range(grid_size[1], dtype=tf.float32)
    a, b = tf.meshgrid(grid_x, grid_y)
    x_offset = tf.reshape(a, (-1, 1))
    y_offset = tf.reshape(b, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2])

    # Decode centres: sigmoid offset within the cell, then scale to pixels.
    box_centers = box_centers + x_y_offset
    box_centers = box_centers * stride

    # Decode sizes: exp of raw prediction times anchor, scaled to pixels.
    anchors = tf.tile(anchors, [dim, 1])
    box_sizes = tf.exp(box_sizes) * anchors
    box_sizes = box_sizes * stride

    detections = tf.concat([box_centers, box_sizes, confidence], axis=-1)

    classes = tf.nn.sigmoid(classes)
    predictions = tf.concat([detections, classes], axis=-1)
    return predictions
def _upsample(inputs, out_shape, data_format='NCHW'):
    """
    Nearest-neighbour upsample of a feature map to a target shape.

    :param inputs: feature map in `data_format` layout.
    :param out_shape: target shape list in the same layout
        (NCHW: [N, C, H, W]; NHWC: [N, H, W, C]).
    :param data_format: 'NCHW' or 'NHWC'.
    :return: upsampled tensor (named 'upsampled') in `data_format` layout.
    """
    # tf.image.resize_nearest_neighbor accepts input in format NHWC
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])

    # BUG FIX: height and width were previously read from each other's index
    # (e.g. new_height = out_shape[3] for NCHW), which only worked because
    # the feature maps happen to be square. Use the correct indices.
    if data_format == 'NCHW':
        new_height = out_shape[2]
        new_width = out_shape[3]
    else:
        new_height = out_shape[1]
        new_width = out_shape[2]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    # back to NCHW if needed
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = tf.identity(inputs, name='upsampled')
    return inputs
def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False):
    """
    Creates YOLO v3 model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :param with_spp: whether or not is using spp layer.
    :return: detections tensor.
    """
    # it will be needed later on
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse):
        with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                route_1, route_2, inputs = darknet53(inputs)

            with tf.variable_scope('yolo-v3'):
                # First head: deepest features, largest anchors.
                route, inputs = _yolo_block(inputs, 512, data_format, with_spp)
                detect_1 = _detection_layer(
                    inputs, num_classes, _ANCHORS[6:9], img_size, data_format)
                detect_1 = tf.identity(detect_1, name='detect_1')

                # Upsample and fuse with route_2, then the second head.
                inputs = _conv2d_fixed_padding(route, 256, 1)
                upsample_size = route_2.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_2],
                                   axis=1 if data_format == 'NCHW' else 3)

                route, inputs = _yolo_block(inputs, 256)
                detect_2 = _detection_layer(
                    inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                detect_2 = tf.identity(detect_2, name='detect_2')

                # Upsample and fuse with route_1, then the third head.
                inputs = _conv2d_fixed_padding(route, 128, 1)
                upsample_size = route_1.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_1],
                                   axis=1 if data_format == 'NCHW' else 3)

                _, inputs = _yolo_block(inputs, 128)
                detect_3 = _detection_layer(
                    inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                detect_3 = tf.identity(detect_3, name='detect_3')

                detections = tf.concat([detect_1, detect_2, detect_3], axis=1)
                detections = tf.identity(detections, name='detections')
                return detections
def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 with SPP model.

    Same contract as yolo_v3; simply forwards with with_spp=True.
    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: detections tensor.
    """
    return yolo_v3(inputs, num_classes,
                   is_training=is_training,
                   data_format=data_format,
                   reuse=reuse,
                   with_spp=True)

100
tfyolov3/yolo_v3_tiny.py Normal file
View File

@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
_detection_layer, _upsample
slim = tf.contrib.slim

# Batch-norm / activation hyperparameters (same values as yolo_v3).
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1

# The 6 tiny-YOLOv3 anchor sizes (width, height); consumed in slices of
# three by the two detection layers below.
_ANCHORS = [(10, 14), (23, 27), (37, 58),
            (81, 82), (135, 169), (344, 319)]
def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 tiny model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: detections tensor.
    """
    # it will be needed later on
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format):
        with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params,
                                biases_initializer=None,
                                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
                with tf.variable_scope('yolo-v3-tiny'):
                    # Backbone: six conv+maxpool stages, channels 16 -> 512.
                    for i in range(6):
                        inputs = _conv2d_fixed_padding(
                            inputs, 16 * pow(2, i), 3)

                        if i == 4:
                            # saved for the second detection head below
                            route_1 = inputs

                        if i == 5:
                            # final pool keeps the spatial size (stride 1)
                            inputs = slim.max_pool2d(
                                inputs, [2, 2], stride=1, padding="SAME", scope='pool2')
                        else:
                            inputs = slim.max_pool2d(
                                inputs, [2, 2], scope='pool2')

                    inputs = _conv2d_fixed_padding(inputs, 1024, 3)
                    inputs = _conv2d_fixed_padding(inputs, 256, 1)
                    route_2 = inputs  # feeds the upsample path below

                    inputs = _conv2d_fixed_padding(inputs, 512, 3)
                    # inputs = _conv2d_fixed_padding(inputs, 255, 1)

                    detect_1 = _detection_layer(
                        inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                    detect_1 = tf.identity(detect_1, name='detect_1')

                    inputs = _conv2d_fixed_padding(route_2, 128, 1)
                    upsample_size = route_1.get_shape().as_list()
                    inputs = _upsample(inputs, upsample_size, data_format)

                    inputs = tf.concat([inputs, route_1],
                                       axis=1 if data_format == 'NCHW' else 3)

                    inputs = _conv2d_fixed_padding(inputs, 256, 3)
                    # inputs = _conv2d_fixed_padding(inputs, 255, 1)

                    detect_2 = _detection_layer(
                        inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                    detect_2 = tf.identity(detect_2, name='detect_2')

                    detections = tf.concat([detect_1, detect_2], axis=1)
                    detections = tf.identity(detections, name='detections')
                    return detections