Add files via upload

This commit is contained in:
Oscar
2020-05-21 18:56:45 +08:00
committed by GitHub
parent d2fa3d6cfb
commit 1ced062962
7 changed files with 1000 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import tensorflow as tf
import yolo_v3
import yolo_v3_tiny
from utils import load_coco_names, load_weights
# Command-line flags for the Darknet-weights -> TF-checkpoint converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_string(
    'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
def main(argv=None):
    """Build the selected YOLOv3 graph, load Darknet weights, save a TF checkpoint."""
    # Pick the graph builder that matches the requested model variant.
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    # any size > 320 will work here
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes),
                           data_format=FLAGS.data_format)
        # Assign ops that copy the binary Darknet weights into graph variables.
        load_ops = load_weights(tf.global_variables(
            scope='detector'), FLAGS.weights_file)

    saver = tf.train.Saver(tf.global_variables(scope='detector'))

    with tf.Session() as sess:
        sess.run(load_ops)
        save_path = saver.save(sess, save_path=FLAGS.ckpt_file)
        print('Model saved in path: {}'.format(save_path))


if __name__ == '__main__':
    tf.app.run()

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
import yolo_v3
import yolo_v3_tiny
from PIL import Image, ImageDraw
from utils import load_weights, load_coco_names, detections_boxes, freeze_graph
# Command-line flags for the Darknet-weights -> frozen-graph (.pb) converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'output_graph', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model output path')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')
def main(argv=None):
    """Build the selected YOLOv3 graph, load Darknet weights, freeze to a .pb file."""
    # Pick the graph builder that matches the requested model variant.
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs")

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes), data_format=FLAGS.data_format)
        load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file)

    # Sets the output nodes in the current session
    boxes = detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)
        freeze_graph(sess, FLAGS.output_graph)


if __name__ == '__main__':
    tf.app.run()

109
tfyolov3/demo.py Normal file
View File

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from PIL import Image
import time
import yolo_v3
import yolo_v3_tiny
from utils import load_coco_names, draw_boxes, get_boxes_and_inputs, get_boxes_and_inputs_pb, non_max_suppression, \
load_graph, letter_box_image
# Command-line flags for the single-image detection demo.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'input_img', '2in.jpg', 'Input image')
tf.app.flags.DEFINE_string(
    'output_img', '2out.jpg', 'Output image')
tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'ckpt_file', '', 'Checkpoint file')
    #'ckpt_file', './saved_model/model.ckpt', 'Checkpoint file')
tf.app.flags.DEFINE_string(
    'frozen_model', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')
tf.app.flags.DEFINE_float(
    'conf_threshold', 0.5, 'Confidence threshold')
tf.app.flags.DEFINE_float(
    'iou_threshold', 0.4, 'IoU threshold')
tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 1.0, 'Gpu memory fraction to use')
def main(argv=None):
    """Run YOLOv3 on a single image and save an annotated copy."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    img = Image.open(FLAGS.input_img)
    # Letterbox to the detector's square input size, padding with grey (128).
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        # Path 1: run from a frozen .pb graph.
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time()-t0))
        #print(frozenGraph.inputs)
        #print(frozenGraph.outputs)
        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})
    else:
        # Path 2: rebuild the graph and restore weights from a checkpoint.
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time()-t0))
            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})

    # Class-wise greedy NMS on the raw detections.
    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=FLAGS.conf_threshold,
                                         iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    # Draw in original-image coordinates; input was letterboxed, hence True.
    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)
    img.save(FLAGS.output_img)


if __name__ == '__main__':
    tf.app.run()

View File

@@ -0,0 +1,86 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ================================
"""Imports a protobuf model as a graph in Tensorboard."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.core.framework import graph_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import importer
from tensorflow.python.framework import ops
from tensorflow.python.platform import app
from tensorflow.python.platform import gfile
from tensorflow.python.summary import summary
# Try importing TensorRT ops if available
# TODO(aaroey): ideally we should import everything from contrib, but currently
# tensorrt module would cause build errors when being imported in
# tensorflow/contrib/__init__.py. Fix it.
# pylint: disable=unused-import,g-import-not-at-top,wildcard-import
try:
from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import *
except ImportError:
pass
# pylint: enable=unused-import,g-import-not-at-top,wildcard-import
def import_to_tensorboard(model_dir, log_dir):
    """View an imported protobuf model (`.pb` file) as a graph in Tensorboard.

    Args:
      model_dir: The location of the protobuf (`pb`) model to visualize
      log_dir: The location for the Tensorboard log to begin visualization from.

    Usage:
      Call this function with your model location and desired log directory.
      Launch Tensorboard by pointing it to the log directory.
      View your imported `.pb` model as a graph.
    """
    with session.Session(graph=ops.Graph()) as sess:
        with gfile.GFile(model_dir, "rb") as f:
            # Deserialize the frozen GraphDef and import it into the session graph.
            graph_def = graph_pb2.GraphDef()
            graph_def.ParseFromString(f.read())
            importer.import_graph_def(graph_def)

        # Write an event file so TensorBoard can render the graph.
        pb_visual_writer = summary.FileWriter(log_dir)
        pb_visual_writer.add_graph(sess.graph)
        print("Model Imported. Visualize by running: "
              "tensorboard --logdir={}".format(log_dir))
def main(unused_args):
    # FLAGS is populated by the argument parser in the __main__ block below.
    import_to_tensorboard(FLAGS.model_dir, FLAGS.log_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="",
        required=True,
        help="The location of the protobuf (\'pb\') model to visualize.")
    parser.add_argument(
        "--log_dir",
        type=str,
        default="",
        required=True,
        help="The location for the Tensorboard log to begin visualization from.")
    FLAGS, unparsed = parser.parse_known_args()
    app.run(main=main, argv=[sys.argv[0]] + unparsed)

301
tfyolov3/utils.py Normal file
View File

@@ -0,0 +1,301 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from PIL import ImageDraw, Image
def get_boxes_and_inputs_pb(frozen_graph):
    """
    Fetch the input placeholder and the output-boxes tensor from a frozen graph.

    :param frozen_graph: tf.Graph loaded from a frozen .pb model.
    :return: (boxes, inputs) tensors.
    """
    with frozen_graph.as_default():
        graph = tf.get_default_graph()
        inputs = graph.get_tensor_by_name("inputs:0")
        boxes = graph.get_tensor_by_name("output_boxes:0")
    return boxes, inputs
def get_boxes_and_inputs(model, num_classes, size, data_format):
    """
    Build the detector graph from scratch and return its I/O tensors.

    :param model: graph-builder callable (yolo_v3 / yolo_v3_tiny / yolo_v3_spp).
    :param num_classes: number of predicted classes.
    :param size: square input resolution fed to the detector.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: (boxes, inputs) tensors.
    """
    inputs = tf.placeholder(tf.float32, [1, size, size, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, num_classes,
                           data_format=data_format)

    # Convert centre/size detections into corner-coordinate boxes.
    boxes = detections_boxes(detections)

    return boxes, inputs
def load_graph(frozen_graph_filename):
    """
    Load a frozen GraphDef (.pb) into a fresh tf.Graph.

    :param frozen_graph_filename: path to the frozen protobuf model.
    :return: tf.Graph containing the imported graph.
    """
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        # name="" keeps the original node names (e.g. "inputs", "output_boxes").
        tf.import_graph_def(graph_def, name="")

    return graph
def freeze_graph(sess, output_graph):
    """
    Convert the session's variables to constants and write a frozen .pb file.

    :param sess: session whose variables hold the loaded weights.
    :param output_graph: output path for the frozen protobuf model.
    """
    # Node names kept as graph outputs; nodes unreachable from these are pruned.
    output_node_names = [
        "output_boxes",
        "inputs",
    ]
    output_node_names = ",".join(output_node_names)

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        tf.get_default_graph().as_graph_def(),
        output_node_names.split(",")
    )

    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())
    print("{} ops written to {}.".format(len(output_graph_def.node), output_graph))
def load_weights(var_list, weights_file):
    """
    Loads and converts pre-trained weights.
    :param var_list: list of network variables.
    :param weights_file: name of the binary file.
    :return: list of assign ops
    """
    with open(weights_file, "rb") as fp:
        # Skip the 5-int32 Darknet file header.
        _ = np.fromfile(fp, dtype=np.int32, count=5)
        # The rest of the file is one flat float32 array of all weights.
        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0  # current read position within the flat weights array
    i = 0
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]
        # do something only if we process conv layer
        if 'Conv' in var1.name.split('/')[-2]:
            # check type of next layer
            if 'BatchNorm' in var2.name.split('/')[-2]:
                # load batch norm params; the file stores beta before gamma,
                # hence the reorder of the TF variables before reading
                gamma, beta, mean, var = var_list[i + 1:i + 5]
                batch_norm_vars = [beta, gamma, mean, var]
                for var in batch_norm_vars:
                    shape = var.shape.as_list()
                    num_params = np.prod(shape)
                    var_weights = weights[ptr:ptr + num_params].reshape(shape)
                    ptr += num_params
                    assign_ops.append(
                        tf.assign(var, var_weights, validate_shape=True))
                # we move the pointer by 4, because we loaded 4 variables
                i += 4
            elif 'Conv' in var2.name.split('/')[-2]:
                # load biases
                bias = var2
                bias_shape = bias.shape.as_list()
                bias_params = np.prod(bias_shape)
                bias_weights = weights[ptr:ptr +
                                       bias_params].reshape(bias_shape)
                ptr += bias_params
                assign_ops.append(
                    tf.assign(bias, bias_weights, validate_shape=True))
                # we loaded 1 variable
                i += 1
            # we can load weights of conv layer; file layout is
            # (out_ch, in_ch, h, w), TF expects (h, w, in_ch, out_ch)
            shape = var1.shape.as_list()
            num_params = np.prod(shape)
            var_weights = weights[ptr:ptr + num_params].reshape(
                (shape[3], shape[2], shape[0], shape[1]))
            # remember to transpose to column-major
            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
            ptr += num_params
            assign_ops.append(
                tf.assign(var1, var_weights, validate_shape=True))
        i += 1
    return assign_ops
def detections_boxes(detections):
    """
    Convert (center_x, center_y, width, height, ...) detections into
    corner-coordinate boxes (x0, y0, x1, y1, ...), keeping all remaining
    attributes intact.

    :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
    :return: converted detections of same shape as input
    """
    center_x, center_y, width, height, attrs = tf.split(
        detections, [1, 1, 1, 1, -1], axis=-1)

    half_w = width / 2
    half_h = height / 2
    corners = tf.concat([center_x - half_w,
                         center_y - half_h,
                         center_x + half_w,
                         center_y + half_h], axis=-1)

    # The op name "output_boxes" is relied upon by freeze_graph and
    # get_boxes_and_inputs_pb.
    detections = tf.concat([corners, attrs], axis=-1, name="output_boxes")
    return detections
def _iou(box1, box2):
"""
Computes Intersection over Union value for 2 bounding boxes
:param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2]
:param box2: same as box1
:return: IoU
"""
b1_x0, b1_y0, b1_x1, b1_y1 = box1
b2_x0, b2_y0, b2_x1, b2_y1 = box2
int_x0 = max(b1_x0, b2_x0)
int_y0 = max(b1_y0, b2_y0)
int_x1 = min(b1_x1, b2_x1)
int_y1 = min(b1_y1, b2_y1)
int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0)
b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)
# we add small epsilon of 1e-05 to avoid division by 0
iou = int_area / (b1_area + b2_area - int_area + 1e-05)
return iou
def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
"""
Applies Non-max suppression to prediction boxes.
:param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence
:param confidence_threshold: the threshold for deciding if prediction is valid
:param iou_threshold: the threshold for deciding if two boxes overlap
:return: dict: class -> [(box, score)]
"""
conf_mask = np.expand_dims(
(predictions_with_boxes[:, :, 4] > confidence_threshold), -1)
predictions = predictions_with_boxes * conf_mask
result = {}
for i, image_pred in enumerate(predictions):
shape = image_pred.shape
# non_zero_idxs = np.nonzero(image_pred)
# image_pred = image_pred[non_zero_idxs]
temp = image_pred
sum_t = np.sum(temp, axis=1)
non_zero_idx = sum_t != 0
image_pred = image_pred[non_zero_idx, :]
image_pred = image_pred.reshape(-1, shape[-1])
bbox_attrs = image_pred[:, :5]
classes = image_pred[:, 5:]
classes = np.argmax(classes, axis=-1)
unique_classes = list(set(classes.reshape(-1)))
for cls in unique_classes:
cls_mask = classes == cls
cls_boxes = bbox_attrs[np.nonzero(cls_mask)]
cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
cls_scores = cls_boxes[:, -1]
cls_boxes = cls_boxes[:, :-1]
while len(cls_boxes) > 0:
box = cls_boxes[0]
score = cls_scores[0]
if cls not in result:
result[cls] = []
result[cls].append((box, score))
cls_boxes = cls_boxes[1:]
cls_scores = cls_scores[1:]
ious = np.array([_iou(box, x) for x in cls_boxes])
iou_mask = ious < iou_threshold
cls_boxes = cls_boxes[np.nonzero(iou_mask)]
cls_scores = cls_scores[np.nonzero(iou_mask)]
return result
def load_coco_names(file_name):
    """
    Read class names, one per line, into an id -> name dict.

    :param file_name: path to a .names file (one class name per line).
    :return: dict mapping the zero-based line index to the class name.
    """
    names = {}
    with open(file_name) as f:
        for idx, name in enumerate(f):
            # Strip the trailing newline so labels render cleanly when drawn.
            names[idx] = name.strip('\n')
    return names
def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image):
    """
    Draw labelled detection rectangles onto a PIL image in place.

    :param boxes: dict class_id -> list of (box, score), as produced by
        non_max_suppression.
    :param img: PIL image to draw on (modified in place).
    :param cls_names: dict class_id -> class name.
    :param detection_size: (width, height) the detector ran at.
    :param is_letter_box_image: True when the detector input was letterboxed.
    """
    draw = ImageDraw.Draw(img)
    for cls, bboxs in boxes.items():
        # One random colour per class.
        color = tuple(np.random.randint(0, 256, 3))
        for box, score in bboxs:
            scaled = convert_to_original_size(box, np.array(detection_size),
                                              np.array(img.size),
                                              is_letter_box_image)
            draw.rectangle(scaled, outline=color)
            label = '{} {:.2f}%'.format(cls_names[cls], score * 100)
            draw.text(scaled[:2], label, fill=color)
def convert_to_original_size(box, size, original_size, is_letter_box_image):
    """
    Map a detection box from detector-input coordinates back to the
    original image's coordinate system.

    :param box: flat array of 4 values (x0, y0, x1, y1).
    :param size: (width, height) of the detector input.
    :param original_size: (width, height) of the original image.
    :param is_letter_box_image: True when the input was letterboxed.
    :return: list of 4 coordinates in original-image space.
    """
    corners = box.reshape(2, 2)
    if is_letter_box_image:
        # Undo the letterbox padding and scaling, one corner at a time.
        corners[0, :] = letter_box_pos_to_original_pos(corners[0, :], size, original_size)
        corners[1, :] = letter_box_pos_to_original_pos(corners[1, :], size, original_size)
    else:
        # Plain resize: a per-axis scale factor is enough.
        corners = corners * (original_size / size)
    return list(corners.reshape(-1))
def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value) -> np.ndarray:
    """
    Fit image with final image with output_width and output_height.
    :param image: PILLOW Image object.
    :param output_height: height of the final image.
    :param output_width: width of the final image.
    :param fill_value: fill value for empty area. Can be uint8 or np.ndarray
    :return: numpy image fit within letterbox. dtype=uint8, shape=(output_height, output_width)
    """
    # Scale preserving aspect ratio so the whole image fits inside the output.
    height_ratio = float(output_height)/image.size[1]
    width_ratio = float(output_width)/image.size[0]
    fit_ratio = min(width_ratio, height_ratio)
    fit_height = int(image.size[1] * fit_ratio)
    fit_width = int(image.size[0] * fit_ratio)
    fit_image = np.asarray(image.resize((fit_width, fit_height), resample=Image.BILINEAR))

    if isinstance(fill_value, int):
        # Broadcast a scalar fill to one value per channel.
        fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype)

    # Canvas full of fill values, then paste the resized image centred on it.
    to_return = np.tile(fill_value, (output_height, output_width, 1))
    pad_top = int(0.5 * (output_height - fit_height))
    pad_left = int(0.5 * (output_width - fit_width))
    to_return[pad_top:pad_top+fit_height, pad_left:pad_left+fit_width] = fit_image
    return to_return
def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size) -> np.ndarray:
    """
    Map a position inside a letterboxed image back to the original image.

    Parameters should have same shape and dimension space. (Width, Height) or (Height, Width)
    :param letter_pos: The current position within letterbox image including fill value area.
    :param current_size: The size of whole image including fill value area.
    :param ori_image_size: The size of image before being letter boxed.
    :return: position in original-image coordinates, as a float array.
    """
    # np.float was removed from NumPy (1.24+); use the explicit 64-bit dtype.
    letter_pos = np.asarray(letter_pos, dtype=np.float64)
    current_size = np.asarray(current_size, dtype=np.float64)
    ori_image_size = np.asarray(ori_image_size, dtype=np.float64)

    final_ratio = min(current_size[0]/ori_image_size[0], current_size[1]/ori_image_size[1])
    # Padding added on each side by the letterbox fit.
    pad = 0.5 * (current_size - final_ratio * ori_image_size)
    pad = pad.astype(np.int32)
    to_return_pos = (letter_pos - pad) / final_ratio
    return to_return_pos

292
tfyolov3/yolo_v3.py Normal file
View File

@@ -0,0 +1,292 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
# TF-Slim is used for all conv / batch-norm layers below.
slim = tf.contrib.slim

# Batch-norm hyperparameters shared by every layer of the network.
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
# Negative slope for the leaky ReLU activations.
_LEAKY_RELU = 0.1

# The 9 anchor sizes (width, height) in input-image pixels; consumed in
# slices of three by the detection layers (see yolo_v3).
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
def darknet53(inputs):
    """
    Builds Darknet-53 model.

    Returns two intermediate feature maps (route_1, route_2), used later by
    the YOLO detection heads, together with the final feature map.
    """
    inputs = _conv2d_fixed_padding(inputs, 32, 3)
    inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2)
    inputs = _darknet53_block(inputs, 32)
    inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2)

    for i in range(2):
        inputs = _darknet53_block(inputs, 64)

    inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2)

    for i in range(8):
        inputs = _darknet53_block(inputs, 128)

    route_1 = inputs  # reused by the third detection head in yolo_v3
    inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2)

    for i in range(8):
        inputs = _darknet53_block(inputs, 256)

    route_2 = inputs  # reused by the second detection head in yolo_v3
    inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2)

    for i in range(4):
        inputs = _darknet53_block(inputs, 512)

    return route_1, route_2, inputs
def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1):
    """
    slim.conv2d wrapper that pads explicitly for strided convolutions so the
    padding amount does not depend on the input size.
    """
    if strides > 1:
        # Pad explicitly, then use VALID padding in the convolution.
        inputs = _fixed_padding(inputs, kernel_size)
    inputs = slim.conv2d(inputs, filters, kernel_size, stride=strides,
                         padding=('SAME' if strides == 1 else 'VALID'))
    return inputs
def _darknet53_block(inputs, filters):
    """
    Darknet-53 residual block: 1x1 bottleneck then 3x3 conv, plus identity.

    :param inputs: input feature map.
    :param filters: channel count of the 1x1 bottleneck (the 3x3 uses 2x this).
    :return: feature map with the shortcut added.
    """
    shortcut = inputs
    out = _conv2d_fixed_padding(inputs, filters, 1)
    out = _conv2d_fixed_padding(out, filters * 2, 3)
    return out + shortcut
def _spp_block(inputs, data_format='NCHW'):
    """
    Spatial Pyramid Pooling block: concatenate stride-1 max-pools at three
    window sizes (13, 9, 5) with the input along the channel axis.
    """
    return tf.concat([slim.max_pool2d(inputs, 13, 1, 'SAME'),
                      slim.max_pool2d(inputs, 9, 1, 'SAME'),
                      slim.max_pool2d(inputs, 5, 1, 'SAME'),
                      inputs],
                     axis=1 if data_format == 'NCHW' else 3)
@tf.contrib.framework.add_arg_scope
def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs):
    """
    Pads the input along the spatial dimensions independently of input size.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
        Should be a positive integer.
      data_format: The input format ('NHWC' or 'NCHW'). Arrives through
        **kwargs (injected by slim.arg_scope in this file).
      mode: The mode for tf.pad.

    Returns:
      A tensor with the same format as the input with the data either intact
      (if kernel_size == 1) or padded (if kernel_size > 1).
    """
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    if kwargs['data_format'] == 'NCHW':
        padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                                        [pad_beg, pad_end],
                                        [pad_beg, pad_end]],
                               mode=mode)
    else:
        padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                        [pad_beg, pad_end], [0, 0]], mode=mode)
    return padded_inputs
def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False):
    """
    YOLO head block of alternating 1x1 / 3x3 convolutions, optionally with an
    SPP block inserted. Returns the pre-final feature map ('route', reused by
    the upsample path) and the block output.
    """
    inputs = _conv2d_fixed_padding(inputs, filters, 1)
    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    inputs = _conv2d_fixed_padding(inputs, filters, 1)

    if with_spp:
        inputs = _spp_block(inputs, data_format)
        inputs = _conv2d_fixed_padding(inputs, filters, 1)

    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    inputs = _conv2d_fixed_padding(inputs, filters, 1)
    route = inputs
    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    return route, inputs
def _get_size(shape, data_format):
if len(shape) == 4:
shape = shape[1:]
return shape[1:3] if data_format == 'NCHW' else shape[0:2]
def _detection_layer(inputs, num_classes, anchors, img_size, data_format):
    """
    Final 1x1 prediction conv plus decoding of raw outputs into
    (center_x, center_y, w, h, confidence, class scores) per anchor.

    :param inputs: feature map from the preceding YOLO block.
    :param num_classes: number of object classes.
    :param anchors: list of (w, h) anchor sizes in input-image pixels.
    :param img_size: spatial size of the network input.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: tensor of shape (batch, num_anchors * grid_cells, 5 + num_classes).
    """
    num_anchors = len(anchors)
    # Raw prediction conv: no batch norm, no activation, biases enabled.
    predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1,
                              stride=1, normalizer_fn=None,
                              activation_fn=None,
                              biases_initializer=tf.zeros_initializer())

    shape = predictions.get_shape().as_list()
    grid_size = _get_size(shape, data_format)
    dim = grid_size[0] * grid_size[1]
    bbox_attrs = 5 + num_classes

    if data_format == 'NCHW':
        # Bring the attribute channels last before flattening the grid.
        predictions = tf.reshape(
            predictions, [-1, num_anchors * bbox_attrs, dim])
        predictions = tf.transpose(predictions, [0, 2, 1])

    predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs])

    # Stride = how many input pixels one grid cell covers.
    stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1])

    # Convert anchors from input-image pixels to grid units.
    anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors]

    box_centers, box_sizes, confidence, classes = tf.split(
        predictions, [2, 2, 1, num_classes], axis=-1)

    box_centers = tf.nn.sigmoid(box_centers)
    confidence = tf.nn.sigmoid(confidence)

    # Per-cell (x, y) offsets, tiled once per anchor.
    grid_x = tf.range(grid_size[0], dtype=tf.float32)
    grid_y = tf.range(grid_size[1], dtype=tf.float32)
    a, b = tf.meshgrid(grid_x, grid_y)
    x_offset = tf.reshape(a, (-1, 1))
    y_offset = tf.reshape(b, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2])

    # Decode centres: sigmoid offset within the cell, then scale to pixels.
    box_centers = box_centers + x_y_offset
    box_centers = box_centers * stride

    # Decode sizes: exp of raw prediction times anchor, scaled to pixels.
    anchors = tf.tile(anchors, [dim, 1])
    box_sizes = tf.exp(box_sizes) * anchors
    box_sizes = box_sizes * stride

    detections = tf.concat([box_centers, box_sizes, confidence], axis=-1)

    classes = tf.nn.sigmoid(classes)
    predictions = tf.concat([detections, classes], axis=-1)
    return predictions
def _upsample(inputs, out_shape, data_format='NCHW'):
    """
    Nearest-neighbour upsample of a feature map to a target shape.

    :param inputs: feature map in `data_format` layout.
    :param out_shape: target shape list in the same layout
        (NCHW: [N, C, H, W]; NHWC: [N, H, W, C]).
    :param data_format: 'NCHW' or 'NHWC'.
    :return: upsampled tensor (named 'upsampled') in `data_format` layout.
    """
    # tf.image.resize_nearest_neighbor accepts input in format NHWC
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])

    # BUG FIX: height and width were previously read from each other's index
    # (e.g. new_height = out_shape[3] for NCHW), which only worked because
    # the feature maps happen to be square. Use the correct indices.
    if data_format == 'NCHW':
        new_height = out_shape[2]
        new_width = out_shape[3]
    else:
        new_height = out_shape[1]
        new_width = out_shape[2]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    # back to NCHW if needed
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = tf.identity(inputs, name='upsampled')
    return inputs
def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False):
    """
    Creates YOLO v3 model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :param with_spp: whether or not is using spp layer.
    :return: detections tensor.
    """
    # it will be needed later on
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], data_format=data_format, reuse=reuse):
        with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                route_1, route_2, inputs = darknet53(inputs)

            with tf.variable_scope('yolo-v3'):
                # First head: deepest features, largest anchors.
                route, inputs = _yolo_block(inputs, 512, data_format, with_spp)
                detect_1 = _detection_layer(
                    inputs, num_classes, _ANCHORS[6:9], img_size, data_format)
                detect_1 = tf.identity(detect_1, name='detect_1')

                # Upsample and fuse with route_2, then the second head.
                inputs = _conv2d_fixed_padding(route, 256, 1)
                upsample_size = route_2.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_2],
                                   axis=1 if data_format == 'NCHW' else 3)

                route, inputs = _yolo_block(inputs, 256)
                detect_2 = _detection_layer(
                    inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                detect_2 = tf.identity(detect_2, name='detect_2')

                # Upsample and fuse with route_1, then the third head.
                inputs = _conv2d_fixed_padding(route, 128, 1)
                upsample_size = route_1.get_shape().as_list()
                inputs = _upsample(inputs, upsample_size, data_format)
                inputs = tf.concat([inputs, route_1],
                                   axis=1 if data_format == 'NCHW' else 3)

                _, inputs = _yolo_block(inputs, 128)
                detect_3 = _detection_layer(
                    inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                detect_3 = tf.identity(detect_3, name='detect_3')

                detections = tf.concat([detect_1, detect_2, detect_3], axis=1)
                detections = tf.identity(detections, name='detections')
                return detections
def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 with SPP model.

    Same contract as yolo_v3; simply forwards with with_spp=True.
    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: detections tensor.
    """
    return yolo_v3(inputs, num_classes,
                   is_training=is_training,
                   data_format=data_format,
                   reuse=reuse,
                   with_spp=True)

100
tfyolov3/yolo_v3_tiny.py Normal file
View File

@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
_detection_layer, _upsample
slim = tf.contrib.slim

# Batch-norm / activation hyperparameters (same values as yolo_v3).
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1

# The 6 tiny-YOLOv3 anchor sizes (width, height); consumed in slices of
# three by the two detection layers below.
_ANCHORS = [(10, 14), (23, 27), (37, 58),
            (81, 82), (135, 169), (344, 319)]
def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 tiny model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: detections tensor.
    """
    # it will be needed later on
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d], data_format=data_format):
        with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params,
                                biases_initializer=None,
                                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
                with tf.variable_scope('yolo-v3-tiny'):
                    # Backbone: six conv+maxpool stages, channels 16 -> 512.
                    for i in range(6):
                        inputs = _conv2d_fixed_padding(
                            inputs, 16 * pow(2, i), 3)

                        if i == 4:
                            # saved for the second detection head below
                            route_1 = inputs

                        if i == 5:
                            # final pool keeps the spatial size (stride 1)
                            inputs = slim.max_pool2d(
                                inputs, [2, 2], stride=1, padding="SAME", scope='pool2')
                        else:
                            inputs = slim.max_pool2d(
                                inputs, [2, 2], scope='pool2')

                    inputs = _conv2d_fixed_padding(inputs, 1024, 3)
                    inputs = _conv2d_fixed_padding(inputs, 256, 1)
                    route_2 = inputs  # feeds the upsample path below

                    inputs = _conv2d_fixed_padding(inputs, 512, 3)
                    # inputs = _conv2d_fixed_padding(inputs, 255, 1)

                    detect_1 = _detection_layer(
                        inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                    detect_1 = tf.identity(detect_1, name='detect_1')

                    inputs = _conv2d_fixed_padding(route_2, 128, 1)
                    upsample_size = route_1.get_shape().as_list()
                    inputs = _upsample(inputs, upsample_size, data_format)

                    inputs = tf.concat([inputs, route_1],
                                       axis=1 if data_format == 'NCHW' else 3)

                    inputs = _conv2d_fixed_padding(inputs, 256, 3)
                    # inputs = _conv2d_fixed_padding(inputs, 255, 1)

                    detect_2 = _detection_layer(
                        inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                    detect_2 = tf.identity(detect_2, name='detect_2')

                    detections = tf.concat([detect_1, detect_2], axis=1)
                    detections = tf.identity(detections, name='detections')
                    return detections