mirror of
https://github.com/oscar-davids/lpmsdemo.git
synced 2025-12-24 12:37:59 +08:00
Add files via upload
This commit is contained in:
56
tfyolov3/convert_weights.py
Normal file
56
tfyolov3/convert_weights.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
import yolo_v3
|
||||
import yolo_v3_tiny
|
||||
|
||||
from utils import load_coco_names, load_weights
|
||||
|
||||
# Command-line flags for the checkpoint converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NCHW', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_string(
    'ckpt_file', './saved_model/model.ckpt', 'Chceckpoint file')


def main(argv=None):
    """Build the selected YOLOv3 variant, load Darknet weights, save a TF checkpoint."""
    # pick the network constructor requested by the flags
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs; any size > 320 will work here
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes),
                           data_format=FLAGS.data_format)
        load_ops = load_weights(tf.global_variables(scope='detector'),
                                FLAGS.weights_file)

    saver = tf.train.Saver(tf.global_variables(scope='detector'))

    with tf.Session() as sess:
        # run the assign ops that copy the Darknet weights into the graph
        sess.run(load_ops)

        save_path = saver.save(sess, save_path=FLAGS.ckpt_file)
        print('Model saved in path: {}'.format(save_path))


if __name__ == '__main__':
    tf.app.run()
|
||||
56
tfyolov3/convert_weights_pb.py
Normal file
56
tfyolov3/convert_weights_pb.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import yolo_v3
|
||||
import yolo_v3_tiny
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from utils import load_weights, load_coco_names, detections_boxes, freeze_graph
|
||||
|
||||
# Command-line flags for the frozen-graph converter.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'output_graph', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model output path')

tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')
tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')


def main(argv=None):
    """Build the selected YOLOv3 variant, load Darknet weights, freeze to a .pb file."""
    # pick the network constructor requested by the flags
    if FLAGS.tiny:
        model = yolo_v3_tiny.yolo_v3_tiny
    elif FLAGS.spp:
        model = yolo_v3.yolo_v3_spp
    else:
        model = yolo_v3.yolo_v3

    classes = load_coco_names(FLAGS.class_names)

    # placeholder for detector inputs; named so the frozen graph exposes it
    inputs = tf.placeholder(tf.float32, [None, FLAGS.size, FLAGS.size, 3], "inputs")

    with tf.variable_scope('detector'):
        detections = model(inputs, len(classes), data_format=FLAGS.data_format)
        load_ops = load_weights(tf.global_variables(scope='detector'), FLAGS.weights_file)

    # Sets the output nodes in the current session
    boxes = detections_boxes(detections)

    with tf.Session() as sess:
        sess.run(load_ops)
        freeze_graph(sess, FLAGS.output_graph)


if __name__ == '__main__':
    tf.app.run()
|
||||
109
tfyolov3/demo.py
Normal file
109
tfyolov3/demo.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from PIL import Image
|
||||
import time
|
||||
|
||||
import yolo_v3
|
||||
import yolo_v3_tiny
|
||||
|
||||
from utils import load_coco_names, draw_boxes, get_boxes_and_inputs, get_boxes_and_inputs_pb, non_max_suppression, \
|
||||
load_graph, letter_box_image
|
||||
|
||||
# Command-line flags for the single-image demo.
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'input_img', '2in.jpg', 'Input image')
tf.app.flags.DEFINE_string(
    'output_img', '2out.jpg', 'Output image')
tf.app.flags.DEFINE_string(
    'class_names', 'yolov3coco.names', 'File with class names')
tf.app.flags.DEFINE_string(
    'weights_file', 'yolov3.weights', 'Binary file with detector weights')
tf.app.flags.DEFINE_string(
    'data_format', 'NHWC', 'Data format: NCHW (gpu only) / NHWC')
tf.app.flags.DEFINE_string(
    'ckpt_file', '', 'Checkpoint file')
tf.app.flags.DEFINE_string(
    'frozen_model', 'frozen_darknet_yolov3_model.pb', 'Frozen tensorflow protobuf model')
tf.app.flags.DEFINE_bool(
    'tiny', False, 'Use tiny version of YOLOv3')
tf.app.flags.DEFINE_bool(
    'spp', False, 'Use SPP version of YOLOv3')

tf.app.flags.DEFINE_integer(
    'size', 416, 'Image size')

tf.app.flags.DEFINE_float(
    'conf_threshold', 0.5, 'Confidence threshold')
tf.app.flags.DEFINE_float(
    'iou_threshold', 0.4, 'IoU threshold')

tf.app.flags.DEFINE_float(
    'gpu_memory_fraction', 1.0, 'Gpu memory fraction to use')


def main(argv=None):
    """Run YOLOv3 on one image and write the annotated result to disk."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    config = tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False,
    )

    # letterbox the input to the square network resolution (gray padding)
    img = Image.open(FLAGS.input_img)
    img_resized = letter_box_image(img, FLAGS.size, FLAGS.size, 128)
    img_resized = img_resized.astype(np.float32)
    classes = load_coco_names(FLAGS.class_names)

    if FLAGS.frozen_model:
        # inference path 1: a frozen .pb graph
        t0 = time.time()
        frozenGraph = load_graph(FLAGS.frozen_model)
        print("Loaded graph in {:.2f}s".format(time.time()-t0))

        boxes, inputs = get_boxes_and_inputs_pb(frozenGraph)

        with tf.Session(graph=frozenGraph, config=config) as sess:
            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})
    else:
        # inference path 2: rebuild the graph and restore from a checkpoint
        if FLAGS.tiny:
            model = yolo_v3_tiny.yolo_v3_tiny
        elif FLAGS.spp:
            model = yolo_v3.yolo_v3_spp
        else:
            model = yolo_v3.yolo_v3

        boxes, inputs = get_boxes_and_inputs(model, len(classes), FLAGS.size, FLAGS.data_format)

        saver = tf.train.Saver(var_list=tf.global_variables(scope='detector'))

        with tf.Session(config=config) as sess:
            t0 = time.time()
            saver.restore(sess, FLAGS.ckpt_file)
            print('Model restored in {:.2f}s'.format(time.time()-t0))

            t0 = time.time()
            detected_boxes = sess.run(
                boxes, feed_dict={inputs: [img_resized]})

    filtered_boxes = non_max_suppression(detected_boxes,
                                         confidence_threshold=FLAGS.conf_threshold,
                                         iou_threshold=FLAGS.iou_threshold)
    print("Predictions found in {:.2f}s".format(time.time() - t0))

    draw_boxes(filtered_boxes, img, classes, (FLAGS.size, FLAGS.size), True)

    img.save(FLAGS.output_img)


if __name__ == '__main__':
    tf.app.run()
|
||||
86
tfyolov3/pb_to_tensorboard.py
Normal file
86
tfyolov3/pb_to_tensorboard.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ================================
|
||||
"""Imports a protobuf model as a graph in Tensorboard."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from tensorflow.core.framework import graph_pb2
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.framework import importer
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.platform import app
|
||||
from tensorflow.python.platform import gfile
|
||||
from tensorflow.python.summary import summary
|
||||
|
||||
# Try importing TensorRT ops if available
|
||||
# TODO(aaroey): ideally we should import everything from contrib, but currently
|
||||
# tensorrt module would cause build errors when being imported in
|
||||
# tensorflow/contrib/__init__.py. Fix it.
|
||||
# pylint: disable=unused-import,g-import-not-at-top,wildcard-import
|
||||
try:
|
||||
from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import *
|
||||
except ImportError:
|
||||
pass
|
||||
# pylint: enable=unused-import,g-import-not-at-top,wildcard-import
|
||||
|
||||
def import_to_tensorboard(model_dir, log_dir):
    """View an imported protobuf model (`.pb` file) as a graph in Tensorboard.

    Args:
      model_dir: The location of the protobuf (`pb`) model to visualize
      log_dir: The location for the Tensorboard log to begin visualization from.

    Usage:
      Call this function with your model location and desired log directory.
      Launch Tensorboard by pointing it to the log directory.
      View your imported `.pb` model as a graph.
    """
    with session.Session(graph=ops.Graph()) as sess:
        # parse the serialized GraphDef and import it into the fresh graph
        with gfile.GFile(model_dir, "rb") as f:
            graph_def = graph_pb2.GraphDef()
            graph_def.ParseFromString(f.read())
            importer.import_graph_def(graph_def)

        writer = summary.FileWriter(log_dir)
        writer.add_graph(sess.graph)
        print("Model Imported. Visualize by running: "
              "tensorboard --logdir={}".format(log_dir))
|
||||
|
||||
|
||||
def main(unused_args):
    """Entry point: forward the parsed flags to the importer."""
    import_to_tensorboard(FLAGS.model_dir, FLAGS.log_dir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="",
        required=True,
        help="The location of the protobuf (\'pb\') model to visualize.")
    parser.add_argument(
        "--log_dir",
        type=str,
        default="",
        required=True,
        help="The location for the Tensorboard log to begin visualization from.")
    FLAGS, unparsed = parser.parse_known_args()
    app.run(main=main, argv=[sys.argv[0]] + unparsed)
|
||||
301
tfyolov3/utils.py
Normal file
301
tfyolov3/utils.py
Normal file
@@ -0,0 +1,301 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from PIL import ImageDraw, Image
|
||||
|
||||
|
||||
def get_boxes_and_inputs_pb(frozen_graph):
    """Look up the output-box and input tensors of a loaded frozen graph.

    :param frozen_graph: a tf.Graph imported from a frozen .pb file.
    :return: (boxes tensor "output_boxes:0", input placeholder "inputs:0")
    """
    with frozen_graph.as_default():
        graph = tf.get_default_graph()
        boxes = graph.get_tensor_by_name("output_boxes:0")
        inputs = graph.get_tensor_by_name("inputs:0")

    return boxes, inputs
|
||||
|
||||
|
||||
def get_boxes_and_inputs(model, num_classes, size, data_format):
    """Build a fresh detector graph and return its box output and input placeholder.

    :param model: network constructor (yolo_v3 / yolo_v3_spp / yolo_v3_tiny).
    :param num_classes: number of predicted classes.
    :param size: square input resolution in pixels.
    :param data_format: 'NCHW' or 'NHWC'.
    :return: (corner-format boxes tensor, input placeholder)
    """
    inputs = tf.placeholder(tf.float32, [1, size, size, 3])

    with tf.variable_scope('detector'):
        detections = model(inputs, num_classes,
                           data_format=data_format)

    boxes = detections_boxes(detections)

    return boxes, inputs
|
||||
|
||||
|
||||
def load_graph(frozen_graph_filename):
    """Deserialize a frozen GraphDef file into a new tf.Graph.

    :param frozen_graph_filename: path to the .pb file.
    :return: the imported tf.Graph (node names kept as-is, no prefix).
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name="")

    return graph
|
||||
|
||||
|
||||
def freeze_graph(sess, output_graph):
    """Fold session variables into constants and serialize the graph to disk.

    :param sess: session holding the trained/loaded variables.
    :param output_graph: destination path for the frozen .pb file.
    """
    # keep both the detector output and the input placeholder in the frozen graph
    output_node_names = ",".join(["output_boxes", "inputs"])

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,
        tf.get_default_graph().as_graph_def(),
        output_node_names.split(",")
    )

    with tf.gfile.GFile(output_graph, "wb") as f:
        f.write(output_graph_def.SerializeToString())

    print("{} ops written to {}.".format(len(output_graph_def.node), output_graph))
|
||||
|
||||
|
||||
def load_weights(var_list, weights_file):
    """
    Loads and converts pre-trained weights.
    :param var_list: list of network variables.
    :param weights_file: name of the binary file.
    :return: list of assign ops
    """
    with open(weights_file, "rb") as fp:
        # skip the 5-int32 Darknet header block
        _ = np.fromfile(fp, dtype=np.int32, count=5)

        weights = np.fromfile(fp, dtype=np.float32)

    ptr = 0          # read cursor into the flat weights array
    i = 0
    assign_ops = []
    while i < len(var_list) - 1:
        var1 = var_list[i]
        var2 = var_list[i + 1]
        # do something only if we process conv layer
        if 'Conv' in var1.name.split('/')[-2]:
            # check type of next layer
            if 'BatchNorm' in var2.name.split('/')[-2]:
                # Darknet serializes bn params in the order beta, gamma, mean, var
                gamma, beta, mean, var = var_list[i + 1:i + 5]
                for bn_var in (beta, gamma, mean, var):
                    shape = bn_var.shape.as_list()
                    num_params = np.prod(shape)
                    var_weights = weights[ptr:ptr + num_params].reshape(shape)
                    ptr += num_params
                    assign_ops.append(
                        tf.assign(bn_var, var_weights, validate_shape=True))

                # we move the pointer by 4, because we loaded 4 variables
                i += 4
            elif 'Conv' in var2.name.split('/')[-2]:
                # a conv directly follows: the file stores its biases first
                bias = var2
                bias_shape = bias.shape.as_list()
                bias_params = np.prod(bias_shape)
                bias_weights = weights[ptr:ptr + bias_params].reshape(bias_shape)
                ptr += bias_params
                assign_ops.append(
                    tf.assign(bias, bias_weights, validate_shape=True))

                # we loaded 1 variable
                i += 1
            # we can load weights of conv layer
            shape = var1.shape.as_list()
            num_params = np.prod(shape)

            # Darknet layout is (out_ch, in_ch, h, w)
            var_weights = weights[ptr:ptr + num_params].reshape(
                (shape[3], shape[2], shape[0], shape[1]))
            # remember to transpose to column-major
            var_weights = np.transpose(var_weights, (2, 3, 1, 0))
            ptr += num_params
            assign_ops.append(
                tf.assign(var1, var_weights, validate_shape=True))
            i += 1

    return assign_ops
|
||||
|
||||
|
||||
def detections_boxes(detections):
    """
    Converts center x, center y, width and height values to coordinates of top left and bottom right points.

    :param detections: outputs of YOLO v3 detector of shape (?, 10647, (num_classes + 5))
    :return: converted detections of same shape as input
    """
    center_x, center_y, width, height, attrs = tf.split(
        detections, [1, 1, 1, 1, -1], axis=-1)

    half_w = width / 2
    half_h = height / 2
    corners = tf.concat([center_x - half_w,
                         center_y - half_h,
                         center_x + half_w,
                         center_y + half_h], axis=-1)

    # keep the "output_boxes" name: the frozen-graph tooling looks it up
    return tf.concat([corners, attrs], axis=-1, name="output_boxes")
|
||||
|
||||
|
||||
def _iou(box1, box2):
|
||||
"""
|
||||
Computes Intersection over Union value for 2 bounding boxes
|
||||
|
||||
:param box1: array of 4 values (top left and bottom right coords): [x0, y0, x1, x2]
|
||||
:param box2: same as box1
|
||||
:return: IoU
|
||||
"""
|
||||
b1_x0, b1_y0, b1_x1, b1_y1 = box1
|
||||
b2_x0, b2_y0, b2_x1, b2_y1 = box2
|
||||
|
||||
int_x0 = max(b1_x0, b2_x0)
|
||||
int_y0 = max(b1_y0, b2_y0)
|
||||
int_x1 = min(b1_x1, b2_x1)
|
||||
int_y1 = min(b1_y1, b2_y1)
|
||||
|
||||
int_area = max(int_x1 - int_x0, 0) * max(int_y1 - int_y0, 0)
|
||||
|
||||
b1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
|
||||
b2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)
|
||||
|
||||
# we add small epsilon of 1e-05 to avoid division by 0
|
||||
iou = int_area / (b1_area + b2_area - int_area + 1e-05)
|
||||
return iou
|
||||
|
||||
|
||||
def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshold=0.4):
|
||||
"""
|
||||
Applies Non-max suppression to prediction boxes.
|
||||
|
||||
:param predictions_with_boxes: 3D numpy array, first 4 values in 3rd dimension are bbox attrs, 5th is confidence
|
||||
:param confidence_threshold: the threshold for deciding if prediction is valid
|
||||
:param iou_threshold: the threshold for deciding if two boxes overlap
|
||||
:return: dict: class -> [(box, score)]
|
||||
"""
|
||||
conf_mask = np.expand_dims(
|
||||
(predictions_with_boxes[:, :, 4] > confidence_threshold), -1)
|
||||
predictions = predictions_with_boxes * conf_mask
|
||||
|
||||
result = {}
|
||||
for i, image_pred in enumerate(predictions):
|
||||
shape = image_pred.shape
|
||||
# non_zero_idxs = np.nonzero(image_pred)
|
||||
# image_pred = image_pred[non_zero_idxs]
|
||||
temp = image_pred
|
||||
sum_t = np.sum(temp, axis=1)
|
||||
non_zero_idx = sum_t != 0
|
||||
image_pred = image_pred[non_zero_idx, :]
|
||||
image_pred = image_pred.reshape(-1, shape[-1])
|
||||
|
||||
bbox_attrs = image_pred[:, :5]
|
||||
classes = image_pred[:, 5:]
|
||||
classes = np.argmax(classes, axis=-1)
|
||||
|
||||
unique_classes = list(set(classes.reshape(-1)))
|
||||
|
||||
for cls in unique_classes:
|
||||
cls_mask = classes == cls
|
||||
cls_boxes = bbox_attrs[np.nonzero(cls_mask)]
|
||||
cls_boxes = cls_boxes[cls_boxes[:, -1].argsort()[::-1]]
|
||||
cls_scores = cls_boxes[:, -1]
|
||||
cls_boxes = cls_boxes[:, :-1]
|
||||
|
||||
while len(cls_boxes) > 0:
|
||||
box = cls_boxes[0]
|
||||
score = cls_scores[0]
|
||||
if cls not in result:
|
||||
result[cls] = []
|
||||
result[cls].append((box, score))
|
||||
cls_boxes = cls_boxes[1:]
|
||||
cls_scores = cls_scores[1:]
|
||||
ious = np.array([_iou(box, x) for x in cls_boxes])
|
||||
iou_mask = ious < iou_threshold
|
||||
cls_boxes = cls_boxes[np.nonzero(iou_mask)]
|
||||
cls_scores = cls_scores[np.nonzero(iou_mask)]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def load_coco_names(file_name):
    """Read class names (one per line) into an {index: name} dict.

    Fix: the original kept each line's trailing newline, so labels drawn
    by draw_boxes rendered with a line break; names are now stripped.
    (Also avoids shadowing the builtin `id`.)

    :param file_name: path to a names file, one class name per line.
    :return: dict mapping 0-based line index -> class name.
    """
    names = {}
    with open(file_name) as f:
        for idx, name in enumerate(f):
            names[idx] = name.strip()
    return names
|
||||
|
||||
|
||||
def draw_boxes(boxes, img, cls_names, detection_size, is_letter_box_image):
    """Draw detection rectangles and class labels onto a PIL image in place.

    :param boxes: dict class_id -> [(box, score)], as returned by non_max_suppression.
    :param img: PIL Image to draw on (mutated).
    :param cls_names: dict class_id -> class name.
    :param detection_size: (width, height) the detector ran at.
    :param is_letter_box_image: whether the detector input was letterboxed.
    """
    draw = ImageDraw.Draw(img)

    for cls, bboxs in boxes.items():
        # one random color per class
        color = tuple(np.random.randint(0, 256, 3))
        for box, score in bboxs:
            scaled = convert_to_original_size(box, np.array(detection_size),
                                              np.array(img.size),
                                              is_letter_box_image)
            draw.rectangle(scaled, outline=color)
            draw.text(scaled[:2], '{} {:.2f}%'.format(
                cls_names[cls], score * 100), fill=color)
|
||||
|
||||
|
||||
def convert_to_original_size(box, size, original_size, is_letter_box_image):
    """Map a box from detection coordinates back into original-image coordinates.

    :param box: flat array [x0, y0, x1, y1] in detection space.
    :param size: np.array (w, h) of the detector input.
    :param original_size: np.array (w, h) of the source image.
    :param is_letter_box_image: True if the detector input was letterboxed.
    :return: list [x0, y0, x1, y1] in original-image space.
    """
    pts = box.reshape(2, 2)
    if is_letter_box_image:
        # undo the letterbox padding/scale point by point
        pts[0, :] = letter_box_pos_to_original_pos(pts[0, :], size, original_size)
        pts[1, :] = letter_box_pos_to_original_pos(pts[1, :], size, original_size)
    else:
        # plain rescale
        pts = pts * (original_size / size)
    return list(pts.reshape(-1))
|
||||
|
||||
|
||||
def letter_box_image(image: Image.Image, output_height: int, output_width: int, fill_value) -> np.ndarray:
    """
    Fit image with final image with output_width and output_height.
    :param image: PILLOW Image object.
    :param output_height: height of the final image.
    :param output_width: width of the final image.
    :param fill_value: fill value for empty area. Can be uint8 or np.ndarray
    :return: numpy image fit within letterbox. dtype=uint8, shape=(output_height, output_width)
    """
    src_width, src_height = image.size
    # scale so the image fits entirely inside the output box
    fit_ratio = min(float(output_width) / src_width,
                    float(output_height) / src_height)
    fit_height = int(src_height * fit_ratio)
    fit_width = int(src_width * fit_ratio)
    fit_image = np.asarray(image.resize((fit_width, fit_height),
                                        resample=Image.BILINEAR))

    if isinstance(fill_value, int):
        # broadcast the scalar fill across the channel dimension
        fill_value = np.full(fit_image.shape[2], fill_value, fit_image.dtype)

    canvas = np.tile(fill_value, (output_height, output_width, 1))
    pad_top = int(0.5 * (output_height - fit_height))
    pad_left = int(0.5 * (output_width - fit_width))
    canvas[pad_top:pad_top + fit_height, pad_left:pad_left + fit_width] = fit_image
    return canvas
|
||||
|
||||
|
||||
def letter_box_pos_to_original_pos(letter_pos, current_size, ori_image_size) -> np.ndarray:
    """
    Parameters should have same shape and dimension space. (Width, Height) or (Height, Width)
    :param letter_pos: The current position within letterbox image including fill value area.
    :param current_size: The size of whole image including fill value area.
    :param ori_image_size: The size of image before being letter boxed.
    :return: position mapped back into the original (pre-letterbox) image.
    """
    # Fix: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement (same float64 dtype).
    letter_pos = np.asarray(letter_pos, dtype=float)
    current_size = np.asarray(current_size, dtype=float)
    ori_image_size = np.asarray(ori_image_size, dtype=float)
    # scale used when the image was letterboxed
    final_ratio = min(current_size[0] / ori_image_size[0],
                      current_size[1] / ori_image_size[1])
    # padding added on each side, truncated to whole pixels (as in letter_box_image)
    pad = 0.5 * (current_size - final_ratio * ori_image_size)
    pad = pad.astype(np.int32)
    to_return_pos = (letter_pos - pad) / final_ratio
    return to_return_pos
|
||||
292
tfyolov3/yolo_v3.py
Normal file
292
tfyolov3/yolo_v3.py
Normal file
@@ -0,0 +1,292 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
slim = tf.contrib.slim

# Batch-norm hyper-parameters shared by every conv layer in the network.
_BATCH_NORM_DECAY = 0.9
_BATCH_NORM_EPSILON = 1e-05
_LEAKY_RELU = 0.1

# COCO anchor priors (width, height) in input-image pixels, smallest first;
# consumed in groups of three per detection scale.
_ANCHORS = [(10, 13), (16, 30), (33, 23),
            (30, 61), (62, 45), (59, 119),
            (116, 90), (156, 198), (373, 326)]
|
||||
|
||||
|
||||
def darknet53(inputs):
    """
    Builds Darknet-53 model.

    Returns the two intermediate feature maps used as skip connections
    (route_1, route_2) plus the final, most downsampled feature map.
    """
    inputs = _conv2d_fixed_padding(inputs, 32, 3)
    inputs = _conv2d_fixed_padding(inputs, 64, 3, strides=2)
    inputs = _darknet53_block(inputs, 32)
    inputs = _conv2d_fixed_padding(inputs, 128, 3, strides=2)

    for _ in range(2):
        inputs = _darknet53_block(inputs, 64)

    inputs = _conv2d_fixed_padding(inputs, 256, 3, strides=2)

    for _ in range(8):
        inputs = _darknet53_block(inputs, 128)

    route_1 = inputs

    inputs = _conv2d_fixed_padding(inputs, 512, 3, strides=2)

    for _ in range(8):
        inputs = _darknet53_block(inputs, 256)

    route_2 = inputs

    inputs = _conv2d_fixed_padding(inputs, 1024, 3, strides=2)

    for _ in range(4):
        inputs = _darknet53_block(inputs, 512)

    return route_1, route_2, inputs
|
||||
|
||||
|
||||
def _conv2d_fixed_padding(inputs, filters, kernel_size, strides=1):
    """Conv layer that pads explicitly when striding, so output size is exact."""
    if strides > 1:
        inputs = _fixed_padding(inputs, kernel_size)
    padding = 'SAME' if strides == 1 else 'VALID'
    return slim.conv2d(inputs, filters, kernel_size, stride=strides,
                       padding=padding)
|
||||
|
||||
|
||||
def _darknet53_block(inputs, filters):
    """Residual block: 1x1 squeeze, 3x3 expand, identity shortcut."""
    shortcut = inputs
    out = _conv2d_fixed_padding(inputs, filters, 1)
    out = _conv2d_fixed_padding(out, filters * 2, 3)
    return out + shortcut
|
||||
|
||||
|
||||
def _spp_block(inputs, data_format='NCHW'):
    """Spatial pyramid pooling: concat stride-1 max-pools (13/9/5) with the input."""
    channel_axis = 1 if data_format == 'NCHW' else 3
    pooled = [slim.max_pool2d(inputs, k, 1, 'SAME') for k in (13, 9, 5)]
    return tf.concat(pooled + [inputs], axis=channel_axis)
|
||||
|
||||
|
||||
@tf.contrib.framework.add_arg_scope
def _fixed_padding(inputs, kernel_size, *args, mode='CONSTANT', **kwargs):
    """
    Pads the input along the spatial dimensions independently of input size.

    Args:
      inputs: A tensor of size [batch, channels, height_in, width_in] or
        [batch, height_in, width_in, channels] depending on data_format.
      kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
        Should be a positive integer.
      data_format: The input format ('NHWC' or 'NCHW').
      mode: The mode for tf.pad.

    Returns:
      A tensor with the same format as the input with the data either intact
      (if kernel_size == 1) or padded (if kernel_size > 1).
    """
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    spatial = [pad_beg, pad_end]

    # data_format arrives through slim's arg_scope (set up in yolo_v3)
    if kwargs['data_format'] == 'NCHW':
        paddings = [[0, 0], [0, 0], spatial, spatial]
    else:
        paddings = [[0, 0], spatial, spatial, [0, 0]]
    return tf.pad(inputs, paddings, mode=mode)
|
||||
|
||||
|
||||
def _yolo_block(inputs, filters, data_format='NCHW', with_spp=False):
    """Detection-head tower of alternating 1x1/3x3 convs, optional SPP insert.

    Returns (route, output): `route` feeds the next upsample branch,
    `output` feeds the detection layer.
    """
    inputs = _conv2d_fixed_padding(inputs, filters, 1)
    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    inputs = _conv2d_fixed_padding(inputs, filters, 1)

    if with_spp:
        inputs = _spp_block(inputs, data_format)
        inputs = _conv2d_fixed_padding(inputs, filters, 1)

    inputs = _conv2d_fixed_padding(inputs, filters * 2, 3)
    route = _conv2d_fixed_padding(inputs, filters, 1)
    output = _conv2d_fixed_padding(route, filters * 2, 3)
    return route, output
|
||||
|
||||
|
||||
def _get_size(shape, data_format):
|
||||
if len(shape) == 4:
|
||||
shape = shape[1:]
|
||||
return shape[1:3] if data_format == 'NCHW' else shape[0:2]
|
||||
|
||||
|
||||
def _detection_layer(inputs, num_classes, anchors, img_size, data_format):
    """Final 1x1 conv plus decoding of raw outputs into absolute box attributes."""
    num_anchors = len(anchors)
    # plain conv: no batch norm, no activation on the prediction head
    predictions = slim.conv2d(inputs, num_anchors * (5 + num_classes), 1,
                              stride=1, normalizer_fn=None,
                              activation_fn=None,
                              biases_initializer=tf.zeros_initializer())

    shape = predictions.get_shape().as_list()
    grid_size = _get_size(shape, data_format)
    dim = grid_size[0] * grid_size[1]
    bbox_attrs = 5 + num_classes

    # flatten the spatial grid into a single axis of anchor predictions
    if data_format == 'NCHW':
        predictions = tf.reshape(
            predictions, [-1, num_anchors * bbox_attrs, dim])
        predictions = tf.transpose(predictions, [0, 2, 1])

    predictions = tf.reshape(predictions, [-1, num_anchors * dim, bbox_attrs])

    # pixels covered by one grid cell
    stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1])

    # express anchors in grid-cell units
    anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors]

    box_centers, box_sizes, confidence, classes = tf.split(
        predictions, [2, 2, 1, num_classes], axis=-1)

    box_centers = tf.nn.sigmoid(box_centers)
    confidence = tf.nn.sigmoid(confidence)

    # per-cell (x, y) offsets, tiled across the anchors
    grid_x = tf.range(grid_size[0], dtype=tf.float32)
    grid_y = tf.range(grid_size[1], dtype=tf.float32)
    a, b = tf.meshgrid(grid_x, grid_y)

    x_offset = tf.reshape(a, (-1, 1))
    y_offset = tf.reshape(b, (-1, 1))

    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2])

    # decode centers to absolute pixel coordinates
    box_centers = box_centers + x_y_offset
    box_centers = box_centers * stride

    # decode sizes against the anchor priors
    anchors = tf.tile(anchors, [dim, 1])
    box_sizes = tf.exp(box_sizes) * anchors
    box_sizes = box_sizes * stride

    detections = tf.concat([box_centers, box_sizes, confidence], axis=-1)

    classes = tf.nn.sigmoid(classes)
    return tf.concat([detections, classes], axis=-1)
|
||||
|
||||
|
||||
def _upsample(inputs, out_shape, data_format='NCHW'):
    """Nearest-neighbor upsample to the spatial size encoded in out_shape."""
    # tf.image.resize_nearest_neighbor accepts input in format NHWC
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
        # NOTE(review): for NCHW, out_shape[2] is H and out_shape[3] is W, so
        # these indices look swapped; harmless for the square inputs used here,
        # but confirm before using non-square resolutions.
        new_height, new_width = out_shape[3], out_shape[2]
    else:
        new_height, new_width = out_shape[2], out_shape[1]

    inputs = tf.image.resize_nearest_neighbor(inputs, (new_height, new_width))

    # back to NCHW if needed
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    return tf.identity(inputs, name='upsampled')
|
||||
|
||||
|
||||
def yolo_v3(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False, with_spp=False):
    """
    Creates YOLO v3 model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :param with_spp: whether or not is using spp layer.
    :return: detections tensor named 'detections'.
    """
    # spatial size of the input, needed later by the detection layers
    img_size = inputs.get_shape().as_list()[1:3]

    # transpose the inputs to NCHW
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # normalize values to range [0..1]
    inputs = inputs / 255

    # set batch norm params
    batch_norm_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    concat_axis = 1 if data_format == 'NCHW' else 3

    # Set activation_fn and parameters for conv2d, batch_norm.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding],
                        data_format=data_format, reuse=reuse):
        with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            biases_initializer=None,
                            activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):
            with tf.variable_scope('darknet-53'):
                route_1, route_2, inputs = darknet53(inputs)

            with tf.variable_scope('yolo-v3'):
                # scale 1: largest anchors on the most downsampled map
                route, inputs = _yolo_block(inputs, 512, data_format, with_spp)
                detect_1 = _detection_layer(
                    inputs, num_classes, _ANCHORS[6:9], img_size, data_format)
                detect_1 = tf.identity(detect_1, name='detect_1')

                # scale 2: upsample and merge with route_2
                inputs = _conv2d_fixed_padding(route, 256, 1)
                inputs = _upsample(inputs, route_2.get_shape().as_list(),
                                   data_format)
                inputs = tf.concat([inputs, route_2], axis=concat_axis)

                route, inputs = _yolo_block(inputs, 256)
                detect_2 = _detection_layer(
                    inputs, num_classes, _ANCHORS[3:6], img_size, data_format)
                detect_2 = tf.identity(detect_2, name='detect_2')

                # scale 3: upsample again and merge with route_1
                inputs = _conv2d_fixed_padding(route, 128, 1)
                inputs = _upsample(inputs, route_1.get_shape().as_list(),
                                   data_format)
                inputs = tf.concat([inputs, route_1], axis=concat_axis)

                _, inputs = _yolo_block(inputs, 128)
                detect_3 = _detection_layer(
                    inputs, num_classes, _ANCHORS[0:3], img_size, data_format)
                detect_3 = tf.identity(detect_3, name='detect_3')

                detections = tf.concat([detect_1, detect_2, detect_3], axis=1)
                return tf.identity(detections, name='detections')
|
||||
|
||||
|
||||
def yolo_v3_spp(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 with SPP model.

    Thin wrapper around ``yolo_v3`` that enables the Spatial Pyramid
    Pooling block (``with_spp=True``); all other arguments are forwarded
    unchanged.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: the detections tensor produced by ``yolo_v3``.
    """
    return yolo_v3(
        inputs,
        num_classes,
        is_training=is_training,
        data_format=data_format,
        reuse=reuse,
        with_spp=True,
    )
|
||||
100
tfyolov3/yolo_v3_tiny.py
Normal file
100
tfyolov3/yolo_v3_tiny.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from yolo_v3 import _conv2d_fixed_padding, _fixed_padding, _get_size, \
|
||||
_detection_layer, _upsample
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
_BATCH_NORM_DECAY = 0.9
|
||||
_BATCH_NORM_EPSILON = 1e-05
|
||||
_LEAKY_RELU = 0.1
|
||||
|
||||
_ANCHORS = [(10, 14), (23, 27), (37, 58),
|
||||
(81, 82), (135, 169), (344, 319)]
|
||||
|
||||
|
||||
def yolo_v3_tiny(inputs, num_classes, is_training=False, data_format='NCHW', reuse=False):
    """
    Creates YOLO v3 tiny model.

    :param inputs: a 4-D tensor of size [batch_size, height, width, channels].
        Dimension batch_size may be undefined. The channel order is RGB.
    :param num_classes: number of predicted classes.
    :param is_training: whether is training or not.
    :param data_format: data format NCHW or NHWC.
    :param reuse: whether or not the network and its variables should be reused.
    :return: a tensor of detections, named 'detections'.
    """
    # Original spatial size of the image; the detection layers need it to
    # rescale box coordinates back to image space.
    img_size = inputs.get_shape().as_list()[1:3]

    # The network operates channels-first internally when NCHW is requested.
    if data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    # Bring pixel values into the [0..1] range.
    inputs = inputs / 255

    bn_params = {
        'decay': _BATCH_NORM_DECAY,
        'epsilon': _BATCH_NORM_EPSILON,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    # Shared defaults: data_format everywhere, reuse for variable-holding
    # ops, and batch-norm + leaky-ReLU on every convolution.
    with slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding, slim.max_pool2d],
                        data_format=data_format), \
         slim.arg_scope([slim.conv2d, slim.batch_norm, _fixed_padding], reuse=reuse), \
         slim.arg_scope([slim.conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=bn_params,
                        biases_initializer=None,
                        activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=_LEAKY_RELU)):

        with tf.variable_scope('yolo-v3-tiny'):
            # Backbone: six conv+pool stages doubling the channel count
            # (16, 32, ..., 512). The stage-4 output feeds the upsample
            # path; the final pool keeps the spatial size (stride 1).
            for stage in range(6):
                inputs = _conv2d_fixed_padding(inputs, 16 << stage, 3)

                if stage == 4:
                    route_1 = inputs

                pool_kwargs = {'stride': 1, 'padding': "SAME"} if stage == 5 else {}
                inputs = slim.max_pool2d(inputs, [2, 2], scope='pool2', **pool_kwargs)

            inputs = _conv2d_fixed_padding(inputs, 1024, 3)
            inputs = _conv2d_fixed_padding(inputs, 256, 1)
            route_2 = inputs

            # Coarse-scale detection head (larger anchors).
            inputs = _conv2d_fixed_padding(inputs, 512, 3)
            detect_1 = tf.identity(
                _detection_layer(inputs, num_classes, _ANCHORS[3:6], img_size, data_format),
                name='detect_1')

            # Upsample path: squeeze channels, upsample to the stage-4
            # feature-map size, and fuse with the backbone skip connection.
            inputs = _conv2d_fixed_padding(route_2, 128, 1)
            inputs = _upsample(inputs, route_1.get_shape().as_list(), data_format)
            inputs = tf.concat([inputs, route_1],
                               axis=1 if data_format == 'NCHW' else 3)

            # Fine-scale detection head (smaller anchors).
            inputs = _conv2d_fixed_padding(inputs, 256, 3)
            detect_2 = tf.identity(
                _detection_layer(inputs, num_classes, _ANCHORS[0:3], img_size, data_format),
                name='detect_2')

            detections = tf.concat([detect_1, detect_2], axis=1)
            return tf.identity(detections, name='detections')
|
||||
Reference in New Issue
Block a user