Gist "mbn.py" by cysin, created November 20, 2018. Save cysin/f8c89d6eb1f66fb50c3ba3447b3f4466 to your computer and use it in GitHub Desktop.
Note: this file contains bidirectional Unicode text that may be interpreted or compiled differently than it appears below. To review it safely, open it in an editor that reveals hidden Unicode characters.
import numpy as np | |
import sys,os | |
import argparse | |
caffe_root = '/root/work/caffe/' | |
sys.path.insert(0, caffe_root + 'python') | |
os.environ['GLOG_minloglevel'] = '3' | |
import caffe | |
#import google.protobuf as pb | |
import google.protobuf.text_format | |
import pprint as pp | |
def make_parser():
    """Build the command-line parser for the BN-merge tool.

    All four path arguments are mandatory: the source prototxt/caffemodel
    (with BatchNorm layers) and the destination prototxt/caffemodel to write.
    """
    parser = argparse.ArgumentParser(description='Merge bn layer to conv layer, and automatically convert layer names.')
    option_specs = (
        ('--tn', '.prototxt with bn'),
        ('--tm', '.caffemodel with bn'),
        ('--dn', '.prototxt to save'),
        ('--dm', '.caffemodel to save'),
    )
    for flag, help_text in option_specs:
        parser.add_argument(flag, type=str, required=True, help=help_text)
    return parser
def _convert_prototxt(src_path, dst_path):
    """Rewrite the net definition, removing each BatchNorm+Scale pair that
    directly follows a (Depthwise)Convolution layer.

    The conv layer gains a bias term (the folded BN bias will live there),
    and bottoms that referenced a removed Scale top are re-wired to the
    conv's top. The pruned prototxt is written to *dst_path*.

    Returns the list of BatchNorm eps values, one per merged pair, in
    network order — needed later when folding the weights.
    """
    net = caffe.proto.caffe_pb2.NetParameter()
    with open(src_path) as f:
        google.protobuf.text_format.Parse(f.read(), net)

    bn_eps = []
    layers_to_remove = []
    name_maps = {}  # removed Scale top name -> conv top name
    num_layers = len(net.layer)
    for i, layer in enumerate(net.layer):
        if (layer.type in ('Convolution', 'DepthwiseConvolution')
                and i + 2 < num_layers  # bounds check: original indexed i+1/i+2 unguarded
                and net.layer[i + 1].type == 'BatchNorm'
                and net.layer[i + 2].type == 'Scale'):
            bn_eps.append(net.layer[i + 1].batch_norm_param.eps)
            # If the Scale layer's top differs from the conv's top, later
            # layers reference it by that name; map it back to the conv top.
            if i + 3 < num_layers and net.layer[i + 2].top != layer.top:
                name_maps[net.layer[i + 2].top[0]] = layer.top[0]
            # The folded BN bias needs a bias blob on the conv layer.
            layer.convolution_param.bias_term = True
            layer.convolution_param.bias_filler.type = 'constant'
            layer.convolution_param.bias_filler.value = 0
            layers_to_remove.append(net.layer[i + 1])
            layers_to_remove.append(net.layer[i + 2])
        # Re-wire bottoms that pointed at a top removed by an earlier merge.
        for j, bottom in enumerate(layer.bottom):
            if bottom in name_maps:
                layer.bottom[j] = name_maps[bottom]
    for layer in layers_to_remove:
        net.layer.remove(layer)

    print("Saving new net models to '%s'" % dst_path)
    with open(dst_path, 'w') as f:
        f.write(str(net))
    return bn_eps


def _merge_weights(src_proto, src_model, dst_proto, bn_eps):
    """Copy weights into the merged net, folding each BatchNorm+Scale pair
    into the preceding convolution:

        var  = var * scale_factor        (caffe stores unnormalized stats)
        rstd = 1 / sqrt(var + eps)
        w'   = w * rstd * scale
        b'   = (b - mean) * rstd * scale + shift

    Returns the populated destination caffe.Net.
    """
    dst_net = caffe.Net(dst_proto, caffe.TEST)
    src_net = caffe.Net(src_proto, src_model, caffe.TEST)
    num_layers = len(src_net.layers)
    i = 0       # index into the source (BN) net
    dst_i = 0   # index into the destination (merged) net
    bn_i = 0    # index into bn_eps, advances once per merged pair
    while i < num_layers:
        ltype = src_net.layers[i].type
        if (ltype in ('Convolution', 'DepthwiseConvolution')
                and i + 2 < num_layers  # bounds check: original indexed i+1/i+2 unguarded
                and src_net.layers[i + 1].type == 'BatchNorm'
                and src_net.layers[i + 2].type == 'Scale'):
            conv = src_net.layers[i].blobs
            bn = src_net.layers[i + 1].blobs
            scale = src_net.layers[i + 2].blobs

            weight = conv[0].data
            channels = weight.shape[0]
            bias = conv[1].data if len(conv) > 1 else np.zeros(channels)
            mean = bn[0].data
            var = bn[1].data
            scalef = bn[2].data  # caffe BatchNorm moving-average scale factor
            scales = scale[0].data
            shift = scale[1].data

            # Caffe normalizes stored mean/var by the scale factor; a factor
            # of 0 maps to 0 (matches caffe's own batch_norm_layer behavior).
            if scalef != 0:
                scalef = 1. / scalef
            mean = mean * scalef
            var = var * scalef

            rstd = 1. / np.sqrt(var + bn_eps[bn_i])
            bn_i += 1
            weight = weight * (rstd * scales).reshape((channels, 1, 1, 1))
            bias = (bias - mean) * rstd * scales + shift

            dst_blobs = dst_net.layers[dst_i].blobs
            dst_blobs[0].data[...] = weight
            dst_blobs[1].data[...] = bias
            i += 3  # skip the consumed BatchNorm and Scale layers
        else:
            # Pass-through layer: copy its blobs verbatim.
            for j, blob in enumerate(src_net.layers[i].blobs):
                dst_net.layers[dst_i].blobs[j].data[...] = blob.data
            i += 1
        dst_i += 1
    return dst_net


def _verify(src_proto, src_model, dst_proto, dst_model, iters=100):
    """Run both nets on identical random inputs and report the per-iteration
    mean squared difference of the first output blob.
    """
    caffe.set_mode_gpu()
    net1 = caffe.Net(src_proto, src_model, caffe.TEST)
    net2 = caffe.Net(dst_proto, dst_model, caffe.TEST)
    input_shape = net1.blobs['data'].data.shape
    for it in range(iters):
        # np.random.random_integers (inclusive 0..255) was removed from
        # NumPy; randint with an exclusive high of 256 gives the same range.
        input_data = np.random.randint(0, 256, input_shape)
        net1.blobs['data'].data[...] = input_data
        net2.blobs['data'].data[...] = input_data
        d1 = list(net1.forward().values())[0]
        d2 = list(net2.forward().values())[0]
        # Vectorized MSE replaces the original per-element Python loop.
        variance = float(np.mean((d1 - d2) ** 2))
        status = 'OK!' if variance < 0.0000001 else 'ERR!'
        print("iter %d diff variance: %f %s" % (it, variance, status))


if __name__ == '__main__':
    args = make_parser().parse_args()
    # Step 1: prune BatchNorm/Scale layers from the net definition.
    bn_eps = _convert_prototxt(args.tn, args.dn)
    # Step 2: fold the BN statistics into the conv weights.
    dst_net = _merge_weights(args.tn, args.tm, args.dn, bn_eps)
    print("Saving new net weights to '%s'" % args.dm)
    dst_net.save(args.dm)
    # Step 3: sanity-check the merged model against the original.
    print("Now testing new model and weights:")
    _verify(args.tn, args.tm, args.dn, args.dm)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.