-
-
Save masroorhasan/0e73a7fc7bb2558c65933338d8194130 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
from __future__ import print_function | |
import argparse | |
import numpy as np | |
import time | |
tt = time.time() | |
import cv2 | |
from grpc.beta import implementations | |
from protos.tensorflow.core.framework import tensor_pb2 | |
from protos.tensorflow.core.framework import tensor_shape_pb2 | |
from protos.tensorflow.core.framework import types_pb2 | |
from protos.tensorflow_serving.apis import predict_pb2 | |
from protos.tensorflow_serving.apis import prediction_service_pb2 | |
# Command-line flags for the gRPC client.
# NOTE: the flags are parsed at module level, so FLAGS is a module global that
# main() reads directly.
parser = argparse.ArgumentParser(description='inception grpc client flags.')
parser.add_argument('--host', default='0.0.0.0', help='inception serving host')
# type=int lets argparse reject a non-numeric port with a usage message rather
# than failing later when the channel is created.
parser.add_argument('--port', default=9000, type=int, help='inception serving port')
parser.add_argument('--image', default='', help='path to JPEG image file')
FLAGS = parser.parse_args()
def main():
    """Send one image to the served ``resnet`` model and print the elapsed time.

    Reads the image at ``FLAGS.image`` with OpenCV, packs it into a
    ``DT_FLOAT`` ``TensorProto`` with a leading batch dimension of 1, and
    issues a ``Predict`` call with a 30 second timeout against
    ``FLAGS.host:FLAGS.port``.

    The printed figure is measured from the module-level timestamp ``tt``,
    which is taken before the cv2/grpc/proto imports, so it includes that
    import time as well as the request itself.

    Raises:
        IOError: if OpenCV cannot read an image from ``FLAGS.image``.
    """
    # create prediction service client stub
    channel = implementations.insecure_channel(FLAGS.host, int(FLAGS.port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # create request
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'resnet'
    request.model_spec.signature_name = 'serving_default'

    # read image into numpy array
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising, so check explicitly before touching the array.
    raw = cv2.imread(FLAGS.image)
    if raw is None:
        raise IOError('could not read image file: {!r}'.format(FLAGS.image))
    img = raw.astype(np.float32)

    # convert to tensor proto and make request
    # shape is in NHWC (num_samples x height x width x channels) format
    dims = [tensor_shape_pb2.TensorShapeProto.Dim(size=dim)
            for dim in [1] + list(img.shape)]
    tensor = tensor_pb2.TensorProto(
        dtype=types_pb2.DT_FLOAT,
        tensor_shape=tensor_shape_pb2.TensorShapeProto(dim=dims),
        # tolist() flattens to plain Python floats in one native pass rather
        # than building a list of numpy scalar objects.
        float_val=img.reshape(-1).tolist())
    request.inputs['input'].CopyFrom(tensor)

    # The response is not inspected; this script only measures elapsed time.
    stub.Predict(request, 30.0)
    print('total time: {}s'.format(time.time() - tt))
# Run the client only when this file is executed as a script.
if __name__ == '__main__':
    main()
another note- even though I got the protobuf import method working- I'm seeing no performance gains- I get the same accuracy at a slight decrease in speed. If you have any thoughts as to why that may be, I'm all ears!
I figured this out- seems simple in hindsight, but I am measuring inference time only. This code measures time from import till inference- but initial import time is really not that significant to me (and in most applications). It's a little misleading to claim that you've sped up tensorflow serving performance using this method when in fact it only speeds up the initial import time. Nonetheless, you've put out an article for free with code for others to benefit from, so I can't be upset- but I think it would be a good idea to clarify that this doesn't decrease inference time at all (in my tests I actually saw a slight increase in inference time- both in my application and in an exact replication of your code). It doesn't seem like there's any latency reduction by switching out `tf.contrib.util.make_tensor_proto` calls.
AttributeError: 'grpc._cython.cygrpc.Channel' object has no attribute 'unary_unary'
What should I do?
@kr-ish are you sure that you didn't get any performance improvements during inference time?
If no, I don't want to get into the hassle of maintaining proto files myself.
> @kr-ish are you sure that you didn't get any performance improvements during inference time?
> If no, I don't want to get into the hassle of maintaining proto files myself.
Yes, I didn't see any inference time improvements
Thanks for responding
Anyone coming back here. I can also confirm what @kr-ish mentioned. I am not seeing any major improvement implementing this. You should only use it if you want to reduce the whole size overall.
> Anyone coming back here. I can also confirm what @kr-ish mentioned. I am not seeing any major improvement implementing this. You should only use it if you want to reduce the whole size overall.
Me too.
I've only achieved some performance gains with the compression configuration
just like this
result = stub.Predict(
request,
timeout=1, # timeout (second)
compression=1, # gzip 2, deflate 1, none 0
)
hey Masroor- thanks for posting this! just wanted to note that I had to change the imports in some of the protos (for example, changing
`import "tensorflow/core/framework/tensor_shape.proto";`
in `tensor.proto`
to `import "protos/tensorflow/core/framework/tensor_shape.proto";`
) to get this to work with the structure you outlined in your article. perhaps I'm doing something wrong here- do let me know if you know how I can make this work without making these changes