Building tensorflow/serving from Source and Pushing the Image

I Preparation

  • CentOS 5.4

    • amd64 architecture
    • 8 cores / 16 GB RAM
  • bazel: 4.2.2

  • go: 1.16.12

  • docker: 20.10.12

  • git: 2.19.1

# Clone the repository
git clone https://github.com/tensorflow/serving

# Switch to the release branch
git checkout r1.15
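
# Confirm the branch switch succeeded (should print: r1.15)
git rev-parse --abbrev-ref HEAD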

II Building a Custom tensorflow_model_server

  • Enter the source directory: cd serving
  • Modify the HTTP request handler to log the beginning and end of each request (the edited method is shown below)
  • vi tensorflow_serving/model_servers/http_rest_api_handler.cc
// The method to modify
Status HttpRestApiHandler::ProcessRequest(
    const absl::string_view http_method, const absl::string_view request_path,
    const absl::string_view request_body,
    std::vector<std::pair<string, string>>* headers, string* output) {
  headers->clear();
  output->clear();
  AddHeaders(headers);
  string model_name;
  string model_version_str;
  string method;
  string model_subresource;
  Status status = errors::InvalidArgument("Malformed request: ", http_method,
                                          " ", request_path);
  // Added: begin-access log. model_name has not been parsed from the path
  // yet at this point, so only the request path is logged here.
  LOG(INFO) << "<= Begin Access " << string(request_path) << " ...";
  if (http_method == "POST" &&
      RE2::FullMatch(string(request_path), prediction_api_regex_, &model_name,
                     &model_version_str, &method)) {
    absl::optional<int64> model_version;
    if (!model_version_str.empty()) {
      int64 version;
      if (!absl::SimpleAtoi(model_version_str, &version)) {
        return errors::InvalidArgument(
            "Failed to convert version: ", model_version_str, " to numeric.");
      }
      model_version = version;
    }
    if (method == "classify") {
      status = ProcessClassifyRequest(model_name, model_version, request_body,
                                      output);
    } else if (method == "regress") {
      status = ProcessRegressRequest(model_name, model_version, request_body,
                                     output);
    } else if (method == "predict") {
      status = ProcessPredictRequest(model_name, model_version, request_body,
                                     output);
    }
  } else if (http_method == "GET" &&
             RE2::FullMatch(string(request_path), modelstatus_api_regex_,
                            &model_name, &model_version_str,
                            &model_subresource)) {
    if (!model_subresource.empty() && model_subresource == "metadata") {
      status =
          ProcessModelMetadataRequest(model_name, model_version_str, output);
    } else {
      status = ProcessModelStatusRequest(model_name, model_version_str, output);
    }
  }
  
  // Added: end-access log. model_name is populated by now (it remains empty
  // if the request matched neither API route).
  LOG(INFO) << "=> End Access " << string(request_path)
            << " Model Name: " << model_name << " ...";
  if (!status.ok()) {
    FillJsonErrorMsg(status.error_message(), output);
  }
  return status;
}
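
With both LOG(INFO) calls in place, every REST request should produce a pair of log lines roughly like the following (timestamps and source line numbers are illustrative):

2022-01-01 12:00:00.000001: I tensorflow_serving/model_servers/http_rest_api_handler.cc:120] <= Begin Access /v1/models/half_plus_two:predict ...
2022-01-01 12:00:00.000123: I tensorflow_serving/model_servers/http_rest_api_handler.cc:165] => End Access /v1/models/half_plus_two:predict Model Name: half_plus_two ...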
  • Compile and run
# Run the tests before building (optional; can be skipped)
tools/run_in_docker.sh -d tensorflow/serving:1.15.0-devel bazel test -c opt tensorflow_serving/...

# Run the build
tools/run_in_docker.sh -d tensorflow/serving:1.15.0-devel bazel build tensorflow_serving/...

# The first build takes about 2 hours; subsequent incremental rebuilds take about 30 seconds.

# If the build fails with a "The repository '@zlib' could not be resolved" error, edit the file below
# File: /serving/.cache/_bazel_root/2d09b9f1da03dc789cd67b59a4571350/external/com_google_protobuf/BUILD
# Change ZLIB_DEPS = ["@zlib//:zlib"] to ZLIB_DEPS = ["@zlib_archive//:zlib"]
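
# Illustrative one-liner for the same fix; the _bazel_root hash directory
# differs per machine, hence the wildcard:
sed -i 's|@zlib//:zlib|@zlib_archive//:zlib|g' \
    /serving/.cache/_bazel_root/*/external/com_google_protobuf/BUILD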

# Replace the old tensorflow_model_server binary in the working directory
rm /home/workspace/tensorflow_model_server && cp bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server /home/workspace/

# Run TensorFlow Serving
/home/workspace/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=half_plus_two --model_base_path=/home/workspace/half_plus_two

# Smoke test
curl -d '{"instances": [1.0, 2.0, 5.0]}' -X POST http://localhost:8501/v1/models/half_plus_two:predict
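
# Expected response -- half_plus_two computes y = 0.5 * x + 2:
# {
#     "predictions": [2.5, 3.0, 4.5]
# }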

III Building a Custom tensorflow/serving Image

1 Dockerfile

FROM ubuntu:18.04

LABEL maintainer="littleevil.lin@gmail.com"

RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install TF Serving pkg
COPY ./tensorflow_model_server /usr/bin/tensorflow_model_server

# Expose ports
# gRPC
EXPOSE 8500

# REST
EXPOSE 8501

# Set where models should be stored in the container
ENV MODEL_BASE_PATH=/models
RUN mkdir -p ${MODEL_BASE_PATH}

# The only required piece is the model name in order to differentiate endpoints
ENV MODEL_NAME=model

# Create a script that runs the model server so we can use environment variables
# while also passing in arguments from the docker command line
RUN echo '#!/bin/bash \n\n\
tensorflow_model_server --port=8500 --rest_api_port=8501 \
--model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} \
"$@"' > /usr/bin/tf_serving_entrypoint.sh \
&& chmod +x /usr/bin/tf_serving_entrypoint.sh

ENTRYPOINT ["/usr/bin/tf_serving_entrypoint.sh"]
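
For reference, the RUN echo above generates /usr/bin/tf_serving_entrypoint.sh with contents equivalent to the following (reconstructed from the Dockerfile, not copied from a built image):

#!/bin/bash

tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"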

2 Building and Pushing the Docker Image

# Log in to the registry
docker login

# Change to the build directory
cd /home/workspace

# Build the image
docker build --network=host -t littleevil.lin/tfs:1.15.0 ./
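
# Confirm the image exists locally
docker images littleevil.lin/tfs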

# Run the image
docker run -t --rm -p 8501:8501 \
    -v "/home/workspace/half_plus_two:/models/half_plus_two" \
    -e MODEL_NAME=half_plus_two \
    littleevil.lin/tfs:1.15.0 &
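
# Verify the container serves requests (same smoke test as before);
# expect {"predictions": [2.5, 3.0, 4.5]}
curl -d '{"instances": [1.0, 2.0, 5.0]}' -X POST http://localhost:8501/v1/models/half_plus_two:predict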

# Push the image
docker push littleevil.lin/tfs:1.15.0