| 1 | +import fedml |
| 2 | + |
1 | 3 | import logging |
2 | 4 | import os |
3 | | -import pickle |
4 | | -import platform |
5 | 5 | import shutil |
6 | 6 | import time |
7 | 7 | import traceback |
8 | 8 | import yaml |
9 | 9 | import datetime |
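| | +# Docker SDK for Python; presumably imported here for the container
| | +# management and inspection done elsewhere in this module.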
| 10 | +import docker |
10 | 11 |
11 | 12 | import requests |
12 | 13 | import torch |
15 | 16 |
16 | 17 | import collections.abc |
17 | 18 |
18 | | -import fedml |
19 | 19 | from fedml.computing.scheduler.comm_utils import sys_utils, security_utils |
20 | | -from fedml.computing.scheduler.comm_utils.container_utils import ContainerUtils |
21 | 20 | from fedml.computing.scheduler.comm_utils.hardware_utils import HardwareUtil |
22 | 21 | from fedml.computing.scheduler.comm_utils.job_utils import JobRunnerUtils |
23 | | - |
24 | | -for type_name in collections.abc.__all__: |
25 | | - setattr(collections, type_name, getattr(collections.abc, type_name)) |
26 | | - |
27 | 22 | from fedml.computing.scheduler.comm_utils.constants import SchedulerConstants |
28 | 23 | from fedml.computing.scheduler.model_scheduler.device_client_constants import ClientConstants |
29 | | -import io |
30 | | - |
31 | | -import docker |
32 | | -from ..scheduler_core.compute_cache_manager import ComputeCacheManager |
| 24 | +from fedml.computing.scheduler.model_scheduler.device_model_cache import FedMLModelCache |
33 | 25 | from ..scheduler_core.compute_utils import ComputeUtils |
34 | 26 | from ..comm_utils.container_utils import ContainerUtils |
35 | | - |
36 | 27 | from .device_http_inference_protocol import FedMLHttpInference |
37 | 28 |
38 | | -from fedml.computing.scheduler.model_scheduler.device_model_cache import FedMLModelCache |
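| | +# Compatibility shim: re-expose the abstract base classes from
| | +# collections.abc on the collections module (e.g. collections.Mapping),
| | +# since Python 3.10 removed those aliases and some dependencies still
| | +# reference the old names.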
| 29 | +for type_name in collections.abc.__all__: |
| 30 | + setattr(collections, type_name, getattr(collections.abc, type_name)) |
39 | 31 |
40 | 32 | no_real_gpu_allocation = None |
41 | 33 |
@@ -432,8 +424,6 @@ def should_exit_logs(end_point_id, model_id, cmd_type, model_name, inference_eng |
432 | 424 | if cmd_type == ClientConstants.CMD_TYPE_RUN_DEFAULT_SERVER: |
433 | 425 | # TODO: Exit quickly if the container status is Exited or Removed
434 | 426 | # If the container has exited, return True, meaning we should exit the logs
435 | | - # container_name = "{}".format(ClientConstants.FEDML_DEFAULT_SERVER_CONTAINER_NAME_PREFIX) + "__" + \ |
436 | | - # security_utils.get_content_hash(model_name) |
437 | 427 | try: |
438 | 428 | inference_output_url, model_version, model_metadata, model_config = \ |
439 | 429 | get_model_info(model_name, inference_engine, inference_port, infer_host, |
@@ -554,8 +544,6 @@ def log_deployment_result(end_point_id, model_id, cmd_container_name, cmd_type, |
554 | 544 |
555 | 545 | def is_client_inference_container_ready(infer_url_host, inference_http_port, inference_model_name, local_infer_url, |
556 | 546 | inference_type="default", model_version="", request_input_example=None): |
557 | | - # logging.info(f"Inference type: {inference_type}, infer_url_host {infer_url_host}, \ |
558 | | - # inference_http_port: {inference_http_port}, local_infer_url {local_infer_url}") |
559 | 547 |
560 | 548 | if inference_type == "default": |
561 | 549 | default_client_container_ready_url = "http://{}:{}/ready".format("0.0.0.0", inference_http_port) |
@@ -631,211 +619,5 @@ def run_http_inference_with_curl_request(inference_url, inference_input_list, in |
631 | 619 | inference_type=inference_type, engine_type=engine_type, timeout=timeout) |
632 | 620 |
633 | 621 |
634 | | -def convert_model_to_onnx( |
635 | | - torch_model, output_path: str, dummy_input_list, input_size: int, input_is_tensor=True |
636 | | -) -> None: |
637 | | - from collections import OrderedDict |
638 | | - import torch |
639 | | - from torch.onnx import TrainingMode |
640 | | - |
641 | | - torch.onnx.export(torch_model, # model being run |
642 | | - dummy_input_list if input_is_tensor else tuple(dummy_input_list), |
643 | | - # model input (or a tuple for multiple inputs) |
644 | | - f=output_path, # where to save the model (can be a file or file-like object) |
645 | | - export_params=True, # store the trained parameter weights inside the model file |
646 | | - opset_version=11, # the ONNX version to export the model to |
647 | | - do_constant_folding=False, # whether to execute constant folding for optimization |
648 | | - input_names=["input1", "input2"], |
649 | | - # the model's input names |
650 | | - output_names=['output'], # the model's output names |
651 | | - training=TrainingMode.EVAL, |
652 | | - verbose=True, |
653 | | - dynamic_axes={"input1": {0: "batch_size"}, |
654 | | - "input2": {0: "batch_size"}, |
655 | | - "output": {0: "batch_size"}} |
656 | | - ) |
657 | | - |
658 | | - |
659 | | -def test_start_triton_server(model_serving_dir): |
660 | | - sudo_prefix = "sudo " |
661 | | - sys_name = platform.system() |
662 | | - if sys_name == "Darwin": |
663 | | - sudo_prefix = "" |
664 | | - gpu_attach_cmd = "" |
665 | | - |
666 | | - triton_server_container_name = "{}".format(ClientConstants.FEDML_TRITON_SERVER_CONTAINER_NAME_PREFIX) |
667 | | - triton_server_cmd = "{}docker stop {}; {}docker rm {}; {}docker run --name {} {} -p{}:8000 " \ |
668 | | - "-p{}:8001 -p{}:8002 " \ |
669 | | - "--shm-size {} " \ |
670 | | - "-v {}:/models {} " \ |
671 | | - "bash -c \"pip install transformers && tritonserver --strict-model-config=false " \ |
672 | | - "--model-control-mode=poll --repository-poll-secs={} " \ |
673 | | - "--model-repository=/models\" ".format(sudo_prefix, triton_server_container_name, |
674 | | - sudo_prefix, triton_server_container_name, |
675 | | - sudo_prefix, triton_server_container_name, |
676 | | - gpu_attach_cmd, |
677 | | - ClientConstants.INFERENCE_HTTP_PORT, |
678 | | - ClientConstants.INFERENCE_GRPC_PORT, |
679 | | - 8002, |
680 | | - "4096m", |
681 | | - model_serving_dir, |
682 | | - ClientConstants.INFERENCE_SERVER_IMAGE, |
683 | | - ClientConstants.FEDML_MODEL_SERVING_REPO_SCAN_INTERVAL) |
684 | | - logging.info("Run triton inference server: {}".format(triton_server_cmd)) |
685 | | - triton_server_process = ClientConstants.exec_console_with_script(triton_server_cmd, |
686 | | - should_capture_stdout=False, |
687 | | - should_capture_stderr=False, |
688 | | - no_sys_out_err=True) |
689 | | - |
690 | | - |
691 | | -def test_convert_pytorch_model_to_onnx(model_net_file, model_bin_file, model_name, model_in_params): |
692 | | - torch_model = torch.jit.load(model_net_file) |
693 | | - with open(model_bin_file, 'rb') as model_pkl_file: |
694 | | - model_state_dict = pickle.load(model_pkl_file) |
695 | | - torch_model.load_state_dict(model_state_dict) |
696 | | - torch_model.eval() |
697 | | - |
698 | | - input_size = model_in_params["input_size"] |
699 | | - input_types = model_in_params["input_types"] |
700 | | - |
701 | | - dummy_input_list = [] |
702 | | - for index, input_i in enumerate(input_size): |
703 | | - if input_types[index] == "int": |
704 | | - this_input = torch.tensor(torch.randint(0, 1, input_i)) |
705 | | - else: |
706 | | - this_input = torch.tensor(torch.zeros(input_i)) |
707 | | - dummy_input_list.append(this_input) |
708 | | - |
709 | | - onnx_model_dir = os.path.join(ClientConstants.get_model_cache_dir(), |
710 | | - ClientConstants.FEDML_CONVERTED_MODEL_DIR_NAME, |
711 | | - model_name, ClientConstants.INFERENCE_MODEL_VERSION) |
712 | | - if not os.path.exists(onnx_model_dir): |
713 | | - os.makedirs(onnx_model_dir, exist_ok=True) |
714 | | - onnx_model_path = os.path.join(onnx_model_dir, "model.onnx") |
715 | | - |
716 | | - convert_model_to_onnx(torch_model, onnx_model_path, dummy_input_list, input_size, |
717 | | - input_is_tensor=True) |
718 | | - |
719 | | - model_serving_dir = os.path.join(ClientConstants.get_model_cache_dir(), |
720 | | - ClientConstants.FEDML_CONVERTED_MODEL_DIR_NAME) |
721 | | - return model_serving_dir |
722 | | - |
723 | | - |
724 | | -def start_gpu_model_load_process(): |
725 | | - from multiprocessing import Process |
726 | | - import time |
727 | | - process = Process(target=load_gpu_model_to_cpu_device) |
728 | | - process.start() |
729 | | - while True: |
730 | | - time.sleep(1) |
731 | | - |
732 | | - |
733 | | -def load_gpu_model_to_cpu_device(): |
734 | | - import pickle |
735 | | - import io |
736 | | - import torch |
737 | | - |
738 | | - class CPU_Unpickler(pickle.Unpickler): |
739 | | - def find_class(self, module, name): |
740 | | - if module == 'torch.storage' and name == '_load_from_bytes': |
741 | | - return lambda b: torch.load(io.BytesIO(b), map_location='cpu') |
742 | | - else: |
743 | | - return super().find_class(module, name) |
744 | | - |
745 | | - model_file = "/home/fedml/.fedml/fedml-client/fedml/models/theta_rec_auc_81_single_label/theta_rec_auc_81_single_label" |
746 | | - with open(model_file, "rb") as model_pkl_file: |
747 | | - if not torch.cuda.is_available(): |
748 | | - model = CPU_Unpickler(model_pkl_file).load() |
749 | | - if model is None: |
750 | | - print("Failed to load gpu model to cpu device") |
751 | | - else: |
752 | | - print("Succeeded to load gpu model to cpu device") |
753 | | - |
754 | | - |
755 | 622 | if __name__ == "__main__": |
756 | | - start_gpu_model_load_process() |
757 | | - |
758 | | - model_serving_dir = test_convert_pytorch_model_to_onnx("./sample-open-training-model-net", |
759 | | - "./sample-open-training-model", |
760 | | - "rec-model", |
761 | | - {"input_size": [[1, 24], [1, 2]], |
762 | | - "input_types": ["int", "float"]}) |
763 | | - |
764 | | - test_start_triton_server(model_serving_dir) |
765 | | - |
766 | | - # input_data = {"model_version": "v0-Sun Feb 05 12:17:16 GMT 2023", |
767 | | - # "model_name": "model_414_45_open-model-test_v0-Sun-Feb-05-12-17-16-GMT-2023", |
768 | | - # # "data": "file:///Users/alexliang/fedml_data/mnist-image.png", |
769 | | - # "data": "https://raw.githubusercontent.com/niyazed/triton-mnist-example/master/images/sample_image.png", |
770 | | - # "end_point_id": 414, "model_id": 45, "token": "a09a18a14c4c4d89a8d5f9515704c073"} |
771 | | - # |
772 | | - # data_list = list() |
773 | | - # data_list.append(input_data["data"]) |
774 | | - # run_http_inference_with_lib_http_api_with_image_data(input_data["model_name"], |
775 | | - # 5001, 1, data_list, "") |
776 | | - # |
777 | | - # |
778 | | - # class LogisticRegression(torch.nn.Module): |
779 | | - # def __init__(self, input_dim, output_dim): |
780 | | - # super(LogisticRegression, self).__init__() |
781 | | - # self.linear = torch.nn.Linear(input_dim, output_dim) |
782 | | - # |
783 | | - # def forward(self, x): |
784 | | - # outputs = torch.sigmoid(self.linear(x)) |
785 | | - # return outputs |
786 | | - # |
787 | | - # |
788 | | - # model = LogisticRegression(28 * 28, 10) |
789 | | - # checkpoint = {'model': model} |
790 | | - # model_net_file = "/Users/alexliang/fedml-client/fedml/models/open-model-test/model-net.pt" |
791 | | - # torch.save(checkpoint, model_net_file) |
792 | | - # |
793 | | - # with open("/Users/alexliang/fedml-client/fedml/models/open-model-test/open-model-test", 'rb') as model_pkl_file: |
794 | | - # model_params = pickle.load(model_pkl_file) |
795 | | - # # torch.save(model_params, "/Users/alexliang/fedml-client/fedml/models/open-model-test/a.pt") |
796 | | - # # model = torch.load("/Users/alexliang/fedml-client/fedml/models/open-model-test/a.pt") |
797 | | - # loaded_checkpoint = torch.load(model_net_file) |
798 | | - # loaded_model = loaded_checkpoint["model"] |
799 | | - # loaded_model.load_state_dict(model_params) |
800 | | - # for parameter in loaded_model.parameters(): |
801 | | - # parameter.requires_grad = False |
802 | | - # loaded_model.eval() |
803 | | - # input_names = {"x": 0} |
804 | | - # convert_model_to_onnx(loaded_model, "/Users/alexliang/fedml-client/fedml/models/open-model-test/a.onnx", |
805 | | - # input_names, 28 * 28) |
806 | | - |
807 | | - # parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) |
808 | | - # parser.add_argument("--cf", "-c", help="config file") |
809 | | - # parser.add_argument("--role", "-r", type=str, default="client", help="role") |
810 | | - # parser.add_argument("--model_storage_local_path", "-url", type=str, default="/home/ubuntu", |
811 | | - # help="model storage local path") |
812 | | - # parser.add_argument("--inference_model_name", "-n", type=str, default="fedml-model", |
813 | | - # help="inference model name") |
814 | | - # parser.add_argument("--inference_engine", "-engine", type=str, default="ONNX", help="inference engine") |
815 | | - # parser.add_argument("--inference_http_port", "-http", type=int, default=8000, help="inference http port") |
816 | | - # parser.add_argument("--inference_grpc_port", "-gprc", type=int, default=8001, help="inference grpc port") |
817 | | - # parser.add_argument("--inference_metric_port", "-metric", type=int, default=8002, help="inference metric port") |
818 | | - # parser.add_argument("--inference_use_gpu", "-gpu", type=str, default="gpu", help="inference use gpu") |
819 | | - # parser.add_argument("--inference_memory_size", "-mem", type=str, default="256m", help="inference memory size") |
820 | | - # parser.add_argument("--inference_convertor_image", "-convertor", type=str, |
821 | | - # default=ClientConstants.INFERENCE_CONVERTOR_IMAGE, help="inference convertor image") |
822 | | - # parser.add_argument("--inference_server_image", "-server", type=str, |
823 | | - # default=ClientConstants.INFERENCE_SERVER_IMAGE, help="inference server image") |
824 | | - # args = parser.parse_args() |
825 | | - # args.user = args.user |
826 | | - # |
827 | | - # pip_source_dir = os.path.dirname(__file__) |
828 | | - # __running_model_name, __inference_output_url, __model_version, __model_metadata, __model_config = \ |
829 | | - # start_deployment( |
830 | | - # args.model_storage_local_path, |
831 | | - # args.inference_model_name, |
832 | | - # args.inference_engine, |
833 | | - # args.inference_http_port, |
834 | | - # args.inference_grpc_port, |
835 | | - # args.inference_metric_port, |
836 | | - # args.inference_use_gpu, |
837 | | - # args.inference_memory_size, |
838 | | - # args.inference_convertor_image, |
839 | | - # args.inference_server_image) |
840 | | - # print("Model deployment results, running model name: {}, url: {}, model metadata: {}, model config: {}".format( |
841 | | - # __running_model_name, __inference_output_url, __model_metadata, __model_config)) |
| 623 | + pass |