@@ -26,19 +26,22 @@ def detect_gpu_card_type(cls) -> Optional[GPUCardType]:
2626
@staticmethod
def get_gpu_cards() -> List[GPUCard]:
    """Return the cards collected by ``__get_gpu_cards`` as a plain list.

    Returns:
        The ``GPUCard`` values of the id-indexed mapping, in the mapping's
        iteration order. Empty when the mapping is empty.
    """
    cards_by_id = QualcommNPUtil.__get_gpu_cards()
    return list(cards_by_id.values())

@staticmethod
def __get_gpu_cards() -> Dict[int, GPUCard]:
    """Query the qaicrt runtime for devices and index the accepted ones by id.

    A device is accepted when its info query reports status value 0 and its
    ``devStatus`` value is 1; accepted devices are converted with
    ``__convert`` and stored under the resulting ``GPUCard.id``.

    Returns:
        Mapping from card id to ``GPUCard``. Empty when the device-id query
        fails (an error is logged in that case) or no device is accepted.
    """
    # Function-scope import kept as in the original.
    # NOTE(review): presumably qaicrt is only installed on Qualcomm hosts - confirm.
    from qaicrt import Util, QIDList, QDevInfo, QStatus

    found = {}
    runtime = Util()
    ids_status, device_ids = runtime.getDeviceIds()

    # NOTE(review): the error branch is taken to belong to the device-id
    # query check (not the per-device check) - confirm against the file.
    if ids_status.value != 0:
        logging.error("Qualcomm Cards Status not Healthy")
        return found

    for device_id in device_ids:
        info_status, info = runtime.getDeviceInfo(device_id)
        # Skip devices whose info query failed or whose devStatus is not 1.
        if info_status.value != 0 or info.devStatus.value != 1:
            continue
        converted = QualcommNPUtil.__convert(info)
        found[converted.id] = converted
    return found
4346
4447 @staticmethod
@@ -58,11 +61,21 @@ def get_available_gpu_card_ids(order: str, limit: int, max_load: float, max_memo
5861
@staticmethod
def get_docker_gpu_device_mapping(gpu_ids: Optional[List[int]], num_gpus: int = 0) -> Optional[Dict]:
    """Build the docker ``devices`` entry for the requested card ids.

    Args:
        gpu_ids: Card ids to map; ``None`` or an empty list yields ``None``.
        num_gpus: Accepted for interface compatibility; not read here.

    Returns:
        ``{"devices": ["<path>:<path>", ...]}`` with one entry per requested
        id, or ``None`` when no ids were given or when any requested id is
        unknown or has no device path (an error is logged in that case).
    """
    if not (gpu_ids is not None and len(gpu_ids)):
        return None

    known_cards = QualcommNPUtil.__get_gpu_cards()
    mappings = []

    for requested_id in gpu_ids:
        path = known_cards[requested_id].device_path if requested_id in known_cards else None
        if not path:
            # All-or-nothing: one unresolved id invalidates the whole mapping.
            logging.error("Failed to get gpu device mapping for docker")
            return None
        # Same path on both sides of the colon.
        mappings.append(f"{path}:{path}")

    return {"devices": mappings}
6679
6780 @staticmethod
6881 def get_docker_gpu_ids_by_container_name (container_name : str , docker_client : DockerClient ) -> List [int ]:
@@ -87,7 +100,8 @@ def __convert(npu) -> GPUCard:
87100 load = (nsp_total - nsp_free ) / nsp_total
88101
89102 return GPUCard (
90- id = npu .qid ,
103+ id = npu .mhiId ,
104+ device_path = npu .name ,
91105 name = npu .pciInfo .devicename ,
92106 driver = npu .devData .fwQCImageVersionString ,
93107 serial = npu .devData .serial ,
0 commit comments