Skip to content

Commit 6b33065

Browse files
authored
Merge pull request #2155 from FedML-AI/alexleung/dev_v070_for_refactor
[CoreEngine] 1. fixed the issue that the fork method is not supported i…
2 parents 343b940 + 28ff0f3 commit 6b33065

6 files changed

Lines changed: 30 additions & 13 deletions

File tree

python/fedml/__init__.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import platform
23

34
import multiprocess as multiprocessing
45
import os
@@ -92,13 +93,7 @@ def init(args=None, check_env=True, should_init_logs=True):
9293
# Windows/Linux/MacOS compatibility issues on multi-processing
9394
# https://github.com/pytorch/pytorch/issues/3492
9495
"""
95-
if multiprocessing.get_start_method() != "fork":
96-
# force all platforms (Windows/Linux/macOS) to use the same way (fork) for multiprocessing
97-
multiprocessing.set_start_method("fork", force=True)
98-
99-
# if multiprocessing.get_start_method() != "spawn":
100-
# # force all platforms (Windows/Linux/MacOS) to use the same way (spawn) for multiprocessing
101-
# multiprocessing.set_start_method("spawn", force=True)
96+
_init_multiprocessing()
10297

10398
"""
10499
# https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial
@@ -450,6 +445,21 @@ def _run_distributed():
450445
pass
451446

452447

448+
def _init_multiprocessing():
449+
"""
450+
# Windows/Linux/MacOS compatibility issues on multi-processing
451+
# https://github.com/pytorch/pytorch/issues/3492
452+
"""
453+
if platform.system() == "Windows":
454+
if multiprocessing.get_start_method() != "spawn":
455+
# Windows does not support the fork start method, so force spawn for multiprocessing
456+
multiprocessing.set_start_method("spawn", force=True)
457+
else:
458+
if multiprocessing.get_start_method() != "fork":
459+
# force all non-Windows platforms (Linux/macOS) to use the same way (fork) for multiprocessing
460+
multiprocessing.set_start_method("fork", force=True)
461+
462+
453463
def set_env_version(version):
454464
set_env_kv("FEDML_ENV_VERSION", version)
455465
load_env()

python/fedml/computing/scheduler/comm_utils/hardware_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __get_util(cls) -> Optional[GPUCardUtil]:
2727
except Exception as e:
2828
pass
2929

30-
logging.error("No GPU card detected")
30+
# logging.error("No GPU card detected")
3131
return None
3232

3333
@staticmethod

python/fedml/computing/scheduler/model_scheduler/device_model_db.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import logging
33
import os
4+
import platform
45
import time
56

67
from fedml.computing.scheduler.model_scheduler.device_server_constants import ServerConstants
@@ -261,7 +262,10 @@ def open_job_db(self):
261262
self.db_base_dir = ServerConstants.get_database_dir()
262263

263264
job_db_path = os.path.join(self.db_base_dir, FedMLModelDatabase.MODEL_DEPLOYMENT_DB)
264-
self.db_engine = create_engine('sqlite:////{}'.format(job_db_path), echo=False)
265+
if platform.system() == "Windows":
266+
self.db_engine = create_engine('sqlite:///{}'.format(job_db_path), echo=False)
267+
else:
268+
self.db_engine = create_engine('sqlite:////{}'.format(job_db_path), echo=False)
265269

266270
db_session_class = sessionmaker(bind=self.db_engine)
267271
self.db_connection = db_session_class()

python/fedml/computing/scheduler/scheduler_core/base_db.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import os
3+
import platform
34
import time
45

56
from sqlalchemy import Column, String, TEXT, Integer, Float, create_engine, and_
@@ -25,7 +26,10 @@ def open_job_db(self):
2526
if self.db_connection is not None:
2627
return
2728

28-
self.db_engine = create_engine('sqlite:////{}'.format(self.db_path), echo=False)
29+
if platform.system() == "Windows":
30+
self.db_engine = create_engine('sqlite:///{}'.format(self.db_path), echo=False)
31+
else:
32+
self.db_engine = create_engine('sqlite:////{}'.format(self.db_path), echo=False)
2933

3034
db_session_class = sessionmaker(bind=self.db_engine)
3135
self.db_connection = db_session_class()

python/fedml/computing/scheduler/scheduler_core/compute_gpu_db.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from fedml.core.common.singleton import Singleton
99
from .base_db import FedMLBaseDb
1010
from .compute_utils import ComputeUtils
11+
from ..master.server_constants import ServerConstants
1112

1213
Base = declarative_base()
1314

python/fedml/computing/scheduler/scheduler_core/scheduler_base_protocol_manager.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ def __init__(self, args, agent_config=None, is_master=False):
4646
self.status_reporter = None
4747
self.user_name = args.user_name
4848

49-
if multiprocessing.get_start_method() != "fork":
50-
# force all platforms (Windows/Linux/macOS) to use the same way (fork) for multiprocessing
51-
multiprocessing.set_start_method("fork", force=True)
49+
fedml._init_multiprocessing()
5250

5351
def generate_topics(self):
5452
# generate the subscribed topics.

0 commit comments

Comments
 (0)