diff --git a/config.py b/config.py index 713423d..155b68e 100644 --- a/config.py +++ b/config.py @@ -1,7 +1,7 @@ import numpy as np # Training Parameters -MODEL: str = "attention_matd3" # options: 'maddpg', 'matd3', 'mappo', 'masac', 'attention_', 'random' +MODEL: str = "attention_matd3" # options: 'maddpg', 'matd3', 'mappo', 'masac', 'attention_', 'random', 'nearest_greedy' SEED: int = 42 # random seed for reproducibility np.random.seed(SEED) # set numpy random seed STEPS_PER_EPISODE: int = 1000 # total T @@ -20,6 +20,14 @@ UE_MAX_DIST: float = 15.0 # d_max^UE in meters UE_MAX_WAIT_TIME: int = 10 # in time slots +USE_HOTSPOTS: bool = False # using hotspots or not +NUM_HOTSPOTS: int = 2 # number of hotspots +HOTSPOT_RADIUS: float = 100.0 # radius of each hotspot in meters +assert NUM_HOTSPOTS * HOTSPOT_RADIUS * 2 <= min(AREA_WIDTH, AREA_HEIGHT), "Hotspots cannot fit in the area without overlap." +HOTSPOT_SEPARATION: float = 400.0 # minimum separation between hotspots in meters +assert HOTSPOT_SEPARATION >= 2 * HOTSPOT_RADIUS, "Hotspot separation must be at least twice the hotspot radius to avoid overlap." 
+HOTSPOT_UE_PROB: float = 0.8 # probability that a UE is in a hotspot + # UAV Parameters UAV_ALTITUDE: int = 100 # H in meters UAV_SPEED: float = 15.0 # v^UAV in m/s @@ -137,3 +145,7 @@ ATTN_HIDDEN_DIM: int = 64 # Embedding size for internal attention representations ATTN_NUM_HEADS: int = 8 # Number of attention heads assert ATTN_HIDDEN_DIM % ATTN_NUM_HEADS == 0, f"ATTN_HIDDEN_DIM ({ATTN_HIDDEN_DIM}) must be divisible by ATTN_NUM_HEADS ({ATTN_NUM_HEADS})" + +# Cache Ablation Setting +CACHE_POLICY: str = "GDSF" # Options: "GDSF", "LRU", "LFU", "RANDOM", "NO_CACHE" +ALLOW_COLLABORATION: bool = True # whether UAVs can collaborate to serve UEs diff --git a/environment/env.py b/environment/env.py index 8d81f4f..72c4a65 100644 --- a/environment/env.py +++ b/environment/env.py @@ -20,14 +20,23 @@ def uavs(self) -> list[UAV]: def ues(self) -> list[UE]: return self._ues - def reset(self) -> list[np.ndarray]: + def reset(self, initial_positions: list[np.ndarray] | None = None) -> list[np.ndarray]: """Resets the environment to an initial state and returns the initial observations.""" + if getattr(config, "USE_HOTSPOTS", False): + UE.generate_hotspots() # Randomize hotspot locations on reset + self._ues = [UE(i) for i in range(config.NUM_UES)] self._uavs = [UAV(i) for i in range(config.NUM_UAVS)] + + # Apply strict geometric spawn points if provided by the static baseline + if initial_positions is not None: + for i, uav in enumerate(self._uavs): + uav.pos[:2] = initial_positions[i] + self._time_step = 0 return self._get_obs() - def step(self, actions: np.ndarray) -> tuple[list[np.ndarray], list[float], tuple[float, float, float, float]]: + def step(self, actions: np.ndarray) -> tuple[list[np.ndarray], list[float], tuple[float, float, float, float, int, int]]: """Execute one time step of the simulation.""" self._time_step += 1 @@ -91,7 +100,7 @@ def _get_obs(self) -> list[np.ndarray]: ue_states: np.ndarray = np.zeros((config.MAX_ASSOCIATED_UES, config.UE_OBS_DIM), 
dtype=np.float32) ues: list[UE] = sorted(uav.current_covered_ues, key=lambda u: float(np.linalg.norm(uav.pos[:2] - u.pos[:2])))[: config.MAX_ASSOCIATED_UES] for i, ue in enumerate(ues): - delta_pos: np.ndarray = (ue.pos[:2] - uav.pos[:2]) / config.AREA_WIDTH + delta_pos: np.ndarray = (ue.pos[:2] - uav.pos[:2]) / np.array([config.AREA_WIDTH, config.AREA_HEIGHT], dtype=np.float32) req_type, req_size, req_id = ue.current_request norm_type: float = float(req_type) / 2.0 # assuming 3 types: 0,1,2 norm_id: float = float(req_id) / float(config.NUM_FILES) @@ -171,7 +180,7 @@ def _associate_ues_to_uavs(self) -> None: best_uav.current_covered_ues.append(ue) ue.assigned = True - def _get_rewards_and_metrics(self) -> tuple[list[float], tuple[float, float, float, float]]: + def _get_rewards_and_metrics(self) -> tuple[list[float], tuple[float, float, float, float, int, int]]: """Returns the reward and other metrics.""" total_latency: float = sum(ue.latency_current_request if ue.assigned else config.NON_SERVED_LATENCY_PENALTY for ue in self._ues) total_energy: float = sum(uav.energy for uav in self._uavs) @@ -188,10 +197,15 @@ def _get_rewards_and_metrics(self) -> tuple[list[float], tuple[float, float, flo r_offline: float = config.ALPHA_4 * np.log(1.0 + offline_rate) reward: float = r_fairness - r_latency - r_energy - r_offline rewards: list[float] = [reward] * config.NUM_UAVS + for uav in self._uavs: if uav.collision_violation: rewards[uav.id] -= config.COLLISION_PENALTY if uav.boundary_violation: rewards[uav.id] -= config.BOUNDARY_PENALTY rewards = [r * config.REWARD_SCALING_FACTOR for r in rewards] - return rewards, (total_latency, total_energy, jfi, offline_rate) + + step_hits: int = sum(uav.cache_hits_step for uav in self._uavs) + step_reqs: int = sum(uav.total_reqs_step for uav in self._uavs) + + return rewards, (total_latency, total_energy, jfi, offline_rate, step_hits, step_reqs) diff --git a/environment/uavs.py b/environment/uavs.py index ef9d656..01d4bf6 100644 --- 
a/environment/uavs.py +++ b/environment/uavs.py @@ -24,11 +24,46 @@ def _get_computing_latency_and_energy(uav: UAV, cpu_cycles: float) -> tuple[floa def _try_add_file_to_cache(uav: UAV, file_id: int) -> None: - """Try to add a file to UAV cache if there's enough space.""" + """Try to add a file to UAV cache if there's enough space, or evict based on policy.""" + policy: str = getattr(config, "CACHE_POLICY", "GDSF") + if policy == "NO_CACHE": + return + if uav._working_cache[file_id]: return # Already in cache + + file_size: int = config.FILE_SIZES[file_id] + + # GDSF only adds if there's space (eviction is handled periodically via gdsf_cache_update) + if policy == "GDSF": + used_space: int = np.sum(uav._working_cache * config.FILE_SIZES) + if used_space + file_size <= config.UAV_STORAGE_CAPACITY[uav.id]: + uav._working_cache[file_id] = True + return + + # LRU, LFU, and RANDOM update reactively (on-demand eviction) + if file_size > config.UAV_STORAGE_CAPACITY[uav.id]: + return # Can't fit even if cache is empty, so skip caching used_space: int = np.sum(uav._working_cache * config.FILE_SIZES) - if used_space + config.FILE_SIZES[file_id] <= config.UAV_STORAGE_CAPACITY[uav.id]: + while used_space + file_size > config.UAV_STORAGE_CAPACITY[uav.id]: + cached_indices: np.ndarray = np.where(uav._working_cache)[0] + if len(cached_indices) == 0: + break # Cache is empty but file still doesn't fit + + evict_idx: int = -1 + if policy == "LRU": + evict_idx = cached_indices[np.argmin(uav._last_access_time[cached_indices])] + elif policy == "LFU": + evict_idx = cached_indices[np.argmin(uav._cumulative_freq_counts[cached_indices])] + elif policy == "RANDOM": + evict_idx = np.random.choice(cached_indices) + else: + raise ValueError(f"Unknown cache policy: {policy}") + + uav._working_cache[evict_idx] = False + used_space -= config.FILE_SIZES[evict_idx] + + if used_space + file_size <= config.UAV_STORAGE_CAPACITY[uav.id]: uav._working_cache[file_id] = True @@ -51,6 +86,12 @@ def 
__init__(self, uav_id: int) -> None: self._freq_counts: np.ndarray = np.zeros(config.NUM_FILES, dtype=np.float32) self._ema_scores: np.ndarray = np.zeros(config.NUM_FILES, dtype=np.float32) + self._last_access_time: np.ndarray = np.zeros(config.NUM_FILES, dtype=float) + self._cumulative_freq_counts: np.ndarray = np.zeros(config.NUM_FILES, dtype=int) + self._local_timer: int = 0 + self.cache_hits_step: int = 0 + self.total_reqs_step: int = 0 + self._uav_mbs_rate: float = 0.0 @property @@ -74,6 +115,8 @@ def reset_for_next_step(self) -> None: self._energy_current_slot = 0.0 self.collision_violation = False self.boundary_violation = False + self.cache_hits_step = 0 + self.total_reqs_step = 0 def update_position(self, next_pos: np.ndarray) -> None: """Update the UAV's position to the new location chosen by the MARL agent.""" @@ -110,13 +153,21 @@ def process_requests(self) -> None: self._process_energy_request(ue) continue + self.total_reqs_step += 1 # not counting energy requests for CHR + ue_uav_rate: float = comms.calculate_ue_uav_rate(comms.calculate_channel_gain(ue.pos, self.pos), len(self._current_covered_ues)) best_target_idx, best_target_uav = self._decide_offloading_target(ue.current_request, ue_uav_rate) self._freq_counts[req_id] += 1 # I got a request for this file + self._local_timer += 1 + self._last_access_time[req_id] = self._local_timer + self._cumulative_freq_counts[req_id] += 1 if best_target_idx == 1 and best_target_uav is not None: # Request also seen by collaborating UAV best_target_uav._freq_counts[req_id] += 1 + best_target_uav._local_timer += 1 + best_target_uav._last_access_time[req_id] = best_target_uav._local_timer + best_target_uav._cumulative_freq_counts[req_id] += 1 if req_type == 0: if best_target_idx != 0: @@ -164,27 +215,28 @@ def _decide_offloading_target(self, current_req: tuple[int, int, int], ue_uav_ra best_exp_latency = exp_mbs_latency best_target_idx = 2 - # Collaborating UAV Expected Latency - for neighbor in self._neighbors: - 
belief_prob: float = _get_belief_probability(req_id, neighbor.id) - - uav_uav_rate: float = comms.calculate_uav_uav_rate(comms.calculate_channel_gain(self.pos, neighbor.pos)) - uav_mbs_rate: float = comms.calculate_uav_mbs_rate(comms.calculate_channel_gain(neighbor.pos, config.MBS_POS)) - uav_uav_download_latency: float = file_size / uav_uav_rate - exp_neighbor_fetch_latency: float = (1.0 - belief_prob) * (file_size / uav_mbs_rate) # For both - exp_neighbor_latency: float = exp_neighbor_fetch_latency + uav_uav_download_latency + ue_uav_download_latency # For content - if req_type == 0: # Service - # Neighbor Load: They broadcasted 'initial_load'. We add +1 because "If I come, I add to the pile." - neigh_load: int = neighbor._current_service_request_count + 1 - assert neigh_load > 0 - est_comp_latency = cpu_cycles / (config.UAV_COMPUTING_CAPACITY[neighbor.id] / neigh_load) - uav_uav_upload_latency: float = req_size / uav_uav_rate - exp_neighbor_latency = ue_uav_upload_latency + uav_uav_upload_latency + exp_neighbor_fetch_latency + est_comp_latency # Overwrite for service - - if exp_neighbor_latency < best_exp_latency: - best_exp_latency = exp_neighbor_latency - best_target_idx = 1 - best_target_uav = neighbor + if getattr(config, "ALLOW_COLLABORATION", True): + # Collaborating UAV Expected Latency + for neighbor in self._neighbors: + belief_prob: float = _get_belief_probability(req_id, neighbor.id) + + uav_uav_rate: float = comms.calculate_uav_uav_rate(comms.calculate_channel_gain(self.pos, neighbor.pos)) + uav_mbs_rate: float = comms.calculate_uav_mbs_rate(comms.calculate_channel_gain(neighbor.pos, config.MBS_POS)) + uav_uav_download_latency: float = file_size / uav_uav_rate + exp_neighbor_fetch_latency: float = (1.0 - belief_prob) * (file_size / uav_mbs_rate) # For both + exp_neighbor_latency: float = exp_neighbor_fetch_latency + uav_uav_download_latency + ue_uav_download_latency # For content + if req_type == 0: # Service + # Neighbor Load: They broadcasted 
'initial_load'. We add +1 because "If I come, I add to the pile." + neigh_load: int = neighbor._current_service_request_count + 1 + assert neigh_load > 0 + est_comp_latency = cpu_cycles / (config.UAV_COMPUTING_CAPACITY[neighbor.id] / neigh_load) + uav_uav_upload_latency: float = req_size / uav_uav_rate + exp_neighbor_latency = ue_uav_upload_latency + uav_uav_upload_latency + exp_neighbor_fetch_latency + est_comp_latency # Overwrite for service + + if exp_neighbor_latency < best_exp_latency: + best_exp_latency = exp_neighbor_latency + best_target_idx = 1 + best_target_uav = neighbor assert best_exp_latency >= 0.0 return best_target_idx, best_target_uav @@ -202,6 +254,8 @@ def _process_service_request(self, ue: UE, ue_uav_rate: float, target_idx: int, if not self.cache[req_id]: fetch_latency = file_size / self._uav_mbs_rate _try_add_file_to_cache(self, req_id) + else: + self.cache_hits_step += 1 comp_latency, comp_energy = _get_computing_latency_and_energy(self, cpu_cycles) ue.latency_current_request = ue_uav_upload_latency + fetch_latency + comp_latency @@ -217,6 +271,8 @@ def _process_service_request(self, ue: UE, ue_uav_rate: float, target_idx: int, if not target_uav.cache[req_id]: fetch_latency = file_size / uav_mbs_rate _try_add_file_to_cache(target_uav, req_id) + else: + target_uav.cache_hits_step += 1 comp_latency, comp_energy = _get_computing_latency_and_energy(target_uav, cpu_cycles) ue.latency_current_request = ue_uav_upload_latency + uav_uav_upload_latency + fetch_latency + comp_latency @@ -240,6 +296,8 @@ def _process_content_request(self, ue: UE, ue_uav_rate: float, target_idx: int, if not self.cache[req_id]: fetch_latency = file_size / self._uav_mbs_rate _try_add_file_to_cache(self, req_id) + else: + self.cache_hits_step += 1 ue.latency_current_request = fetch_latency + ue_uav_download_latency @@ -253,6 +311,8 @@ def _process_content_request(self, ue: UE, ue_uav_rate: float, target_idx: int, if not target_uav.cache[req_id]: fetch_latency = file_size / 
uav_mbs_rate _try_add_file_to_cache(target_uav, req_id) + else: + target_uav.cache_hits_step += 1 ue.latency_current_request = fetch_latency + uav_uav_download_latency + ue_uav_download_latency _try_add_file_to_cache(self, req_id) # Since it was a miss, try to add to associated UAV's cache as well in background @@ -276,6 +336,8 @@ def update_ema_and_cache(self) -> None: def gdsf_cache_update(self) -> None: """Update cache using the GDSF caching policy at a longer timescale.""" + if getattr(config, "CACHE_POLICY", "GDSF") != "GDSF": + return # LRU/LFU/Random update reactively, No Cache does nothing priority_scores: np.ndarray = self._ema_scores / config.FILE_SIZES sorted_file_ids: np.ndarray = np.argsort(-priority_scores) self.cache = np.zeros(config.NUM_FILES, dtype=bool) diff --git a/environment/user_equipments.py b/environment/user_equipments.py index 6f5b401..f3174a8 100644 --- a/environment/user_equipments.py +++ b/environment/user_equipments.py @@ -7,6 +7,7 @@ class UE: global_ranks: np.ndarray id_to_rank_map: dict[int, int] global_probs: np.ndarray + hotspot_centers: list[np.ndarray] @classmethod def initialize_ue_class(cls) -> None: @@ -17,9 +18,41 @@ def initialize_ue_class(cls) -> None: zipf_denom: float = np.sum(1 / cls.global_ranks**config.ZIPF_BETA) cls.global_probs = (1 / cls.global_ranks**config.ZIPF_BETA) / zipf_denom + if getattr(config, "USE_HOTSPOTS", False): + cls.generate_hotspots() + + @classmethod + def generate_hotspots(cls) -> None: + """Randomizes the locations of the hotspots across the map.""" + cls.hotspot_centers = [] + max_retries = 100 # Safety limit for rejection sampling + + for _ in range(config.NUM_HOTSPOTS): + valid: bool = False + new_center: np.ndarray = np.zeros(2, dtype=np.float32) + retries: int = 0 + while not valid and retries < max_retries: + hx: float = np.random.uniform(config.HOTSPOT_RADIUS, config.AREA_WIDTH - config.HOTSPOT_RADIUS) + hy: float = np.random.uniform(config.HOTSPOT_RADIUS, config.AREA_HEIGHT - 
config.HOTSPOT_RADIUS) + new_center = np.array([hx, hy], dtype=np.float32) + + if not cls.hotspot_centers: + valid = True + else: + distances: np.ndarray = np.linalg.norm(np.array(cls.hotspot_centers) - new_center, axis=1) + if np.min(distances) > config.HOTSPOT_SEPARATION: + valid = True + retries += 1 + + cls.hotspot_centers.append(new_center) + def __init__(self, ue_id: int) -> None: self.id: int = ue_id self.pos: np.ndarray = np.array([np.random.uniform(0, config.AREA_WIDTH), np.random.uniform(0, config.AREA_HEIGHT), 0.0], dtype=np.float32) + self.is_hotspot_user = getattr(config, "USE_HOTSPOTS", False) and (self.id < config.NUM_UES * getattr(config, "HOTSPOT_UE_PROB", 0.0)) + if self.is_hotspot_user: + self.pos[:2] = self._get_position_in_hotspot() + self.battery_level: float = np.random.uniform(0.6, 1.0) * config.UE_BATTERY_CAPACITY # Start at capacity between 60% to 100% self.current_request: tuple[int, int, int] = (0, 0, 0) # Request : (req_type, req_size, req_id) @@ -51,6 +84,15 @@ def update_position(self) -> None: move_vector = (direction_vec / distance_to_waypoint) * config.UE_MAX_DIST self.pos[:2] += move_vector + def _get_position_in_hotspot(self) -> np.ndarray: + """Generates a random position strictly within this UE's assigned hotspot.""" + angle: float = np.random.uniform(0, 2 * np.pi) + r: float = config.HOTSPOT_RADIUS * np.sqrt(np.random.uniform(0, 1)) + offset: np.ndarray = r * np.array([np.cos(angle), np.sin(angle)], dtype=np.float32) + center: np.ndarray = UE.hotspot_centers[self.id % config.NUM_HOTSPOTS] + pos: np.ndarray = np.clip(center + offset, [0, 0], [config.AREA_WIDTH, config.AREA_HEIGHT]) + return pos.astype(np.float32) + def generate_request(self) -> None: """Generates a new request tuple for the current time slot.""" @@ -78,7 +120,12 @@ def update_service_coverage(self, current_time_step_t: int) -> None: def _set_new_waypoint(self): """Set a new destination, speed, and wait time as per the Random Waypoint model.""" - self._waypoint 
= np.array([np.random.uniform(0, config.AREA_WIDTH), np.random.uniform(0, config.AREA_HEIGHT)], dtype=np.float32) + # If hotspots are active, the new waypoint MUST also be inside the hotspot! + if self.is_hotspot_user: + self._waypoint = self._get_position_in_hotspot() + else: + self._waypoint = np.array([np.random.uniform(0, config.AREA_WIDTH), np.random.uniform(0, config.AREA_HEIGHT)], dtype=np.float32) + self._wait_time = np.random.randint(0, config.UE_MAX_WAIT_TIME + 1) def update_battery(self, harv_energy: float, ue_transmit_time: float) -> None: diff --git a/main.py b/main.py index 3d6dff3..d4af06d 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ from marl_models.base_model import MARLModel from environment.env import Env from marl_models.utils import get_model, load_step_count -from train import train_on_policy, train_off_policy, train_random +from train import train_on_policy, train_off_policy, train_baselines from test import test_model from utils.logger import Logger, load_configs from utils.plot_logs import generate_plots @@ -54,8 +54,8 @@ def start_training(args: argparse.Namespace): train_off_policy(env, model, logger, args.num_episodes, total_step_count) elif model_name in ["mappo", "attention_mappo"]: train_on_policy(env, model, logger, args.num_episodes) - else: # "random" - train_random(env, model, logger, args.num_episodes) + else: # "random", "static", "nearest_greedy", "uncoordinated_greedy" + train_baselines(env, model, logger, args.num_episodes) print("āœ… Training Completed!\n") print("šŸ“Š Generating plots...") diff --git a/marl_models/nearest_greedy_baseline/nearest_greedy_model.py b/marl_models/nearest_greedy_baseline/nearest_greedy_model.py new file mode 100644 index 0000000..6b02605 --- /dev/null +++ b/marl_models/nearest_greedy_baseline/nearest_greedy_model.py @@ -0,0 +1,39 @@ +from marl_models.base_model import MARLModel, ExperienceBatch +import config +import numpy as np + + +class NearestGreedyModel(MARLModel): + def __init__(self, 
model_name: str, num_agents: int, obs_dim: int, action_dim: int, device: str) -> None: + super().__init__(model_name, num_agents, obs_dim, action_dim, device) + + # Calculate exactly where the UE observations start in the flattened array + self.ue_start_idx: int = 2 + config.NUM_FILES + config.MAX_UAV_NEIGHBORS * config.NEIGHBOR_OBS_DIM + + def select_actions(self, observations: np.ndarray, exploration: bool = True) -> np.ndarray: + # In obs, UEs are pre-sorted by distance. Hence, the very first UE block in the observation is the nearest. + dx: np.ndarray = observations[:, self.ue_start_idx] * config.AREA_WIDTH + dy: np.ndarray = observations[:, self.ue_start_idx + 1] * config.AREA_HEIGHT + distances: np.ndarray = np.sqrt(dx**2 + dy**2) + + # Default action: If no UEs are in range (distance is 0 due to zero-padding), do a moderate random walk. + actions: np.ndarray = np.random.uniform(-0.2, 0.2, size=(self.num_agents, self.action_dim)).astype(np.float32) + + # Masking: Only apply the greedy vector to UAVs that actually see a UE + mask: np.ndarray = distances > 0.1 + if np.any(mask): + actions[mask, 0] = dx[mask] / distances[mask] + actions[mask, 1] = dy[mask] / distances[mask] + return actions + + def update(self, batch: ExperienceBatch) -> dict: + return {} # Does not learn, return empty losses dict. 
+ + def reset(self) -> None: + pass + + def save(self, directory: str) -> None: + pass + + def load(self, directory: str) -> None: + pass diff --git a/marl_models/static_baseline/static_model.py b/marl_models/static_baseline/static_model.py new file mode 100644 index 0000000..3a26399 --- /dev/null +++ b/marl_models/static_baseline/static_model.py @@ -0,0 +1,71 @@ +from marl_models.base_model import MARLModel, ExperienceBatch +import config +import numpy as np + + +class StaticModel(MARLModel): + def __init__(self, model_name: str, num_agents: int, obs_dim: int, action_dim: int, device: str) -> None: + super().__init__(model_name, num_agents, obs_dim, action_dim, device) + self.static_positions: list[np.ndarray] = self._generate_dynamic_grid(num_agents) + + def _generate_dynamic_grid(self, num_agents: int) -> list[np.ndarray]: + """Dynamically calculates maximum-spread formations, keeping coverage circles strictly inside the map.""" + + boundary_gap: float = config.UAV_COVERAGE_RADIUS # The circle will just touch the wall! 
+ safe_width: float = config.AREA_WIDTH - (2 * boundary_gap) + safe_height: float = config.AREA_HEIGHT - (2 * boundary_gap) + min_x: float = boundary_gap + min_y: float = boundary_gap + + max_in_row: int = int(np.ceil(np.sqrt(num_agents))) + min_required_size: float = (config.MIN_UAV_SEPARATION * (max_in_row - 1)) + (2 * boundary_gap) + + if config.AREA_WIDTH < min_required_size or config.AREA_HEIGHT < min_required_size: + print(f"\nāš ļø WARNING: Map size ({config.AREA_WIDTH}x{config.AREA_HEIGHT}) is too small for {num_agents} UAVs.") + + num_rows: int = max(1, int(np.round(np.sqrt(num_agents)))) + base_count: int = num_agents // num_rows + remainder: int = num_agents % num_rows + + row_counts: list[int] = [base_count] * num_rows + left: int = (num_rows - remainder) // 2 + for i in range(remainder): + row_counts[left + i] += 1 + + positions: list[np.ndarray] = [] + y_coords: list[float] = [] + + if num_rows == 1: + y_coords = [min_y + safe_height / 2.0] + else: + y_step: float = safe_height / (num_rows - 1) + y_coords = [min_y + r * y_step for r in range(num_rows)] + + for r, count_in_row in enumerate(row_counts): + y = y_coords[r] + if count_in_row == 1: + x = min_x + safe_width / 2.0 + positions.append(np.array([x, y], dtype=np.float32)) + else: + x_step: float = safe_width / (count_in_row - 1) + for c in range(count_in_row): + x = min_x + c * x_step + positions.append(np.array([x, y], dtype=np.float32)) + + return positions + + def select_actions(self, observations: np.ndarray, exploration: bool = True) -> np.ndarray: + # Action is exactly [0.0, 0.0] for all UAVs : they hover statically. + return np.zeros((self.num_agents, self.action_dim), dtype=np.float32) + + def update(self, batch: ExperienceBatch) -> dict: + return {} # Does not learn, return empty losses dict. 
+ + def reset(self) -> None: + pass + + def save(self, directory: str) -> None: + pass + + def load(self, directory: str) -> None: + pass diff --git a/marl_models/uncoordinated_greedy_baseline/uncoordinated_greedy_model.py b/marl_models/uncoordinated_greedy_baseline/uncoordinated_greedy_model.py new file mode 100644 index 0000000..3b167ad --- /dev/null +++ b/marl_models/uncoordinated_greedy_baseline/uncoordinated_greedy_model.py @@ -0,0 +1,39 @@ +from marl_models.base_model import MARLModel, ExperienceBatch +import config +import numpy as np + + +class UncoordinatedGreedyModel(MARLModel): + def __init__(self, model_name: str, num_agents: int, obs_dim: int, action_dim: int, device: str) -> None: + super().__init__(model_name, num_agents, obs_dim, action_dim, device) + + # The greedy target: The absolute center of the map + self.target_pos: np.ndarray = np.array([[config.AREA_WIDTH / 2.0, config.AREA_HEIGHT / 2.0]], dtype=np.float32) + self.area_dims: np.ndarray = np.array([[config.AREA_WIDTH, config.AREA_HEIGHT]], dtype=np.float32) + + def select_actions(self, observations: np.ndarray, exploration: bool = True) -> np.ndarray: + current_pos: np.ndarray = observations[:, :2] * self.area_dims + delta: np.ndarray = self.target_pos - current_pos # Shape: (num_agents, 2) + distances: np.ndarray = np.linalg.norm(delta, axis=1, keepdims=True) # Shape: (num_agents, 1) + + # Default action: Random jitter (fighting for the center spot) + actions: np.ndarray = np.random.uniform(-0.1, 0.1, size=(self.num_agents, self.action_dim)).astype(np.float32) + + # Only apply the directed flight to UAVs farther than 10m from the center + mask: np.ndarray = (distances > 10.0).flatten() + if np.any(mask): + actions[mask] = delta[mask] / distances[mask] # Unit vector towards center + + return actions + + def update(self, batch: ExperienceBatch) -> dict: + return {} # Does not learn, return empty losses dict. 
+ + def reset(self) -> None: + pass + + def save(self, directory: str) -> None: + pass + + def load(self, directory: str) -> None: + pass diff --git a/marl_models/utils.py b/marl_models/utils.py index 6990e5e..6f35bff 100644 --- a/marl_models/utils.py +++ b/marl_models/utils.py @@ -8,6 +8,9 @@ from marl_models.attention_mappo.attention_mappo import AttentionMAPPO from marl_models.attention_masac.attention_masac import AttentionMASAC from marl_models.random_baseline.random_model import RandomModel +from marl_models.static_baseline.static_model import StaticModel +from marl_models.nearest_greedy_baseline.nearest_greedy_model import NearestGreedyModel +from marl_models.uncoordinated_greedy_baseline.uncoordinated_greedy_model import UncoordinatedGreedyModel import config import torch import os @@ -46,6 +49,12 @@ def get_model(model_name: str) -> MARLModel: return AttentionMASAC(model_name=model_name, num_agents=config.NUM_UAVS, obs_dim=config.OBS_DIM_SINGLE, action_dim=config.ACTION_DIM, device=device) elif model_name == "random": return RandomModel(model_name=model_name, num_agents=config.NUM_UAVS, obs_dim=config.OBS_DIM_SINGLE, action_dim=config.ACTION_DIM, device=device) + elif model_name == "static": + return StaticModel(model_name=model_name, num_agents=config.NUM_UAVS, obs_dim=config.OBS_DIM_SINGLE, action_dim=config.ACTION_DIM, device=device) + elif model_name == "nearest_greedy": + return NearestGreedyModel(model_name=model_name, num_agents=config.NUM_UAVS, obs_dim=config.OBS_DIM_SINGLE, action_dim=config.ACTION_DIM, device=device) + elif model_name == "uncoordinated_greedy": + return UncoordinatedGreedyModel(model_name=model_name, num_agents=config.NUM_UAVS, obs_dim=config.OBS_DIM_SINGLE, action_dim=config.ACTION_DIM, device=device) else: raise ValueError(f"Unknown model type: {model_name}.") diff --git a/test.py b/test.py index cb23761..0ceb1c6 100644 --- a/test.py +++ b/test.py @@ -22,6 +22,8 @@ def test_model(env: Env, model: MARLModel, logger: Logger, 
num_episodes: int) -> episode_energy: float = 0.0 episode_fairness: float = 0.0 episode_offline_rate: float = 0.0 + episode_hits: int = 0 + episode_requests: int = 0 # reset_trajectories(env) # tracking code, comment if not needed # plot_snapshot(env, episode, 0, logger.log_dir, logger.timestamp, True) @@ -31,7 +33,7 @@ def test_model(env: Env, model: MARLModel, logger: Logger, num_episodes: int) -> obs_arr: np.ndarray = np.asarray(obs, dtype=np.float32) actions: np.ndarray = model.select_actions(obs_arr, exploration=False) - next_obs, rewards, (total_latency, total_energy, jfi, offline_rate) = env.step(actions) + next_obs, rewards, (total_latency, total_energy, jfi, offline_rate, step_hits, step_requests) = env.step(actions) # update_trajectories(env) # tracking code, comment if not needed done: bool = step >= config.STEPS_PER_EPISODE obs = next_obs @@ -41,10 +43,14 @@ def test_model(env: Env, model: MARLModel, logger: Logger, num_episodes: int) -> episode_energy += total_energy episode_fairness = jfi episode_offline_rate = offline_rate + episode_hits += step_hits + episode_requests += step_requests if done: break - episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate) + episode_chr: float = episode_hits / episode_requests if episode_requests > 0 else 0.0 + episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate, episode_chr) + # episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate) if episode % config.TEST_LOG_FREQ == 0: elapsed_time: float = time.time() - start_time logger.log_metrics(episode, episode_log, config.TEST_LOG_FREQ, elapsed_time) diff --git a/train.py b/train.py index 4205099..1c3ce88 100644 --- a/train.py +++ b/train.py @@ -35,6 +35,8 @@ def train_on_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: in episode_energy: float = 0.0 episode_fairness: float = 0.0 
episode_offline_rate: float = 0.0 + episode_hits: int = 0 + episode_requests: int = 0 obs: list[np.ndarray] = env.reset() obs_arr: np.ndarray = np.asarray(obs, dtype=np.float32) @@ -52,7 +54,7 @@ def train_on_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: in raw_actions, log_probs, values = model.get_action_and_value(obs_arr, state) actions: np.ndarray = np.clip(raw_actions, -1.0, 1.0) - next_obs, rewards, (total_latency, total_energy, jfi, offline_rate) = env.step(actions) + next_obs, rewards, (total_latency, total_energy, jfi, offline_rate, step_hits, step_requests) = env.step(actions) next_state: np.ndarray = np.concatenate(next_obs, axis=0, dtype=np.float32) # update_trajectories(env) # tracking code, comment if not needed @@ -69,11 +71,14 @@ def train_on_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: in episode_energy += total_energy episode_fairness = jfi episode_offline_rate = offline_rate + episode_hits += step_hits + episode_requests += step_requests if done: # plot_snapshot(env, episode, episode_step, logger.log_dir, logger.timestamp) # Final snapshot of episode recent_rewards.append(episode_reward) - episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate) + episode_chr: float = episode_hits / episode_requests if episode_requests > 0 else 0.0 + episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate, episode_chr) # Optuna Pruning Check if trial: @@ -92,7 +97,7 @@ def train_on_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: in episode += 1 episode_step = 0 - episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate = 0.0, 0.0, 0.0, 0.0, 0.0 + episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate, episode_hits, episode_requests = 0.0, 0.0, 0.0, 0.0, 0.0, 0, 0 # reset_trajectories(env) # tracking code, comment if not needed # 
plot_snapshot(env, episode, 0, logger.log_dir, logger.timestamp, True) @@ -150,6 +155,8 @@ def train_off_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: i episode_energy: float = 0.0 episode_fairness: float = 0.0 episode_offline_rate: float = 0.0 + episode_hits: int = 0 + episode_requests: int = 0 # reset_trajectories(env) # tracking code, comment if not needed # plot_snapshot(env, episode, 0, logger.log_dir, logger.timestamp, True) @@ -164,7 +171,7 @@ def train_off_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: i else: actions = model.select_actions(obs_arr, exploration=True) - next_obs, rewards, (total_latency, total_energy, jfi, offline_rate) = env.step(actions) + next_obs, rewards, (total_latency, total_energy, jfi, offline_rate, step_hits, step_requests) = env.step(actions) next_obs_arr: np.ndarray = np.array(next_obs, dtype=np.float32) # update_trajectories(env) # tracking code, comment if not needed done: bool = step >= config.STEPS_PER_EPISODE @@ -186,10 +193,14 @@ def train_off_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: i episode_energy += total_energy episode_fairness = jfi episode_offline_rate = offline_rate + episode_hits += step_hits + episode_requests += step_requests + if done: break - episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate) + episode_chr: float = episode_hits / episode_requests if episode_requests > 0 else 0.0 + episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate, episode_chr) if episode % config.LOG_FREQ == 0: elapsed_time: float = time.time() - start_time # Prepare averaged losses for logging @@ -224,17 +235,22 @@ def train_off_policy(env: Env, model: MARLModel, logger: Logger, num_episodes: i return float(np.mean(recent_rewards[-max(1, int(num_episodes * 0.1)) :])) -def train_random(env: Env, model: MARLModel, logger: Logger, num_episodes: int) -> float: +def 
train_baselines(env: Env, model: MARLModel, logger: Logger, num_episodes: int) -> float: start_time: float = time.time() episode_log: Log = Log() for episode in range(1, num_episodes + 1): - obs = env.reset() + if hasattr(model, "static_positions") and model.static_positions is not None: + obs = env.reset(initial_positions=model.static_positions) + else: + obs = env.reset() episode_reward: float = 0.0 episode_latency: float = 0.0 episode_energy: float = 0.0 episode_fairness: float = 0.0 episode_offline_rate: float = 0.0 + episode_hits: int = 0 + episode_requests: int = 0 # reset_trajectories(env) # tracking code, comment if not needed # plot_snapshot(env, episode, 0, logger.log_dir, logger.timestamp, True) @@ -244,7 +260,7 @@ def train_random(env: Env, model: MARLModel, logger: Logger, num_episodes: int) obs_arr: np.ndarray = np.array(obs, dtype=np.float32) actions: np.ndarray = model.select_actions(obs_arr, exploration=False) - next_obs, rewards, (total_latency, total_energy, jfi, offline_rate) = env.step(actions) + next_obs, rewards, (total_latency, total_energy, jfi, offline_rate, step_hits, step_requests) = env.step(actions) # update_trajectories(env) # tracking code, comment if not needed done: bool = step >= config.STEPS_PER_EPISODE obs = next_obs @@ -254,12 +270,14 @@ def train_random(env: Env, model: MARLModel, logger: Logger, num_episodes: int) episode_energy += total_energy episode_fairness = jfi episode_offline_rate = offline_rate + episode_hits += step_hits + episode_requests += step_requests if done: break - - episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate) + episode_chr: float = episode_hits / episode_requests if episode_requests > 0 else 0.0 + episode_log.append(episode_reward, episode_latency, episode_energy, episode_fairness, episode_offline_rate, episode_chr) if episode % config.LOG_FREQ == 0: elapsed_time: float = time.time() - start_time logger.log_metrics(episode, episode_log, 
config.LOG_FREQ, elapsed_time, losses=None) - return 0.0 # Random training does not need tuning + return 0.0 # Baseline training does not need tuning diff --git a/utils/logger.py b/utils/logger.py index e97b1ee..9769d20 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -11,7 +11,8 @@ def __init__(self) -> None: self.energies: list[float] = [] self.fairness_scores: list[float] = [] self.offline_rates: list[float] = [] - # Training losses (optional, may be empty for random baseline) + self.chrs: list[float] = [] + # Training losses (optional, may be empty for baselines) self.actor_losses: list[float | None] = [] self.critic_losses: list[float | None] = [] self.entropy_losses: list[float | None] = [] @@ -24,6 +25,7 @@ def append( energy: float, fairness: float, offline_rate: float, + chr: float, *, actor_loss: float | None = None, critic_loss: float | None = None, @@ -39,6 +41,7 @@ def append( self.energies.append(energy) self.fairness_scores.append(fairness) self.offline_rates.append(offline_rate) + self.chrs.append(chr) self.actor_losses.append(actor_loss) self.critic_losses.append(critic_loss) @@ -92,12 +95,14 @@ def log_metrics( energies_slice: np.ndarray = np.array(log.energies[-log_freq:]) fairness_slice: np.ndarray = np.array(log.fairness_scores[-log_freq:]) offline_slice: np.ndarray = np.array(log.offline_rates[-log_freq:]) + chr_slice: np.ndarray = np.array(log.chrs[-log_freq:]) reward_avg: float = float(np.mean(rewards_slice)) latency_avg: float = float(np.mean(latencies_slice)) energy_avg: float = float(np.mean(energies_slice)) fairness_avg: float = float(np.mean(fairness_slice)) offline_avg: float = float(np.mean(offline_slice)) + chr_avg: float = float(np.mean(chr_slice)) # Prepare loss averages from the Log object if available; prefer explicit `losses` dict when provided def _safe_mean(lst: list) -> float | None: @@ -141,21 +146,13 @@ def _safe_mean(lst: list) -> float | None: loss_str = " | ".join(loss_parts) + " | " if loss_parts else "" - log_msg: 
str = f"🔄 Episode {progress_step} | " f"Total Reward: {reward_avg:.3f} | " f"Total Latency: {latency_avg:.3f} | " f"Total Energy: {energy_avg:.3f} | " f"Final Fairness: {fairness_avg:.3f} | " f"Offline Rate: {offline_avg:.3f} | " + loss_str + f"Elapsed Time: {elapsed_time:.2f}s\n" + log_msg: str = f"🔄 Episode {progress_step} | " f"Total Reward: {reward_avg:.3f} | " f"Total Latency: {latency_avg:.3f} | " f"Total Energy: {energy_avg:.3f} | " f"Final Fairness: {fairness_avg:.3f} | " f"Offline Rate: {offline_avg:.3f} | " f"CHR: {chr_avg:.3f} | " + loss_str + f"Elapsed Time: {elapsed_time:.2f}s\n" with open(self.log_file_path, "a", encoding="utf-8") as f: f.write(log_msg) # Prepare JSON entry, only include keys that are not None - data_entry: dict = { - "episode": progress_step, - "reward": reward_avg, - "latency": latency_avg, - "energy": energy_avg, - "fairness": fairness_avg, - "offline_rate": offline_avg, - "time": elapsed_time, - } + data_entry: dict = {"episode": progress_step, "reward": reward_avg, "latency": latency_avg, "energy": energy_avg, "fairness": fairness_avg, "offline_rate": offline_avg, "time": elapsed_time, "chr": chr_avg} if actor_avg is not None: data_entry["actor_loss"] = actor_avg if critic_avg is not None: