为什么用 DQN 训练无人机任务分配环境时，奖励值始终不收敛、一直在震荡？step 函数如下：
def step(self, action):
    """Apply one task-to-UAV assignment and return the transition.

    ``action`` is a flat index decoded as ``uav_idx * num_tasks + task_idx``.

    An assignment is rejected, leaving the state untouched, when the task
    is already assigned or the UAV is farther than ``self.max_distance``
    from it.  Rejected actions are penalized instead of being rewarded
    with 0: every accepted assignment earns ``-distance <= 0``, so a
    zero-reward no-op would never be worse than making progress and a
    reward-maximizing agent would learn to stall instead of finishing.

    The penalty is ``self.invalid_action_penalty`` when the environment
    defines that attribute, otherwise ``-self.max_distance`` (the lowest
    reward an accepted assignment can earn).  Set the attribute explicitly
    if ``max_distance`` is not finite.

    Returns:
        ``(observation, reward, done, info)``.  ``done`` is a plain
        ``bool`` that is True once every task is assigned; it is always
        False for a rejected action.  ``info`` is always an empty dict.
    """
    task_idx = action % self.num_tasks
    uav_idx = action // self.num_tasks

    invalid_penalty = getattr(
        self, "invalid_action_penalty", -float(self.max_distance)
    )

    if self.task_assigned[task_idx] == 1:
        # The task has already been assigned to a UAV.
        return self._get_obs(), invalid_penalty, False, {}

    task_position = self.task_positions[task_idx]
    uav_position = self.uav_positions[uav_idx]
    distance = float(np.linalg.norm(task_position - uav_position))
    if distance > self.max_distance:
        # The UAV is too far from the task to take it.
        return self._get_obs(), invalid_penalty, False, {}

    self.task_assigned[task_idx] = 1
    # Store a copy so the UAV position never aliases the task position row
    # (relevant if positions are kept as a list of arrays).
    self.uav_positions[uav_idx] = np.copy(task_position)

    # asarray keeps the comparison element-wise even if task_assigned is a
    # plain list; bool() avoids leaking numpy.bool_ to the caller.
    done = bool(np.all(np.asarray(self.task_assigned) == 1))
    return self._get_obs(), -distance, done, {}