# Source code for fleetrl.fleet_env.config.score_config

import numpy as np

class ScoreConfig:
    """
    The Score Config sets coefficients to calculate the reward function.

    - Multipliers: price_multiplier, penalty_invalid_action, penalty_overcharging,
      penalty_overloading
    - Changing the multipliers to 0 leads to ignoring this aspect of the reward function
    - The SOC violation and overloading are calculated using sigmoid functions
    """

    def __init__(self, env_config):
        """
        Read the score coefficients from ``env_config``, using defaults for missing keys.

        :param env_config: Object exposing ``get(key, default)`` (e.g. a dict) that may
            override any of the coefficients below
        """
        self.price_multiplier = env_config.get('price_multiplier', 3.33)
        self.price_exponent = env_config.get('price_exponent', 1)
        self.fully_charged_reward = env_config.get('fully_charged_reward', 1)

        # in cold weather trafo can operate >100%
        # TODO give overcharging/underloading penalty not only when the car departs, but
        #  also in realtime when the battery dips below or goes above the healthy range
        #  And maybe increase the penalty the farther the agent gets away from these boundaries
        self.penalty_invalid_action = env_config.get('penalty_invalid_action', -0.2)
        self.penalty_overcharging = env_config.get('penalty_overcharging', -0.0055)
        self.penalty_overloading = env_config.get('penalty_overloading', 1)
        self.clip_overcharging = env_config.get('clip_overcharging', -0.2)

    @staticmethod
    def soc_violation_penalty(missing_soc):
        """
        Evaluate the SOC violation penalty using the fitted sigmoid function.

        :param missing_soc: Amount of missing SOC (scalar or numpy array)
            - presumably a fraction of full charge; confirm against the caller
        :return: ``-500 / (1 + exp(-k * (missing_soc - x0))) + 1``
        """
        # Parameters from the fitted sigmoid function for the reward function
        x0, k = 0.29229767, 16.48461585
        penalty = -500 / (1 + np.exp(-k * (missing_soc - x0))) + 1
        return penalty

    def overloading_penalty(self, rel_loading):
        """
        Evaluate the overloading penalty using the fitted piecewise sigmoid function.

        Below a relative loading of 1.1 the penalty is 0; from 1.1 upwards it follows
        ``-700 / (1 + exp(-k * (rel_loading - x0)))``. The result is scaled by
        ``self.penalty_overloading``.

        :param rel_loading: Relative loading as a scalar, sequence or numpy array
            (ints are accepted and treated as floats)
        :return: A Python float when the input holds a single value, otherwise a numpy
            array with one penalty per input element
        """
        # Parameters from the fitted piecewise sigmoid function for the overloading penalty
        x0, k = 1.33298382, 15.77350877

        # np.piecewise allocates its output with the dtype of its input, so integer
        # input would silently truncate the (fractional) penalty. Cast to float first;
        # this also allows plain lists/tuples to be compared element-wise below.
        loading = np.asarray(rel_loading, dtype=float)

        penalty = np.piecewise(
            loading,
            [loading < 1.1, loading >= 1.1],
            [0, lambda x: -700 / (1 + np.exp(-k * (x - x0)))],
        )

        # If penalty is an array with only one element, return that element
        if penalty.size == 1:
            penalty = penalty.item()

        return penalty * self.penalty_overloading