Source code for fleetrl.fleet_env.config.score_config

import numpy as np


[docs]
class ScoreConfig:
    """
    The Score Config sets coefficients to calculate the reward function
    - Multipliers: price_multiplier, penalty_invalid_action, penalty_overcharging, penalty_overloading
    - Changing the multipliers to 0 leads to ignoring this aspect of the reward function
    - The SOC violation and overloading are calculated using sigmoid functions
    """

    def __init__(self, env_config):
        """
        Read the reward coefficients from the environment configuration.

        :param env_config: Mapping exposing ``get(key, default)``; every key that is
            missing falls back to the default given below.
        """
        self.price_multiplier = env_config.get('price_multiplier', 3.33)
        self.price_exponent = env_config.get('price_exponent', 1)
        self.fully_charged_reward = env_config.get('fully_charged_reward', 1)
        # in cold weather trafo can operate >100%
        # TODO give overcharging/underloading penalty not only when the car departs, but
        # also in realtime when the battery dips below or goes above the healthy range
        # And maybe increase the penalty the farther the agent gets away from these boundaries
        self.penalty_invalid_action = env_config.get('penalty_invalid_action', -0.2)
        self.penalty_overcharging = env_config.get('penalty_overcharging', -0.0055)
        self.penalty_overloading = env_config.get('penalty_overloading', 1)
        self.clip_overcharging = env_config.get('clip_overcharging', -0.2)

    @staticmethod
    def soc_violation_penalty(missing_soc):
        """
        Penalty for a missing state of charge, computed from a fitted sigmoid.

        :param missing_soc: Missing SOC value (scalar or numpy array).
        :return: ``-500 / (1 + exp(-k * (missing_soc - x0))) + 1`` with the fitted
            parameters ``x0`` and ``k`` below.
        """
        x0, k = 0.29229767, 16.48461585  # Parameters from the fitted sigmoid function for the reward function
        penalty = -500 / (1 + np.exp(-k * (missing_soc - x0))) + 1

        return penalty

    def overloading_penalty(self, rel_loading):
        """
        Penalty for overloading, computed from a fitted piecewise sigmoid.

        The penalty is 0 for a relative loading below 1.1 and follows
        ``-700 / (1 + exp(-k * (rel_loading - x0)))`` from 1.1 upwards. The result is
        scaled by ``self.penalty_overloading``.

        :param rel_loading: Relative loading as a scalar, sequence or numpy array.
            Integer input is accepted and evaluated in floating point.
        :return: A Python float when the input holds exactly one element, otherwise
            a numpy array with one penalty per element.
        """
        x0, k = 1.33298382, 15.77350877  # Parameters from the fitted piecewise sigmoid function for the overloading penalty function

        # np.piecewise allocates its output with the dtype of its input, so integer
        # loadings would silently truncate the penalty. Evaluate in float instead.
        loading = np.asarray(rel_loading, dtype=float)

        penalty = np.piecewise(loading, [loading < 1.1, loading >= 1.1],
                               [0.0, lambda x: -700 / (1 + np.exp(-k * (x - x0)))])

        # If penalty is an array with only one element, return that element
        if isinstance(penalty, np.ndarray) and penalty.size == 1:
            penalty = penalty.item()

        return penalty * self.penalty_overloading