diff --git a/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/.$MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio.bkp b/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/.$MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio.bkp
index fbe151d9a..fb8c3b773 100644
--- a/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/.$MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio.bkp
+++ b/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/.$MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio.bkp
[draw.io XML of the RL-PID controller class diagram (backup copy) updated; element-level changes omitted]
diff --git a/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio b/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio
index fb8c3b773..c21951c4f 100644
--- a/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio
+++ b/doc/rtd/content/99_appendices/appendix2/sub/pool/mlpro_oa/control/controllers/images/MLPro-OA-Control-RL-PID-Controller_class_diagram.drawio
[draw.io XML of the RL-PID controller class diagram updated; element-level changes omitted]
diff --git a/src/mlpro/oa/control/controllers/oa_pid_controller.py b/src/mlpro/oa/control/controllers/oa_pid_controller.py
index 2d1dfe424..3aeb4218a 100644
--- a/src/mlpro/oa/control/controllers/oa_pid_controller.py
+++ b/src/mlpro/oa/control/controllers/oa_pid_controller.py
@@ -9,93 +9,68 @@
 from mlpro.bf.systems.basics import ActionElement, State
 from mlpro.bf.various import Log
 from mlpro.bf.streams import InstDict, Instance
+from mlpro.rl.models_env_ada import SARSElement
+from mlpro_int_sb3.wrappers.basics import WrPolicySB32MLPro
+from stable_baselines3 import A2C, PPO, DQN, DDPG
+class RLPID(Policy):
-class RLController(OAController):
+    def __init__(self, p_observation_space: MSpace, p_action_space: MSpace, pid_controller: PIDController, p_id=None, p_buffer_size: int = 1, p_ada: bool = True, p_visualize: bool = False, p_logging=Log.C_LOG_ALL):
+        super().__init__(p_observation_space, p_action_space, p_id, p_buffer_size, p_ada, p_visualize, p_logging)
-    C_TYPE = 'RL PID-Controller'
+        self._pid_controller = pid_controller
-    def __init__(self, p_name: str = None, p_range_max=Task.C_RANGE_THREAD, p_duplicate_data: bool = False, p_visualize: bool = False,
-                 p_logging=Log.C_LOG_ALL, p_error_id: int = 0, p_cls_policy: type = None, p_param_policy=None, p_fct_reward: FctReward = None, **p_kwargs):
-        super().__init__(p_name, p_range_max, p_duplicate_data, p_visualize, p_logging, **p_kwargs)
-    def _adapt(self, p_setpoint: SetPoint, p_ctrl_error: ControlError, p_state: State, p_action: Action, p_reward: float) -> bool:
-        """
-        Specialized custom method for online adaptation in closed-loop control scenarios.
-
-        Parameters
-        ----------
-        p_ctrl_error : ControlError
-            Control error.
-        p_state : State
-            State of control system.
-        p_setpoint : SetPoint
-            Setpoint.
-        p_Action : Action
-            control variable
-        p_reward : float
-            Output valaue of the reward function
-        """
+
+    def _adapt(self, p_sars_elem: SARSElement) -> bool:
+        """
+        policy_sb3 = PPO(
+            policy="MlpPolicy",
+            n_steps=5,
+            env=None,
+            _init_setup_model=False,
+            device="cpu")
+        sb3_policy = WrPolicySB32MLPro()
+        sb3_policy._adapt_on_policy(p_sars_elem)
+        sb3_policy._compute_action_on_policy()
+
-class RLPIDController(RLController):
-    def __init__(self, p_name: str = None, p_range_max=Task.C_RANGE_THREAD, p_duplicate_data: bool = False, p_visualize: bool = False,
-                 p_logging=Log.C_LOG_ALL, p_error_id: int = 0, p_cls_policy: type = None, p_param_policy=None, p_fct_reward: FctReward = None, pid_controller: PIDController = None, **p_kwargs):
-        super().__init__(p_name, p_range_max, p_duplicate_data, p_visualize, p_logging, p_error_id, p_cls_policy, p_param_policy, p_fct_reward, **p_kwargs)
-        self._policy = self._setup_policy(p_param_policy)
-
-        self._pid_controller = pid_controller
+        p_param = {}
+        self._pid_controller.set_parameter(p_param)
-    def _setup_policy_action_space(self) -> MSpace:
+        """
         pass
+
+
+
-    def _setup_policy(self, p_param_policy: dict) -> Policy:
-        pass
-
-
+    def compute_action(self, p_obs: State) -> Action:
-    def _run(self, p_inst: InstDict):
-        pass
+        # create control error from p_obs
+        crtl_error = ControlError(p_obs.get_feature_data(), p_obs.get_label_data(), p_obs.get_tstamp())
+        # get action
+        action = self._pid_controller.compute_action(crtl_error)
-    def compute_action(self, p_ctrl_error: ControlError) -> Action:
-        return self._pid_controller.compute_action(p_ctrl_error)
-
-    def _adapt_rl(self, p_setpoint: SetPoint, p_ctrl_error: ControlError, p_state: State, p_action: Action, p_reward: float) -> bool:
+        # return action
+        return action
-        """
-        Specialized custom method for online adaptation in closed-loop control scenarios.
-
-        Parameters
-        ----------
-        p_ctrl_error : ControlError
-            Control error.
-        p_state : State
-            State of control system.
-        p_setpoint : SetPoint
-            Setpoint.
-        p_Action : Action
-            control variable
-        p_reward : float
-            Output valaue of the reward function
-        """
-
-        pass
-
+
@@ -104,7 +79,6 @@ def _adapt_rl(self, p_setpoint: SetPoint, p_ctrl_error: ControlError, p_state: S
-
-
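Note on the new `RLPID` policy: `_adapt()` is still a stub whose intended logic only exists as a sketch inside its docstring (wrap an SB3 algorithm via `WrPolicySB32MLPro`, adapt it on the incoming `SARSElement`, and push new parameters into the wrapped `PIDController` via `set_parameter()`). The following is a minimal sketch of how that body could look once filled in; the `WrPolicySB32MLPro` constructor arguments, the use of the policy's action values as PID gains, and the parameter keys `p_kp`/`p_ki`/`p_kd` are assumptions for illustration and are not part of this diff.

```python
# Hedged sketch only - not part of the patch above. It turns the docstring
# sketch inside RLPID._adapt() into code under the stated assumptions
# (wrapper arguments, gain mapping, parameter keys).
from mlpro.rl.models_env_ada import SARSElement
from mlpro_int_sb3.wrappers.basics import WrPolicySB32MLPro
from stable_baselines3 import PPO


class RLPID(Policy):  # Policy, MSpace, PIDController as already imported in oa_pid_controller.py

    def __init__(self, p_observation_space: MSpace, p_action_space: MSpace,
                 pid_controller: PIDController, **p_kwargs):
        super().__init__(p_observation_space, p_action_space, **p_kwargs)
        self._pid_controller = pid_controller

        # Off-the-shelf SB3 algorithm, set up lazily and wrapped for MLPro.
        # The wrapper arguments follow the MLPro-Int-SB3 howtos and may
        # differ in detail from the final implementation.
        policy_sb3 = PPO(policy="MlpPolicy", n_steps=5, env=None,
                         _init_setup_model=False, device="cpu")
        self._policy_sb3 = WrPolicySB32MLPro(p_sb3_policy=policy_sb3,
                                             p_cycle_limit=100,
                                             p_observation_space=p_observation_space,
                                             p_action_space=p_action_space)

    def _adapt(self, p_sars_elem: SARSElement) -> bool:
        # 1) Let the wrapped SB3 policy learn from the latest SARS tuple
        adapted = self._policy_sb3.adapt(p_sars_elem=p_sars_elem)

        # 2) Query the policy for new controller parameters and hand them
        #    to the wrapped PID controller (key names are hypothetical)
        state = p_sars_elem.get_data()['state']
        gains = self._policy_sb3.compute_action(p_obs=state).get_sorted_values()
        self._pid_controller.set_parameter({'p_kp': gains[0],
                                            'p_ki': gains[1],
                                            'p_kd': gains[2]})
        return adapted
```

The division of labour in the diff itself is worth noting: `compute_action()` stays purely PID-based (it rebuilds a `ControlError` from the observed state and delegates to the wrapped controller), while the RL part would influence the loop only indirectly by retuning the PID parameters during `_adapt()`.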