Page 57 - Read Online
P. 57

Boin et al. Intell Robot 2022;2(2):145-67  |  http://dx.doi.org/10.20517/ir.2022.11  Page 155








               Algorithm 1: FRL applied to an AV platoon.
                  for each platoon $p \in platoons$ do
                      for $i \in vehicles$ do
                          initialize replay buffer $R_i$;
                          initialize actor $\mu_i$, critic $Q_i$, target actor $\mu'_i$, target critic $Q'_i$;
                      end
                  end
                  for $episode \in training\_episodes$ do
                      for $p \in platoons$ do
                          collect all vehicles states $s_{p,t}$ from $p$;
                      end
                      for $step \in steps\_per\_episode$ do
                          for $p \in platoons$ do
                             for $i \in vehicles$ do
                                 collect actions $a_{i,t}$ from actor;
                             end
                             advance the platoon $p$, with $a_{p,t}$;
                             collect ($s_{p,t}$, $s_{p,t+1}$, $r_{p,t}$, $terminal$) from $p$;
                          end
                          for $p \in platoons$ do
                             for $i \in vehicles$ do
                                 add ($s_{p,t}$, $s_{p,t+1}$, $r_{p,t}$, $terminal$) to replay buffer $R_i$;
                                 if FRL update is not required then
                                     train $\mu_i$, $Q_i$, $\mu'_i$, $Q'_i$ locally;
                                 end
                                 append gradients of $\mu_i$ and $Q_i$ to all_gradients;
                                 append weights of $\mu_i$ and $Q_i$ to all_weights;
                             end
                          end
                          if FRL update required then
                             if gradient averaging enabled then
                                 avg_gradients ← global_update(all_gradients);
                                 train $\mu_i$, $Q_i$ using avg_gradients;
                             end
                             if weight averaging enabled then
                                 avg_weights ← global_update(all_weights);
                                 update weights $\mu_i$, $Q_i$, $\mu'_i$, $Q'_i$ using avg_weights;
                             end
                          end
                      end
                  end
                  Function global_update(params) is
                      upload params to FRL server;
                      collect averaged params from FRL server;
                      return averaged params;
                  end
   52   53   54   55   56   57   58   59   60   61   62