Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Adding missing handler when a restart crashes.
[hpcvm.git] / src / and / hpcvm / Server.java
index b703aa7..185b5f3 100644 (file)
@@ -500,7 +500,7 @@ public class Server extends UnicastRemoteObject implements ServicesServer
                @Override
                public void run() 
                {
-                       boolean change ;
+                       boolean change, dead ;
                        
                        while( run )
                        {
@@ -513,41 +513,55 @@ public class Server extends UnicastRemoteObject implements ServicesServer
                                {
                                        ConnectedClient cl = it.next() ;
                                        cl.incTimeout() ;
+                                       dead = false ;
                                        
-                                       if( cl.getTimeout() > max_timeout )
+                                       if( cl.getTimeout() > max_timeout || cl.getFail() )
                                        {
-                                               System.out.println( "Disconnection of " + cl.getName() ) ;
-                                               if( cl.getStatus().equalsIgnoreCase( "running" ) || cl.getStatus().equalsIgnoreCase( "saving" ) )
+                                               if( ! cl.getFail() )
                                                {
-                                                       System.out.println( "A VM was running on it!!" ) ;
-                                                       System.out.println( "I will redeploy a save and restart all VM ..." ) ;
+                                                       try {
+                                                               cl.getStub().echo() ;
+                                                               cl.resetTimeout() ;
+                                                       } catch( RemoteException e ) {
+                                                               dead = true ;
+                                                       }
+                                               }
+                                               
+                                               if( dead )
+                                               {
+                                                       System.out.println( "Disconnection of " + cl.getName() ) ;
+                                                       if( cl.getStatus().equalsIgnoreCase( "running" ) || cl.getStatus().equalsIgnoreCase( "saving" ) )
+                                                       {
+                                                               System.out.println( "A VM was running on it!!" ) ;
+                                                               System.out.println( "I will redeploy a save and restart all VM ..." ) ;
                                
-//                                                     for( int i = 0 ; i < computingClients.size() ; i++ )
-//                                                     {
-//                                                             if( computingClients.get( i ).getClient().getIP().equals( cl.getIP() ) )
+//                                                             for( int i = 0 ; i < computingClients.size() ; i++ )
 //                                                             {
-//                                                                     computingClients.remove( i ) ;
-//                                                                     break ;
+//                                                                     if( computingClients.get( i ).getClient().getIP().equals( cl.getIP() ) )
+//                                                                     {
+//                                                                             computingClients.remove( i ) ;
+//                                                                             break ;
+//                                                                     }
 //                                                             }
-//                                                     }
-                                                       synchronized( counter )
-                                                       {
-                                                               counter.inc() ;
-                                                       }
+                                                               synchronized( counter )
+                                                               {
+                                                                       counter.inc() ;
+                                                               }
                                                                
-                                                       new Server.FaultManager( cl ).start() ;
-                                                       nb_disconnections_computing++ ;
-                                               } else {
-                                                       System.out.println( "There was no VM running on it." ) ;
-                                                       System.out.println( "Maybe it will come back later :)" ) ;
-                                               }
+                                                               new Server.FaultManager( cl ).start() ;
+                                                               nb_disconnections_computing++ ;
+                                                       } else {
+                                                               System.out.println( "There was no VM running on it." ) ;
+                                                               System.out.println( "Maybe it will come back later :)" ) ;
+                                                       }
                                                
-                                               synchronized( clients )
-                                               {
-                                                       it.remove() ;
+                                                       synchronized( clients )
+                                                       {
+                                                               it.remove() ;
+                                                       }
+                                                       nb_disconnections++ ;
+                                                       change = true ;
                                                }
-                                               nb_disconnections++ ;
-                                               change = true ;
                                        }
                                }
                                
@@ -610,21 +624,32 @@ public class Server extends UnicastRemoteObject implements ServicesServer
                                        {
                                                applications.get( ind ).getComputingClients().get( i ).setRestartOk( false ) ;
                                                
-                                               final ServicesClient sc = applications.get( ind ).getComputingClients().get( i ).getClient().getStub() ;
+                                               new RestartVM( applications.get( ind ).getComputingClients().get( i ).getClient() ).start() ;
                                                
-                                               new Thread( new Runnable() {
-                                                       
-                                                       @Override
-                                                       public void run() 
-                                                       {
-                                                               try {
-                                                                       sc.restartVMAfterCrash() ;
-                                                               } catch( RemoteException e ) {
-                                                                       e.printStackTrace() ;
-                                                                       yield() ;
-                                                               }
-                                                       }
-                                               } ).start() ;
+//                                             final ServicesClient sc = applications.get( ind ).getComputingClients().get( i ).getClient().getStub() ;
+                                               
+//                                             new Thread( new Runnable() {
+//                                                     
+//                                                     @Override
+//                                                     public void run() 
+//                                                     {
+//                                                             try {
+//                                                                     if( sc.restartVMAfterCrash() != 0 )
+//                                                                     {
+//                                                                             System.err.println( "Problem while restarting VM on " +sc.getName() + "!" ) ;
+//                                                                     }
+//                                                             } catch( RemoteException e ) {
+//                                                                     try {
+//                                                                             System.err.println( "Problem while restarting VM on " + sc.getName() + "!" ) ;
+//                                                                     } catch( RemoteException e1 ) {
+//                                                                             System.err.println( "Problem while restarting a VM!" ) ;
+//                                                                             e1.printStackTrace() ;
+//                                                                     }
+//                                                                     e.printStackTrace() ;
+//                                                                     yield() ;
+//                                                             }
+//                                                     }
+//                                             } ).start() ;
                                        }
                                }
                                
@@ -1041,36 +1066,39 @@ public class Server extends UnicastRemoteObject implements ServicesServer
        {
                synchronized( applications )
                {
-               if( running )
-               {
-                       Iterator<ComputingClient> it = computingClients.iterator() ;
-               
-                       while( it.hasNext() )
+                       if( running )
                        {
-                               ComputingClient cl = it.next() ;
+                               applications.get( ind ).setEndTime( System.currentTimeMillis() ) ;
+                               applications.get( ind ).setRunning( false ) ;
+                               applications.get( ind ).clear() ;
+                       
+                               Iterator<ComputingClient> it = computingClients.iterator() ;
+               
+                               while( it.hasNext() )
+                               {
+                                       ComputingClient cl = it.next() ;
 
-                               try {
-                                       cl.getClient().getStub().emergencyStop() ;
-                               } catch (RemoteException e) {
-                                       e.printStackTrace();
-                               }
+                                       try {
+                                               cl.getClient().getStub().emergencyStop() ;
+                                       } catch (RemoteException e) {
+                                               e.printStackTrace();
+                                       }
                        
-                               cl.getClient().setStatus( "connected" ) ;
-                               cl.getClient().setComputingClient( null ) ;
-                               it.remove() ;
-                               cl = null ;
-                       }
+                                       cl.getClient().setStatus( "connected" ) ;
+                                       cl.getClient().setComputingClient( null ) ;
+                                       it.remove() ;
+                                       cl = null ;
+                               }
                
-                       applications.get( ind ).setEndTime( System.currentTimeMillis() ) ;
-                       applications.get( ind ).setRunning( false ) ;
-                       applications.get( ind ).clear() ;
-//                     applications.remove( ind ) ;
                        
-                       running = false ;
+                               applications.get( ind ).clear() ;
                        
-                       System.out.println( "Application " + applications.get( ind ).getName() + " ends in " + 
+                               running = false ;
+                       
+                               System.out.println( "Application " + applications.get( ind ).getName() + " ends in " + 
                                        applications.get( ind ).getExecutionTime() + " seconds." ) ;
-               }}
+                       }
+               }
        }
 
 
@@ -1311,6 +1339,52 @@ public class Server extends UnicastRemoteObject implements ServicesServer
                protected int getNb() { return nb ; }
        }
        
+       
+       private class RestartVM extends Thread
+       {
+               private ConnectedClient cc ;
+               
+               protected RestartVM( ConnectedClient _cc )
+               {
+                       cc = _cc ;
+               }
+                       
+               public void run()
+               {
+                       boolean error = false ;
+                       if( cc != null )
+                       {
+                               try {
+                                       if( cc.getStub().restartVMAfterCrash() != 0 )
+                                       {
+                                               System.err.println( "Problem while restarting VM on " + cc.getName() + "!" ) ;
+                                               error = true ;
+                                       }
+                               } catch( RemoteException e ) {
+                                       e.printStackTrace() ;
+                                       error = true ;
+                                       yield() ;
+                               }
+                       } else {
+                               System.err.println( "The client to restart is null!" ) ;
+                       }
+                       
+                       if( error )
+                       {
+                               cc.setFail( true ) ;
+                               
+                               try {
+                                       System.out.print( "Trying to stop the client ... " ) ;
+                                       cc.getStub().stop() ;
+                                       System.out.println( "successful client stop." );
+                               } catch( RemoteException e ) {
+                                       System.out.println( "unsuccessful client stop!" ) ;
+                                       e.printStackTrace() ;
+                               }
+                       }
+               }
+       }
+       
 }
 
 /** La programmation est un art, respectons ceux qui la pratiquent !! **/