From a5033b66e150c1f5a03008abb71408b1700337aa Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=C3=A9bastien=20Miqu=C3=A9e?= Date: Wed, 29 Jun 2011 17:40:42 +0200 Subject: [PATCH 1/1] Changing save mechanism. --- src/and/hpcvm/Client.java | 791 +++++++++++++++++++++++------- src/and/hpcvm/Server.java | 79 ++- src/and/hpcvm/ServicesClient.java | 18 +- src/and/hpcvm/VirtualMachine.java | 61 ++- 4 files changed, 735 insertions(+), 214 deletions(-) diff --git a/src/and/hpcvm/Client.java b/src/and/hpcvm/Client.java index 2f1e01b..66a43c3 100644 --- a/src/and/hpcvm/Client.java +++ b/src/and/hpcvm/Client.java @@ -15,6 +15,7 @@ import java.rmi.registry.LocateRegistry; import java.rmi.registry.Registry; import java.rmi.server.UnicastRemoteObject; import java.util.ArrayList; +import java.util.concurrent.Semaphore; public class Client extends UnicastRemoteObject implements ServicesClient @@ -43,6 +44,10 @@ public class Client extends UnicastRemoteObject implements ServicesClient private SaveProcess saveProcess; private int maxRetryVM ; private int timeRetryVM ; + private Semaphore sema ; + private boolean emergencyStop ; + private Process procSave ; + private boolean lastSaveOk ; protected Client() throws RemoteException @@ -50,12 +55,37 @@ public class Client extends UnicastRemoteObject implements ServicesClient super() ; } + @Override + public void emergencyStop() + { + emergencyStop = true ; + + // Saving processus stop + synchronized( saveProcess ) { + saveProcess.setStatus( false ) ; + try { + saveProcess.notifyAll() ; + } catch( Exception e ) {}} + + try { + procSave.destroy() ; + } catch( Exception e ) {} + + stopVM() ; + } @Override public int startVM( int _mode ) { if( machine != null && ! machine.getStatus().equalsIgnoreCase( "running" ) ) { + try { + sema.acquire() ; + } catch( InterruptedException e2 ) { + System.err.println( "Problem with semaphore acquiring!" ) ; + e2.printStackTrace() ; + } + boolean ret = true ; int retry = 0 ; @@ -78,39 +108,57 @@ public class Client extends UnicastRemoteObject implements ServicesClient working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name(), "nogui"} ; while( ret ) - try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; + { - if( p.exitValue() == 0 ) + if( emergencyStop ) { - System.out.println( "Virtual machine successfully started." ) ; - ret = false ; - } else { - System.err.println( "Virtual machine not started!" ) ; - ret = printProcessError( p ) ; - - if( ! ret ) + return 1 ; + } + + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) { - return 1 ; + System.out.println( "Virtual machine successfully started." ) ; + ret = false ; } else { - retry++ ; - if( retry >= maxRetryVM ) + System.err.println( "Virtual machine not started!" ) ; + ret = printProcessError( p ) ; + + if( ! ret ) { - System.err.println( "Unable to start VM!" ) ; + sema.release() ; + return 1 ; + } else { + retry++ ; + if( retry >= maxRetryVM ) + { + System.err.println( "Unable to start VM!" ) ; + + sema.release() ; + + return 1 ; + } + System.out.println( "Retrying (" + retry + ") ... " ) ; + Thread.sleep( timeRetryVM ) ; } - System.out.println( "Retrying (" + retry + ") ... " ) ; - Thread.sleep( timeRetryVM ) ; } + } catch( IOException e ) { + System.err.println( "Error during execution of start command: " ) ; + e.printStackTrace() ; + + sema.release() ; + + return 1 ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + + sema.release() ; + return 1 ; } - } catch( IOException e ) { - System.err.println( "Error during execution of start command: " ) ; - e.printStackTrace() ; - return 1 ; - } catch( InterruptedException e ) { - e.printStackTrace() ; - return 1 ; } @@ -121,6 +169,11 @@ public class Client extends UnicastRemoteObject implements ServicesClient while( ! started ) { + if( emergencyStop ) + { + return 1 ; + } + /** Waiting for VM being started **/ try { Thread.sleep( wait_start ) ; @@ -141,71 +194,94 @@ public class Client extends UnicastRemoteObject implements ServicesClient fw.close() ; } catch( IOException e1 ) { e1.printStackTrace() ; + + sema.release() ; + return 1 ; } command = new String[]{ ushell, working_directory + "/testStarted.sh"} ; while( ret ) - try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; - - if( p.exitValue() == 0 ) + { + if( emergencyStop ) { - started = true ; + return 1 ; + } + + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) + { + started = true ; // machine.setStatus( "running" ) ; // if( _mode == 0 ) // { // LocalHost.Instance().getServerStub().changeStatus( // LocalHost.Instance().getIP(), "running" ) ; // } - ret = false ; - } else { - System.err.println( "Error while checking if the VM is started!" ) ; + ret = false ; + } else { + System.err.println( "Error while checking if the VM is started!" ) ; // printProcessError( p.getErrorStream() ) ; - ret = printProcessError( p ) ; + ret = printProcessError( p ) ; - if( ! ret ) - { - return 1 ; - } else { - retry++ ; - if( retry >= maxRetryVM ) + if( ! ret ) { - System.err.println( "Unable to check VM!" ) ; + sema.release() ; + stopVM() ; return 1 ; + } else { + retry++ ; + if( retry >= maxRetryVM ) + { + System.err.println( "Unable to check VM!" ) ; + + sema.release() ; + stopVM() ; + return 1 ; + } + System.out.println( "Retrying (" + retry + ") ... " ) ; + Thread.sleep( timeRetryVM ) ; } - System.out.println( "Retrying (" + retry + ") ... " ) ; - Thread.sleep( timeRetryVM ) ; - } // wait_start = wait_start / 2 ; - count++ ; + count++ ; + } + } catch( IOException e ) { + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; } - } catch( IOException e ) { - e.printStackTrace() ; - } catch( InterruptedException e ) { - e.printStackTrace() ; - } - if( count == max_start_try && ! started ) - { - System.err.println( "Virtual machine not responding!!" ) ; + if( count == max_start_try && ! started ) + { + System.err.println( "Virtual machine not responding!!" ) ; - try { - LocalHost.Instance().getServerStub().changeStatus( - LocalHost.Instance().getIP(), "undefined" ) ; - } catch( RemoteException e ) { - e.printStackTrace() ; - } - - stopVM() ; + try { + LocalHost.Instance().getServerStub().changeStatus( + LocalHost.Instance().getIP(), "undefined" ) ; + } catch( RemoteException e ) { + e.printStackTrace() ; + } + + sema.release() ; - return 1 ; - } else { - try { - Thread.sleep( 3000 ) ; - } catch( InterruptedException e ) { - e.printStackTrace() ; + stopVM() ; + + return 1 ; + } else { + try { + Thread.sleep( 3000 ) ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + } } } } @@ -228,6 +304,9 @@ public class Client extends UnicastRemoteObject implements ServicesClient fw.close() ; } catch( IOException e1 ) { e1.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; } command = new String[]{ ushell, working_directory + "/sendHostIP.sh"} ; @@ -236,41 +315,57 @@ public class Client extends UnicastRemoteObject implements ServicesClient retry = 0 ; while( ret ) - try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; - - if( p.exitValue() == 0 ) + { + if( emergencyStop ) { - System.out.println( "VM received the host IP." ) ; - ret = false ; - } else { - System.err.println( "VM did not received the host IP!" ) ; -// printProcessError( p.getErrorStream() ) ; - ret = printProcessError( p ) ; - - if( ! ret ) + return 1 ; + } + + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) { - return 1 ; + System.out.println( "VM received the host IP." ) ; + ret = false ; } else { - retry++ ; - if( retry >= maxRetryVM ) - { - System.err.println( "Unable to send information to VM!" ) ; + System.err.println( "VM did not received the host IP!" ) ; +// printProcessError( p.getErrorStream() ) ; + ret = printProcessError( p ) ; + if( ! ret ) + { + sema.release() ; stopVM() ; - return 1 ; + } else { + retry++ ; + if( retry >= maxRetryVM ) + { + System.err.println( "Unable to send information to VM!" ) ; + + sema.release() ; + stopVM() ; + + return 1 ; + } + System.out.println( "Retrying (" + retry + ") ... " ) ; + Thread.sleep( timeRetryVM ) ; } - System.out.println( "Retrying (" + retry + ") ... " ) ; - Thread.sleep( timeRetryVM ) ; } + } catch( IOException e ) { + System.err.println( "Error during execution of runScriptInGuest command: " ) ; + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; + } catch( InterruptedException e) { + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; } - } catch( IOException e ) { - System.err.println( "Error during execution of runScriptInGuest command: " ) ; - e.printStackTrace() ; - } catch( InterruptedException e) { - e.printStackTrace() ; } /** Sending the vm ip **/ @@ -291,6 +386,9 @@ public class Client extends UnicastRemoteObject implements ServicesClient fw.close() ; } catch( IOException e1 ) { e1.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; } command = new String[]{ ushell, working_directory + "/sendVmIP.sh"} ; @@ -299,43 +397,59 @@ public class Client extends UnicastRemoteObject implements ServicesClient retry = 0 ; while( ret ) - try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; - - if( p.exitValue() == 0 ) + { + if( emergencyStop ) { - System.out.println( "VM received its assigned IP." ) ; + return 1 ; + } + + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) + { + System.out.println( "VM received its assigned IP." ) ; // return 0 ; - ret = false ; - machine.setStatus( "running" ) ; - } else { - System.err.println( "VM did not received its assigned IP!" ) ; + ret = false ; + machine.setStatus( "running" ) ; + } else { + System.err.println( "VM did not received its assigned IP!" ) ; // printProcessError( p.getErrorStream() ) ; - ret = printProcessError( p ) ; + ret = printProcessError( p ) ; - if( ! ret ) - { - return 1 ; - } else { - retry++ ; - if( retry >= maxRetryVM ) + if( ! ret ) { - System.err.println( "Unable to send information to VM!" ) ; - + sema.release() ; stopVM() ; - return 1 ; + } else { + retry++ ; + if( retry >= maxRetryVM ) + { + System.err.println( "Unable to send information to VM!" ) ; + + sema.release() ; + stopVM() ; + + return 1 ; + } + System.out.println( "Retrying (" + retry + ") ... " ) ; + Thread.sleep( timeRetryVM ) ; } - System.out.println( "Retrying (" + retry + ") ... " ) ; - Thread.sleep( timeRetryVM ) ; } + } catch( IOException e ) { + System.err.println( "Error during execution of runScriptInGuest command: " ) ; + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + sema.release() ; + stopVM() ; + return 1 ; } - } catch( IOException e ) { - System.err.println( "Error during execution of runScriptInGuest command: " ) ; - e.printStackTrace() ; - } catch( InterruptedException e ) { - e.printStackTrace() ; } if( _mode == 0 ) @@ -349,9 +463,13 @@ public class Client extends UnicastRemoteObject implements ServicesClient } } + sema.release() ; + return 0 ; } + sema.release() ; + return 1 ; } @@ -360,11 +478,19 @@ public class Client extends UnicastRemoteObject implements ServicesClient { if( machine != null && machine.getStatus().equalsIgnoreCase( "stopped" ) ) { + emergencyStop = false ; return 0 ; } if( machine != null && ! machine.getStatus().equalsIgnoreCase( "stopped" ) ) { + try { + sema.acquire() ; + } catch( InterruptedException e2 ) { + System.err.println( "Problem with semaphore acquiring!" ) ; + e2.printStackTrace() ; + } + System.out.print( "Stopping VM ... " ) ; boolean ret = true ; @@ -399,12 +525,14 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( ! ret ) { + sema.release() ; return 1 ; } else { retry++ ; if( retry >= maxRetryVM ) { System.err.println( "Unable to stop VM!" ) ; + sema.release() ; return 1 ; } System.out.println( "Retrying (" + retry + ") ... " ) ; @@ -416,8 +544,12 @@ public class Client extends UnicastRemoteObject implements ServicesClient } catch( IOException e ) { System.err.println( "Error during execution of stop command: " ) ; e.printStackTrace() ; + sema.release() ; + return 1 ; } catch( InterruptedException e ) { e.printStackTrace() ; + sema.release() ; + return 1 ; } machine.setStatus( "stopped" ) ; @@ -443,25 +575,35 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( p.exitValue() == 0 ) { System.out.println( "Successfully replaced the VMX file." ) ; + sema.release() ; + emergencyStop = false ; return 0 ; } else { System.err.println( "Unsuccessful replacement of the VMX file!" ) ; // printProcessError( p.getErrorStream() ) ; printProcessError( p ) ; - + sema.release() ; return 1 ; } } catch( IOException e ) { System.err.println( "Error during VMX file replacement: " ) ; e.printStackTrace() ; + sema.release() ; + return 1 ; } catch( InterruptedException e ) { e.printStackTrace() ; + sema.release() ; + return 1 ; } } else { + sema.release() ; + emergencyStop = false ; return 0 ; } } + sema.release() ; + return 1 ; } @@ -475,6 +617,13 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( machine != null && ! machine.getStatus().equalsIgnoreCase( "suspended" ) ) { + try { + sema.acquire() ; + } catch( InterruptedException e2 ) { + System.err.println( "Problem with semaphore acquiring!" ) ; + e2.printStackTrace() ; + } + System.out.print( "Suspending VM ... " ) ; boolean ret = true ; @@ -511,12 +660,17 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( ! ret ) { + sema.release() ; + return 1 ; } else { retry++ ; if( retry >= maxRetryVM ) { System.err.println( "Unable to suspend VM!" ) ; + + sema.release() ; + return 1 ; } System.out.println( "Retrying (" + retry + ") ... " ) ; @@ -544,9 +698,13 @@ public class Client extends UnicastRemoteObject implements ServicesClient } } + sema.release() ; + return 0 ; } - + + sema.release() ; + return 1 ; } @@ -825,25 +983,69 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( suspendVM( 1 ) == 1 ) { error = true ; errorVM = true ; } + if( ! lastSaveOk ) + { + System.out.println( "Deletion of last nok archive ... " ) ; + + command = new String[]{ "/bin/rm", "-rf", + working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar", + working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar.gz", } ; + + try { + procSave = Runtime.getRuntime().exec( command ) ; + procSave.waitFor() ; + + if( procSave.exitValue() == 0 ) + { + System.out.println( "Last nok archive successfully deleted." ) ; + } else { + System.err.println( "Last nok archive not deleted!" ) ; +// printProcessError( p.getErrorStream() ) ; + printProcessError( procSave ) ; + + error = true ; + } + } catch( IOException e ) { + System.err.println( "Error during nok archive deletion command: " ) ; + error = true ; + e.printStackTrace() ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + error = true ; + } + } + if( ! error ) { System.out.print( "Creation of the archive ... " ) ; /** Archive creation **/ - command = new String[]{ "/bin/tar", "-cvf", - working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar", - working_directory + "/" + machine.getDirectory() } ; - +// command = new String[]{ "/bin/tar", "-cf", +// working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar", +// working_directory + "/" + machine.getDirectory() } ; +// + command = new String[]{ "/bin/tar", "-cf", + machine.getName() + "_new_" + machine.getComputationId() + ".tar", + machine.getDirectory(), "-C", working_directory } ; + + if( emergencyStop ) + { + return 1 ; + } + try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; + procSave = Runtime.getRuntime().exec( command ) ; + procSave.waitFor() ; - if( p.exitValue() == 0 ) + if( procSave.exitValue() == 0 ) { System.out.println( "Archive successfully created." ) ; + + lastSaveOk = false ; + } else { System.err.println( "Archive not created!" ) ; // printProcessError( p.getErrorStream() ) ; - printProcessError( p ) ; + printProcessError( procSave ) ; error = true ; } @@ -864,17 +1066,22 @@ public class Client extends UnicastRemoteObject implements ServicesClient working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar" } ; + if( emergencyStop ) + { + return 1 ; + } + try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; + procSave = Runtime.getRuntime().exec( command ) ; + procSave.waitFor() ; - if( p.exitValue() == 0 ) + if( procSave.exitValue() == 0 ) { System.out.println( "Archive successfully compressed." ) ; } else { System.err.println( "Archive not compressed!" ) ; // printProcessError( p.getErrorStream() ) ; - printProcessError( p ) ; + printProcessError( procSave ) ; error = true ; } @@ -894,10 +1101,10 @@ public class Client extends UnicastRemoteObject implements ServicesClient /** Sending ok save signal **/ - if( ! error ) - { +// if( ! error ) +// { if( sendSaveOkVM() == 1 ) { error = true ; } - } +// } // /** Deletion of the tar archive **/ // if( ! error ) @@ -929,32 +1136,45 @@ public class Client extends UnicastRemoteObject implements ServicesClient // } // } - saveName = working_directory + "/" + machine.getName() + "_new_" - + machine.getComputationId() + ".tar.gz" ; + saveName = machine.getName() + "_new_" + machine.getComputationId() + ".tar.gz" ; /** Sending save to neighbor **/ if( ! error ) { - ArrayList sn = machine.getSaveNeighbors() ; + if( emergencyStop ) + { + return 1 ; + } + + boolean ok = true ; + + ArrayList sn = machine.getSaveNeighbors() ; for( int i = 0 ; i < sn.size() ; i++ ) { System.out.print( "Sending save to " + sn.get( i ) + " ... " ) ; - command = new String[]{ "/usr/bin/scp", saveName, - sn.get( i ) + ":" - + working_directory } ; + try { + command = new String[]{ "/usr/bin/scp", working_directory + "/" + saveName, + sn.get( i ).getIPHost() + ":" + + sn.get( i ).getWorkingDirectory() } ; + } catch( RemoteException e1 ) { + System.err.println( "Unable to retrieve save neighbor information!" ) ; + e1.printStackTrace() ; + ok = false ; + } + if( ok ) try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; + procSave = Runtime.getRuntime().exec( command ) ; + procSave.waitFor() ; - if( p.exitValue() == 0 ) + if( procSave.exitValue() == 0 ) { System.out.println( "Archive successfully sent." ) ; } else { System.err.println( "Archive not sent!" ) ; // printProcessError( p.getErrorStream() ) ; - printProcessError( p ) ; + printProcessError( procSave ) ; error = true ; } @@ -1034,6 +1254,9 @@ public class Client extends UnicastRemoteObject implements ServicesClient wait_start = 15000 ; max_start_try = 10 ; + sema = new Semaphore( 1 ) ; + emergencyStop = false ; + maxRetryVM = 10 ; timeRetryVM = 10000 ; @@ -1041,6 +1264,7 @@ public class Client extends UnicastRemoteObject implements ServicesClient date_last_save = 0 ; isRestartedSave = false ; + lastSaveOk = true ; /** Connection to server **/ try { @@ -1444,14 +1668,15 @@ public class Client extends UnicastRemoteObject implements ServicesClient @Override public void saveOk() { - String save_name = "VmTest_" + machine.getComputationId() + + String save_name = machine.getName() + "_" + machine.getComputationId() + "_last.tar.gz" ; - String save_new = working_directory + "/" + machine.getName() + "_new_" + String save_new = machine.getName() + "_new_" + machine.getComputationId() + ".tar.gz" ; String[] command = new String[]{ "/bin/mv", - save_new, save_name } ; + working_directory + "/" + save_new, + working_directory + "/" + save_name } ; try { Process p = Runtime.getRuntime().exec( command ) ; @@ -1459,8 +1684,8 @@ public class Client extends UnicastRemoteObject implements ServicesClient if( p.exitValue() == 0 ) { - System.out.println( "Last save OK" ) ; machine.setSave_last( save_name ) ; + System.out.println( "Last save OK" ) ; } else { System.err.println( "Last save NOK!" ) ; System.err.println( "Error: " ) ; @@ -1473,24 +1698,187 @@ public class Client extends UnicastRemoteObject implements ServicesClient } catch( InterruptedException e ) { e.printStackTrace() ; } + + // Changing on save neighbors + for( int i = 0 ; i < machine.getSaveNeighbors().size() ; i++ ) + { + try { + machine.getSaveNeighbors().get( i ).changeSaveName( save_new, save_name, machine.getComputationId() ) ; + } catch( RemoteException e ) { + try { + System.err.println( "Unable to change save name on " + machine.getSaveNeighbors().get( i ).getName() + "!" ) ; + } catch( RemoteException e1 ) { + System.err.println( "Unable to change save name on an unamed save neighbor!" ) ; + e1.printStackTrace() ; + } + e.printStackTrace() ; + } + } + + // Ok here + lastSaveOk = true ; + } + + + public void changeSaveName( String _n1, String _n2, int _id ) + { + if( _n1 != null && _n1.length() > 0 ) + { + System.out.println( "Changing save name for processus " + _id + " ... " ) ; + + String[] command = new String[]{ "/bin/mv", + working_directory + "/" + _n1, + working_directory + "/" + _n2 } ; + + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) + { + System.out.println( "Change save name OK" ) ; + } else { + System.err.println( "Change save name NOK!" ) ; + System.err.println( "Error: " ) ; +// printProcessError( p.getErrorStream() ) ; + printProcessError( p ) ; + } + } catch( IOException e ) { + System.err.println( "Error during save renaming:" ) ; + e.printStackTrace() ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + } + } } + @Override - public void setSavingNeighbor( String _sn ) throws RemoteException + public void setSavingNeighbor( ServicesClient _sn ) { - if( _sn != null && _sn.length() > 0 ) + if( _sn != null ) { - ArrayList as = new ArrayList() ; + ArrayList as = new ArrayList() ; as.add( _sn ) ; - System.out.println( "Save neighbor: " + _sn ) ; + try { + System.out.println( "Save neighbor: " + _sn.getName() ) ; + } catch( RemoteException e ) { + System.err.println( "Unable to retrieve the name of the save neighbor!" ) ; + e.printStackTrace() ; + } machine.setSaveNeighbors( as ) ; } } + + + @Override + public void setSavingNeighbors( ArrayList _sn ) + { + if( _sn != null && _sn.size() > 0 ) + { + System.out.print( "Save neighbors: " ) ; + for( int i = 0 ; i < _sn.size() ; i++ ) + { + try { + System.out.print( _sn.get( i ).getName() ) ; + } catch( RemoteException e ) { + System.err.println( "Unable to retrieve the name of a save neighbor!" ) ; + e.printStackTrace() ; + } + + if( i != _sn.size() - 1 ) + { + System.out.print( ", " ) ; + } else { + System.out.println( "." ) ; + } + } + + machine.setSaveNeighbors( _sn ) ; + } + } + + @Override + public void addSavingNeighbor( ServicesClient _sn ) + { + if( _sn != null ) + { + try { + System.out.println( "Adding save neighbor: " + _sn.getName() ) ; + } catch( RemoteException e ) { + System.err.println( "Unable to retrieve the name of a save neighbor!" ) ; + e.printStackTrace() ; + } + + machine.getSaveNeighbors().add( _sn ) ; + } + } + + + @Override + public void addSavingNeighbors( ArrayList _sn ) + { + if( _sn != null && _sn.size() > 0 ) + { + System.out.print( "Adding save neighbors: " ) ; + for( int i = 0 ; i < _sn.size() ; i++ ) + { + try { + System.out.print( _sn.get( i ).getName() ) ; + } catch( RemoteException e ) { + System.err.println( "Unable to retrieve the name of a save neighbor!" ) ; + e.printStackTrace() ; + } + + if( i != _sn.size() - 1 ) + { + System.out.print( ", " ) ; + } else { + System.out.println( "." ) ; + } + + machine.getSaveNeighbors().add( _sn.get( i ) ) ; + } + } + } + + + @Override + public void replaceSavingNeighbor( ServicesClient _old, ServicesClient _new ) + { + System.out.print( "Replacing a save neihgbor ... " ) ; + if( _old != null && _new != null ) + { + int i = 0 ; + + for( i = 0 ; i < machine.getSaveNeighbors().size() ; i++ ) + { + try { + if( machine.getSaveNeighbors().get( i ).getIPHost().equalsIgnoreCase( _old.getIPVM() ) ) + { + machine.getSaveNeighbors().set( i, _new ) ; + System.out.println( "Save neighbor successfully changed." ) ; + break ; + } + } catch( RemoteException e ) { + System.err.println( "Unable to retrieve the IP address of a save neighbor!" ) ; + e.printStackTrace() ; + } + } + + if( i == machine.getSaveNeighbors().size() ) + { + System.out.println( "I am not concerned by the modification." ) ; + } + } + } + + @Override - public int retrieveSave( String _saveName ) throws RemoteException + public int retrieveSave( String _saveName ) { if( _saveName != null ) { @@ -1498,41 +1886,74 @@ public class Client extends UnicastRemoteObject implements ServicesClient { machine.setSave_last( _saveName ) ; } else { - System.err.println( "I have no saving neighbor to contact!!" ) ; + System.err.println( "I have no save to retrieve!!" ) ; return 1 ; } // TODO NEIGHBORS !!!! //System.out.println( "!!!! NEIGHBORS !!!!!" ) ; - System.out.print( "Retrieving a save on " + machine.getSaveNeighbors().get( 0 ) + " ..." ) ; - String[] command = new String[]{ "/usr/bin/scp", - machine.getSaveNeighbors().get( 0 ) + ":" - + working_directory + "/" + machine.getSave_last(), - working_directory } ; - - try { - Process p = Runtime.getRuntime().exec( command ) ; - p.waitFor() ; + boolean ok = false ; + boolean go = true ; + int i = 0 ; - if( p.exitValue() == 0 ) + while( ! ok && i < machine.getSaveNeighbors().size() ) + { + try { + System.out.print( "Retrieving a save on " + machine.getSaveNeighbors().get( 0 ).getName() + " ... " ) ; + } catch( RemoteException e1 ) { + System.err.println( "Unable to retrieve the name of a save neighbor!" ) ; + e1.printStackTrace() ; + } + + String command[] = {} ; + + try { - System.out.println( "Archive successfully retrieved." ) ; - isRestartedSave = true ; - return 0 ; - } else { - System.err.println( "Archive not retrieved!" ) ; - System.err.println( "Error: " ) ; -// printProcessError( p.getErrorStream() ) ; - printProcessError( p ) ; + command = new String[]{ "/usr/bin/scp", + machine.getSaveNeighbors().get( i ).getIPHost() + ":" + + machine.getSaveNeighbors().get( i ).getWorkingDirectory() + "/" + + machine.getSave_last(), + working_directory } ; + } catch( RemoteException e1 ) { + System.err.println( "Unable to retrieve the name of a save neighbor!" ) ; + e1.printStackTrace() ; + go = false ; + } + + if( go ) + try { + Process p = Runtime.getRuntime().exec( command ) ; + p.waitFor() ; + + if( p.exitValue() == 0 ) + { + System.out.println( "Archive successfully retrieved." ) ; + isRestartedSave = true ; + ok = true ; + } else { + System.err.println( "Archive not retrieved!" ) ; + System.err.println( "Error: " ) ; +// printProcessError( p.getErrorStream() ) ; + printProcessError( p ) ; +// error = true ; + } + } catch( IOException e ) { + System.err.println( "Error during archive retrieve command: " ) ; + e.printStackTrace() ; +// error = true ; + } catch( InterruptedException e ) { + e.printStackTrace() ; // error = true ; } - } catch( IOException e ) { - System.err.println( "Error during archive retrieve command: " ) ; - e.printStackTrace() ; -// error = true ; - } catch( InterruptedException e ) { - e.printStackTrace() ; -// error = true ; + + i++ ; + } + + if( ok ) + { + return 0 ; + } else { + System.err.println( "Unable to retrieve a save archive from any neighbor!" ) ; } } @@ -1563,6 +1984,12 @@ public class Client extends UnicastRemoteObject implements ServicesClient } + public String getWorkingDirectory() + { + return working_directory ; + } + + private class SaveProcess { boolean status ; diff --git a/src/and/hpcvm/Server.java b/src/and/hpcvm/Server.java index dafd9c5..b776409 100644 --- a/src/and/hpcvm/Server.java +++ b/src/and/hpcvm/Server.java @@ -82,14 +82,14 @@ public class Server extends UnicastRemoteObject implements ServicesServer { private ConnectedClient client ; private boolean save_status ; - private String save_neighbor ; + private ArrayList save_neighbor ; private String lastSaveName ; ComputingClient( ConnectedClient cl ) { client = cl ; save_status = false ; - save_neighbor = "none" ; + save_neighbor = new ArrayList() ; lastSaveName = "none" ; } @@ -99,11 +99,13 @@ public class Server extends UnicastRemoteObject implements ServicesServer protected void setSaveStatus( boolean _status ) { save_status = _status ; } - protected void setSaveNeighbor( String _sn ) + protected void setSaveNeighbor( ServicesClient _sn ) { - if( _sn != null && ! _sn.isEmpty() ) + if( _sn != null ) { - save_neighbor = _sn ; + save_neighbor.set( 0, _sn ) ; + + System.out.println( "My save neighbor is " + _sn ) ; try { client.getStub().setSavingNeighbor( _sn ) ; @@ -115,7 +117,15 @@ public class Server extends UnicastRemoteObject implements ServicesServer } } - protected String getSaveNeighbor() { return save_neighbor ; } + protected ServicesClient getSaveNeighbor() + { + if( save_neighbor.isEmpty() ) + { + return null ; + } else { + return save_neighbor.get( 0 ) ; + } + } public void setLastSave( String _saveName ) { @@ -400,16 +410,13 @@ public class Server extends UnicastRemoteObject implements ServicesServer @Override public void run() { - synchronized( counter ){ - counter.inc() ;} - System.out.println("ici"); if( cl != null && cl.getStatus().equalsIgnoreCase( "running" ) || cl.getStatus().equalsIgnoreCase( "saving" ) ) { - System.out.println("ok"); ComputingClient cc = cl.getComputingClient() ; + ServicesClient dead = cc.getClient().getStub() ; String ipDead = cc.getClient().getIP() ; - + boolean ok = false ; for( int i = 0 ; i < clients.size() ; i++ ) @@ -434,19 +441,20 @@ public class Server extends UnicastRemoteObject implements ServicesServer } else { System.out.println( "Trying to replace " + cc.getClient().getName() + " with " + clients.get(i).getName() + " ... " ) ; - + + String save_name = computingClients.get( pos ).getLastSave() ; + ComputingClient ccl = new ComputingClient( clients.get(i) ) ; clients.get( i ).setComputingClient( ccl ) ; - String sn = computingClients.get( pos ).getSaveNeighbor() ; + ServicesClient sn = computingClients.get( pos ).getSaveNeighbor() ; ccl.setSaveNeighbor( sn ) ; computingClients.set( pos, ccl ) ; -// computingClients.get( pos ).setSaveNeighbor( sn ) ; - + int res = 1 ; try { res = computingClients.get( pos ).getClient().getStub(). - retrieveSave( computingClients.get(i).getLastSave() ) ; + retrieveSave( save_name ) ; } catch( RemoteException e ) { System.err.println( "Unable to indicate to client to retrieve last save!" ) ; e.printStackTrace() ; @@ -486,6 +494,19 @@ public class Server extends UnicastRemoteObject implements ServicesServer if( ok ) { + for( int k = 0 ; k < computingClients.size() ; k++ ) + { + try { + computingClients.get( i ).getClient().getStub(). + replaceSavingNeighbor( dead, clients.get( i ).getStub() ) ; + } catch( RemoteException e ) { + System.err.println( "Unable to inform " + computingClients.get( k ).getClient().getName() + + " of the replacement of a save neighbor!" ) ; + e.printStackTrace() ; + } + } + + System.out.println( "Dead client successfully replaced." ) ; // restart vms break ; @@ -550,6 +571,9 @@ public class Server extends UnicastRemoteObject implements ServicesServer // break ; // } // } + synchronized( counter ){ + counter.inc() ;} + new Server.FaultManager( cl ).start() ; nb_disconnections_computing++ ; @@ -579,7 +603,23 @@ public class Server extends UnicastRemoteObject implements ServicesServer if( nb_disconnections_computing > 0 ) { - System.out.println( "I will redeploy save and restart VMs ..." ) ; + System.out.println( "Sending emergency stop signal to all computing nodes ... " ) ; + + for( int i = 0 ; i < clients.size() ; i++ ) + { + if( clients.get( i ).getStatus().equalsIgnoreCase( "running" ) + || clients.get( i ).getStatus().equalsIgnoreCase( "saving" ) ) + { + try { + clients.get( i ).getStub().emergencyStop() ; + } catch( RemoteException e ) { + System.err.println( "Unable to invoke emergency stop signal on " + clients.get( i ).getName() ) ; + e.printStackTrace() ; + } + } + } + + System.out.println( "I will redeploy save and restart VMs ... " ) ; synchronized( counter ) { @@ -591,6 +631,7 @@ public class Server extends UnicastRemoteObject implements ServicesServer while( counter.getNb() != 0 ) { try { + System.out.println( "### WAITING counter ###" ) ; counter.wait() ; // !!!!! synchro } catch( InterruptedException e ) { e.printStackTrace() ; @@ -724,7 +765,7 @@ public class Server extends UnicastRemoteObject implements ServicesServer { System.err.println( "Problem in ComputingClients list!" ) ; } else { - computingClients.get( index ).setSaveNeighbor( computingClients.get( index2 ).getClient().getIP() ) ; + computingClients.get( index ).setSaveNeighbor( computingClients.get( index2 ).getClient().getStub() ) ; } } else { index = computingClients.indexOf( tmp.get( i ) ) ; @@ -734,7 +775,7 @@ public class Server extends UnicastRemoteObject implements ServicesServer { System.err.println( "Problem in ComputingClients list!" ) ; } else { - computingClients.get( index ).setSaveNeighbor( computingClients.get( index2 ).getClient().getIP() ) ; + computingClients.get( index ).setSaveNeighbor( computingClients.get( index2 ).getClient().getStub() ) ; } } } diff --git a/src/and/hpcvm/ServicesClient.java b/src/and/hpcvm/ServicesClient.java index d5535dd..6e4945c 100644 --- a/src/and/hpcvm/ServicesClient.java +++ b/src/and/hpcvm/ServicesClient.java @@ -2,10 +2,12 @@ package and.hpcvm ; import java.rmi.Remote; import java.rmi.RemoteException; +import java.util.ArrayList; public interface ServicesClient extends Remote { - + public void emergencyStop() throws RemoteException ; + public int startVM( int _mode ) throws RemoteException ; public int stopVM() throws RemoteException ; @@ -28,13 +30,25 @@ public interface ServicesClient extends Remote public String getIPVM() throws RemoteException ; + public String getWorkingDirectory() throws RemoteException ; + public void setIPVM( String _ipVM ) throws RemoteException ; public String getName() throws RemoteException ; public void saveOk() throws RemoteException ; - public void setSavingNeighbor( String _sn ) throws RemoteException ; + public void changeSaveName( String _n1, String _n2, int _id ) throws RemoteException ; + + public void setSavingNeighbor( ServicesClient _sn ) throws RemoteException ; + + public void setSavingNeighbors( ArrayList _sn ) throws RemoteException ; + + public void addSavingNeighbor( ServicesClient _sn ) throws RemoteException ; + + public void addSavingNeighbors( ArrayList _sn ) throws RemoteException ; + + public void replaceSavingNeighbor( ServicesClient _old, ServicesClient _new ) throws RemoteException ; public int retrieveSave( String _saveName ) throws RemoteException ; diff --git a/src/and/hpcvm/VirtualMachine.java b/src/and/hpcvm/VirtualMachine.java index 1d7075d..1cba1f1 100644 --- a/src/and/hpcvm/VirtualMachine.java +++ b/src/and/hpcvm/VirtualMachine.java @@ -21,7 +21,7 @@ public class VirtualMachine private Status status ; private int computation_id ; private String working_directory ; - private ArrayList save_neighbors ; + private ArrayList save_neighbors ; private String clientInVM ; private String vm_user ; private String vm_user_passwd ; @@ -43,28 +43,27 @@ public class VirtualMachine status = new Status() ; status.setStatus( "stopped" ) ; computation_id = -1 ; - save_neighbors = new ArrayList() ; - save_neighbors.add( "127.0.0.1" ) ; + save_neighbors = new ArrayList() ; clientInVM = "/home/mpi/InGuest" ; vm_user = "mpi" ; vm_user_passwd = "mpi" ; } - public ArrayList getSaveNeighbors() { return save_neighbors ; } + public ArrayList getSaveNeighbors() { return save_neighbors ; } @SuppressWarnings("unchecked") - public void setSaveNeighbors( ArrayList _sn ) + public void setSaveNeighbors( ArrayList _sn ) { if( _sn != null ) { - save_neighbors = (ArrayList) _sn.clone() ; + save_neighbors = (ArrayList) _sn.clone() ; } } - public void addSaveNeighbor( String _sn ) + public void addSaveNeighbor( ServicesClient _sn ) { - if( _sn != null && _sn.length() > 0 ) + if( _sn != null ) { save_neighbors.add( _sn ) ; } @@ -200,6 +199,7 @@ public class VirtualMachine } public void setSave_last(String save_last) { + System.out.println( "Save name: " + save_last ) ; this.save_last = save_last; } @@ -234,12 +234,48 @@ public class VirtualMachine public int deployLastSave() { - System.out.print( "Deploying the last save ..." ) ; + System.out.print( "Removing current VM ... " ) ; - String[] command = new String[] { "/bin/tar", "-xzvf", + String[] command = new String[] { "/bin/rm", "-rf", + working_directory + "/" + directory } ; + Process pr = null ; + try { + pr = Runtime.getRuntime().exec( command ) ; +// synchronized( pr ){ + pr.waitFor() ; //;} + } catch( IOException e ) { + System.err.println( "Error while removing current VM!" ) ; + e.printStackTrace() ; + return 1 ; + } catch( InterruptedException e ) { + e.printStackTrace() ; + return 1 ; + } + + if( pr.exitValue() == 0 ) + { + System.out.println( "Successful deletion of current VM." ) ; + } else { + System.err.println( "Error: " + pr.exitValue() ) ; + BufferedReader b = new BufferedReader( new InputStreamReader( pr.getErrorStream() ) ) ; + + String l ; + try { + while( (l = b.readLine()) != null ) + { + System.err.println( l ) ; + } + } catch( IOException e ) { + e.printStackTrace() ; + } + } + + System.out.print( "Deploying the last save ... " ) ; + + command = new String[] { "/bin/tar", "-xzf", working_directory + "/" + save_last, "-C", working_directory } ; - Process pr = null ; + pr = null ; try { pr = Runtime.getRuntime().exec( command ) ; // synchronized( pr ){ @@ -247,10 +283,13 @@ public class VirtualMachine } catch( IOException e ) { System.err.println( "Error while deploying the last secure save!" ) ; e.printStackTrace() ; + return 1 ; } catch( InterruptedException e ) { e.printStackTrace() ; + return 1 ; } + if( pr.exitValue() == 0 ) { System.out.println( "Successful extraction of the save archive." ) ; -- 2.20.1