3 import java.io.BufferedReader;
5 import java.io.FileWriter;
6 import java.io.IOException;
7 import java.io.InputStreamReader;
8 import java.net.MalformedURLException;
9 import java.net.ServerSocket;
10 import java.net.Socket;
11 import java.rmi.Naming;
12 import java.rmi.NotBoundException;
13 import java.rmi.RemoteException;
14 import java.rmi.registry.LocateRegistry;
15 import java.rmi.registry.Registry;
16 import java.rmi.server.UnicastRemoteObject;
17 import java.util.ArrayList;
18 import java.util.concurrent.Semaphore;
/**
 * RMI client object for one host: it extends UnicastRemoteObject, implements
 * ServicesClient, and holds the state used to drive a virtual machine through the
 * external vmrun command.
 * NOTE(review): several lines of the original file (braces, some statements) are
 * not present in this excerpt; the comments only describe what is visible.
 */
16 public class Client extends UnicastRemoteObject implements ServicesClient
23 private static final long serialVersionUID = 1L ;
// Path of the vmrun executable; init() sets it to "/usr/bin/vmrun".
25 private String VmRunCommand ;
26 // private String VmRunCommandArg ;
// The virtual machine handled by this client (instantiated in init()).
27 private VirtualMachine machine ;
// Address and port of the RMI server, as passed to init().
28 private String server_ip ;
29 private int server_port ;
// Port of the local RMI registry used by exportObject().
30 private int client_port ;
// Port announced to the guest for the dialog socket; init() sets it to
// _server_port + 1 and DialogVMServer later overwrites it with the bound port.
31 private int dialog_port ;
// Remote stub of the server (looked up in init()) and this client's own stub.
32 private ServicesServer serverStub ;
33 private ServicesClient myStub ;
// Background threads: periodic ping to the server and socket server for the VM.
34 private PingServer pingServer ;
35 private DialogVMServer dialogVmServer ;
36 private ServerSocket serverSocket ;
// Shell used to run generated scripts; init() sets it to "/bin/bash".
37 private String ushell ;
// Base directory of VM files and archives; init() sets it to "/localhome/vmware".
38 private String working_directory ;
// Milliseconds slept before each "is the VM started" check (15000 in init()).
39 private int wait_start ;
// Limit compared with the number of start checks (10 in init()).
40 private int max_start_try ;
41 private boolean isRestartedSave ;
// Interval in ms (30 minutes in init()) compared with the time elapsed since
// date_last_save when the VM asks for a save.
42 private long save_interleave ;
// Timestamp (System.currentTimeMillis()) recorded around saves.
43 private long date_last_save ;
// Used both as a status holder and as the monitor guarding the save sequence.
44 private SaveProcess saveProcess;
// Retry limit and pause in ms (10000 in init()) for external vmrun commands.
45 private int maxRetryVM ;
46 private int timeRetryVM ;
// Single-permit semaphore (created in init()) acquired by the VM state changes.
47 private Semaphore sema ;
48 private boolean emergencyStop ;
// Handle of the external process currently run by the save sequence.
49 private Process procSave ;
50 private boolean lastSaveOk ;
/**
 * Constructor; declares RemoteException because the class extends
 * UnicastRemoteObject. NOTE(review): the body is not visible in this excerpt.
 */
53 protected Client() throws RemoteException
/**
 * Raises the emergency-stop flag, clears the save status and wakes up every thread
 * waiting on the saveProcess monitor.
 * NOTE(review): both catch clauses below silently discard any Exception; the
 * statements guarded by the second one are not visible in this excerpt.
 */
59 public void emergencyStop()
61 emergencyStop = true ;
63 // Stop the save process: reset its status flag and notify waiting threads
64 synchronized( saveProcess ) {
65 saveProcess.setStatus( false ) ;
67 saveProcess.notifyAll() ;
68 } catch( Exception e ) {}}
72 } catch( Exception e ) {}
/**
 * Starts the virtual machine through vmrun and brings it to the "running" status.
 * Visible steps: run "vmrun -T player start <vmx> nogui" (retried while the retry
 * counter stays below maxRetryVM, sleeping timeRetryVM ms between tries), check that
 * the guest answers a runScriptInGuest "echo ok" (bounded by max_start_try), write
 * the host IP and dialog port to /tmp/vm_host_IP and the VM IP to /tmp/vm_IP inside
 * the guest, then report "running" to the server.
 * NOTE(review): the return statements and the use of _mode are not visible in this
 * excerpt; callers in this file compare the result with 0 and 1.
 */
78 public int startVM( int _mode )
// Only act when a machine exists and is not already reported as running.
80 if( machine != null && ! machine.getStatus().equalsIgnoreCase( "running" ) )
84 } catch( InterruptedException e2 ) {
85 System.err.println( "Problem with semaphore acquiring!" ) ;
86 e2.printStackTrace() ;
89 // Checking first start
90 if( machine.checkVmx() == 1 )
99 System.out.print( "Starting VM ... " ) ;
// Status is "undefined" locally and on the server while the start is in progress.
101 machine.setStatus( "undefined" ) ;
105 LocalHost.Instance().getServerStub().changeStatus(
106 LocalHost.Instance().getIP(), "undefined" ) ;
107 } catch( RemoteException e ) {
108 System.err.println( "Unable to inform the server of the VM status!" ) ;
109 e.printStackTrace() ;
// Start command, passed to exec as an argument array (no shell involved).
113 String[] command = new String[]{VmRunCommand, "-T", "player", "start",
114 working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name(), "nogui"} ;
125 Process p = Runtime.getRuntime().exec( command ) ;
128 if( p.exitValue() == 0 )
130 System.out.println( "Virtual machine successfully started." ) ;
133 System.err.println( "Virtual machine not started!" ) ;
134 ret = printProcessError( p ) ;
143 if( retry >= maxRetryVM )
145 System.err.println( "Unable to start VM!" ) ;
151 System.out.println( "Retrying (" + retry + ") ... " ) ;
152 Thread.sleep( timeRetryVM ) ;
155 } catch( IOException e ) {
156 System.err.println( "Error during execution of start command: " ) ;
157 e.printStackTrace() ;
162 } catch( InterruptedException e ) {
163 e.printStackTrace() ;
171 boolean started = false ;
183 /** Waiting for VM being started **/
185 Thread.sleep( wait_start ) ;
186 } catch( InterruptedException e ) {
187 e.printStackTrace() ;
// Guest probe: a runScriptInGuest call that only echoes "ok".
// NOTE(review): the guest password is concatenated into this command string, and a
// script file is created below; presumably the string is written to it — confirm.
190 String cmd2 = VmRunCommand + " -T " + " player " + " -gu " + machine.getVmUser() +
191 " -gp " + machine.getVmUserPasswd() + " runScriptInGuest " +
192 working_directory + "/" + machine.getDirectory()
193 + "/" + machine.getVmx_name() + " " + ushell +
194 " \"echo ok\"" ;// + " -noWait " ;
197 FileWriter fw = new FileWriter( new File( working_directory + "/testStarted.sh" ) ) ;
201 } catch( IOException e1 ) {
202 e1.printStackTrace() ;
// The generated script is executed with the configured shell.
208 command = new String[]{ ushell, working_directory + "/testStarted.sh"} ;
218 Process p = Runtime.getRuntime().exec( command ) ;
221 if( p.exitValue() == 0 )
224 // machine.setStatus( "running" ) ;
227 // LocalHost.Instance().getServerStub().changeStatus(
228 // LocalHost.Instance().getIP(), "running" ) ;
232 System.err.println( "Error while checking if the VM is started!" ) ;
233 // printProcessError( p.getErrorStream() ) ;
234 ret = printProcessError( p ) ;
243 if( retry >= maxRetryVM )
245 System.err.println( "Unable to check VM!" ) ;
251 System.out.println( "Retrying (" + retry + ") ... " ) ;
252 Thread.sleep( timeRetryVM ) ;
254 // wait_start = wait_start / 2 ;
257 } catch( IOException e ) {
258 e.printStackTrace() ;
262 } catch( InterruptedException e ) {
263 e.printStackTrace() ;
// All checks used without a positive answer: the VM is reported as "undefined".
269 if( count == max_start_try && ! started )
271 System.err.println( "Virtual machine not responding!!" ) ;
274 LocalHost.Instance().getServerStub().changeStatus(
275 LocalHost.Instance().getIP(), "undefined" ) ;
276 } catch( RemoteException e ) {
277 e.printStackTrace() ;
287 Thread.sleep( 3000 ) ;
288 } catch( InterruptedException e ) {
289 e.printStackTrace() ;
295 /** Sending the host ip **/
297 System.out.print( "Sending host IP to VM ... " ) ;
// The guest-side command writes "<host IP> <dialog port>" into /tmp/vm_host_IP.
299 String cmd2 = VmRunCommand + " -T " + " player " + " -gu " + machine.getVmUser() +
300 " -gp " + machine.getVmUserPasswd() + " runScriptInGuest " +
301 working_directory + "/" + machine.getDirectory()
302 + "/" + machine.getVmx_name() + " " + ushell +
303 " \"echo " + LocalHost.Instance().getIP() + " " + dialog_port
304 + " > /tmp/vm_host_IP\"" ;
307 FileWriter fw = new FileWriter( new File( working_directory + "/sendHostIP.sh" ) ) ;
311 } catch( IOException e1 ) {
312 e1.printStackTrace() ;
318 command = new String[]{ ushell, working_directory + "/sendHostIP.sh"} ;
331 Process p = Runtime.getRuntime().exec( command ) ;
334 if( p.exitValue() == 0 )
336 System.out.println( "VM received the host IP." ) ;
339 System.err.println( "VM did not received the host IP!" ) ;
340 // printProcessError( p.getErrorStream() ) ;
341 ret = printProcessError( p ) ;
350 if( retry >= maxRetryVM )
352 System.err.println( "Unable to send information to VM!" ) ;
359 System.out.println( "Retrying (" + retry + ") ... " ) ;
360 Thread.sleep( timeRetryVM ) ;
363 } catch( IOException e ) {
364 System.err.println( "Error during execution of runScriptInGuest command: " ) ;
365 e.printStackTrace() ;
369 } catch( InterruptedException e) {
370 e.printStackTrace() ;
377 /** Sending the vm ip **/
379 System.out.print( "Sending its IP to VM ... " ) ;
// The guest-side command writes the assigned VM IP into /tmp/vm_IP.
381 cmd2 = VmRunCommand + " -T " + " player " + " -gu " + machine.getVmUser() +
382 " -gp " + machine.getVmUserPasswd() + " runScriptInGuest " +
383 working_directory + "/" + machine.getDirectory()
384 + "/" + machine.getVmx_name() + " " + ushell +
385 " \"echo " + machine.getIp()
386 + " > /tmp/vm_IP\"" ;
389 FileWriter fw = new FileWriter( new File( working_directory + "/sendVmIP.sh" ) ) ;
393 } catch( IOException e1 ) {
394 e1.printStackTrace() ;
400 command = new String[]{ ushell, working_directory + "/sendVmIP.sh"} ;
413 Process p = Runtime.getRuntime().exec( command ) ;
416 if( p.exitValue() == 0 )
418 System.out.println( "VM received its assigned IP." ) ;
// The local status becomes "running" only after the guest received its IP.
421 machine.setStatus( "running" ) ;
423 System.err.println( "VM did not received its assigned IP!" ) ;
424 // printProcessError( p.getErrorStream() ) ;
425 ret = printProcessError( p ) ;
434 if( retry >= maxRetryVM )
436 System.err.println( "Unable to send information to VM!" ) ;
443 System.out.println( "Retrying (" + retry + ") ... " ) ;
444 Thread.sleep( timeRetryVM ) ;
447 } catch( IOException e ) {
448 System.err.println( "Error during execution of runScriptInGuest command: " ) ;
449 e.printStackTrace() ;
453 } catch( InterruptedException e ) {
454 e.printStackTrace() ;
// Finally the server is told that the VM is running.
464 LocalHost.Instance().getServerStub().changeStatus(
465 LocalHost.Instance().getIP(), "running" ) ;
466 } catch (RemoteException e) {
467 System.err.println( "Unable to inform the server of the VM started status!" ) ;
// Stop sequence of the virtual machine.
// NOTE(review): the method header is not present in this excerpt; the name and
// signature are presumably those of the stop-VM service — confirm against the file.
// Visible steps: clear the emergency flag when the VM is already "stopped"; otherwise
// mark the VM "undefined", run "vmrun -T player stop <vmx>" with retries, then set
// and report the "stopped" status.
485 if( machine != null && machine.getStatus().equalsIgnoreCase( "stopped" ) )
487 emergencyStop = false ;
491 if( machine != null && ! machine.getStatus().equalsIgnoreCase( "stopped" ) )
495 } catch( InterruptedException e2 ) {
496 System.err.println( "Problem with semaphore acquiring!" ) ;
497 e2.printStackTrace() ;
500 System.out.print( "Stopping VM ... " ) ;
// Status is "undefined" locally and on the server while the stop is in progress.
505 machine.setStatus( "undefined" ) ;
507 LocalHost.Instance().getServerStub().changeStatus(
508 LocalHost.Instance().getIP(), "undefined" ) ;
509 } catch( RemoteException e ) {
510 System.err.println( "Unable to inform the server of the VM status!" ) ;
511 e.printStackTrace() ;
514 String[] command = new String[]{VmRunCommand, "-T", "player", "stop",
515 working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name()} ;
519 Process p = Runtime.getRuntime().exec( command ) ;
522 if( p.exitValue() == 0 )
524 System.out.println( "Virtual machine successfully stopped." ) ;
525 machine.setStatus( "stopped" ) ;
528 System.err.println( "Virtual machine not stopped!" ) ;
529 // printProcessError( p.getErrorStream() ) ;
530 ret = printProcessError( p ) ;
// Retry policy: give up once the retry counter reaches maxRetryVM, otherwise pause.
538 if( retry >= maxRetryVM )
540 System.err.println( "Unable to stop VM!" ) ;
544 System.out.println( "Retrying (" + retry + ") ... " ) ;
545 Thread.sleep( timeRetryVM ) ;
550 } catch( IOException e ) {
551 System.err.println( "Error during execution of stop command: " ) ;
552 e.printStackTrace() ;
555 } catch( InterruptedException e ) {
556 e.printStackTrace() ;
// NOTE(review): the status is set to "stopped" a second time here; the condition
// guarding this statement is not visible in this excerpt.
561 machine.setStatus( "stopped" ) ;
563 LocalHost.Instance().getServerStub().changeStatus(
564 LocalHost.Instance().getIP(), "stopped" ) ;
565 } catch( RemoteException e1 ) {
566 System.err.println( "Unable to inform the server of the VM stopped status!" ) ;
567 e1.printStackTrace() ;
570 // if( ! isRestartedSave )
572 // /** Restoring the original vmx file (necessary after a crash) **/
573 // command = new String[]{ "/bin/cp",
574 // working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name_normal(),
575 // working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name() } ;
578 // Process p = Runtime.getRuntime().exec( command ) ;
581 // if( p.exitValue() == 0 )
583 // System.out.println( "Successfully replaced the VMX file." ) ;
585 // emergencyStop = false ;
588 // System.err.println( "Unsuccessful replacement of the VMX file!" ) ;
589 //// printProcessError( p.getErrorStream() ) ;
590 // printProcessError( p ) ;
594 // } catch( IOException e ) {
595 // System.err.println( "Error during VMX file replacement: " ) ;
596 // e.printStackTrace() ;
599 // } catch( InterruptedException e ) {
600 // e.printStackTrace() ;
// The emergency flag is cleared after the vmx check (branch body only partly visible).
605 if( machine.checkVmx() == 0 )
608 emergencyStop = false ;
/**
 * Suspends the virtual machine with "vmrun -T player suspend <vmx>", retrying while
 * the retry counter stays below maxRetryVM, then records and reports the "suspended"
 * status.
 * NOTE(review): the return statements and the use of _mode are not visible in this
 * excerpt; the save sequence in this file treats a result of 1 as a failure.
 */
619 public int suspendVM( int _mode )
621 if( machine != null && machine.getStatus().equalsIgnoreCase( "suspended" ) )
626 if( machine != null && ! machine.getStatus().equalsIgnoreCase( "suspended" ) )
630 } catch( InterruptedException e2 ) {
631 System.err.println( "Problem with semaphore acquiring!" ) ;
632 e2.printStackTrace() ;
635 System.out.print( "Suspending VM ... " ) ;
// Status is "undefined" locally and on the server while the suspend is in progress.
640 machine.setStatus( "undefined" ) ;
644 LocalHost.Instance().getServerStub().changeStatus(
645 LocalHost.Instance().getIP(), "undefined" ) ;
646 } catch( RemoteException e ) {
647 System.err.println( "Unable to inform the server of the VM status!" ) ;
648 e.printStackTrace() ;
652 String[] command = new String[]{VmRunCommand, "-T", "player", "suspend",
653 working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name()} ;
657 Process p = Runtime.getRuntime().exec( command ) ;
659 if( p.exitValue() == 0 )
661 System.out.println( "Virtual machine successfully suspended." ) ;
662 machine.setStatus( "suspended" ) ;
665 System.err.println( "Virtual machine not suspended!" ) ;
666 // printProcessError( p.getErrorStream() ) ;
667 ret = printProcessError( p ) ;
676 if( retry >= maxRetryVM )
678 System.err.println( "Unable to suspend VM!" ) ;
684 System.out.println( "Retrying (" + retry + ") ... " ) ;
685 Thread.sleep( timeRetryVM ) ;
690 } catch( IOException e ) {
691 System.err.println( "Error during execution of suspend command: " ) ;
692 e.printStackTrace() ;
693 } catch( InterruptedException e ) {
694 e.printStackTrace() ;
// NOTE(review): second assignment of "suspended"; its guarding condition is not
// visible in this excerpt.
697 machine.setStatus( "suspended" ) ;
701 LocalHost.Instance().getServerStub().changeStatus(
702 LocalHost.Instance().getIP(), "suspended" ) ;
703 } catch( RemoteException e ) {
704 System.err.println( "Unable to inform the server of the VM suspended status!" ) ;
705 e.printStackTrace() ;
/**
 * Resets the virtual machine with "vmrun -T player reset <vmx>" after reporting the
 * "undefined" status to the server; on success it sends the save-ok signal to the
 * guest and returns 1 when that signal could not be sent.
 * NOTE(review): the other return statements are not visible in this excerpt.
 */
720 public int restartVM()
722 if( machine != null )
724 System.out.print( "Restarting VM ... " ) ;
730 LocalHost.Instance().getServerStub().changeStatus(
731 LocalHost.Instance().getIP(), "undefined" ) ;
732 } catch( RemoteException e ) {
733 System.err.println( "Unable to inform the server of the VM status!" ) ;
734 e.printStackTrace() ;
737 String[] command = new String[]{VmRunCommand, "-T", "player", "reset",
738 working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name()} ;
742 Process p = Runtime.getRuntime().exec( command ) ;
745 if( p.exitValue() == 0 )
747 System.out.println( "Virtual machine successfully restarted." ) ;
749 if( sendSaveOkVM() == 1 ) { return 1 ; }
755 System.err.println( "Virtual machine not restarted!" ) ;
756 // printProcessError( p.getErrorStream() ) ;
757 ret = printProcessError( p ) ;
// Retry policy: give up once the retry counter reaches maxRetryVM, otherwise pause.
764 if( retry >= maxRetryVM )
766 System.err.println( "Unable to start VM!" ) ;
769 System.out.println( "Retrying (" + retry + ") ... " ) ;
770 Thread.sleep( timeRetryVM ) ;
775 } catch( IOException e ) {
776 System.err.println( "Error during execution of restart command: " ) ;
777 e.printStackTrace() ;
778 } catch( InterruptedException e ) {
779 e.printStackTrace() ;
/**
 * Restarts the virtual machine after a crash: reports "undefined" to the server,
 * redeploys the last save, optionally marks the deployment as faulty, fetches the
 * VM IP assigned by the server again, starts the VM and sends it the save-ok signal.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
787 public int restartVMAfterCrash()
789 System.out.println( "Restarting VM after a crash ..." ) ;
792 LocalHost.Instance().getServerStub().changeStatus(
793 LocalHost.Instance().getIP(), "undefined" ) ;
794 } catch( RemoteException e ) {
795 System.err.println( "Unable to inform the server of the VM status!" ) ;
796 e.printStackTrace() ;
// Continue only when the last save could be deployed (result 0).
801 if( machine.deployLastSave() == 0 )
803 if( isRestartedSave )
805 // Using the specific vmx file
806 machine.setDeployFault( true ) ;
808 // Writing the restarted save mark
// NOTE(review): the close() of this writer is not visible in this excerpt.
810 FileWriter fw = new FileWriter( new File( working_directory + "/" + machine.getDirectory() + "/fault.hpcvm" ) ) ;
811 fw.write( "fault!" ) ;
815 } catch( IOException e1 ) {
816 e1.printStackTrace() ;
817 System.err.println( "Unable to mark the fault!" ) ;
820 if( machine.checkVmx() == 1 )
825 // System.out.print( "Changing VMX file after crash ... " ) ;
827 // String[] command = new String[]{ "/bin/cp",
828 // working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name_crash(),
829 // working_directory + "/" + machine.getDirectory() + "/" + machine.getVmx_name() } ;
832 // Process p = Runtime.getRuntime().exec( command ) ;
835 // if( p.exitValue() == 0 )
837 // System.out.println( "Successfully replaced the VMX file." ) ;
839 // System.err.println( "Unsuccessful replacement of the VMX file!" ) ;
840 //// printProcessError( p.getErrorStream() ) ;
841 // printProcessError( p ) ;
845 // } catch( IOException e ) {
846 // System.err.println( "Error during VMX file replacement: " ) ;
847 // e.printStackTrace() ;
848 // } catch( InterruptedException e ) {
849 // e.printStackTrace() ;
852 /** Retrieving VM assigned IP **/
856 vmIP = LocalHost.Instance().getServerStub().getAssociatedIP(
857 LocalHost.Instance().getIP() ) ;
858 } catch (RemoteException e) {
859 System.err.println( "Problem while retrieving the VM assigned IP!!" ) ;
860 e.printStackTrace() ;
864 machine.setIp( vmIP ) ;
866 isRestartedSave = false ;
// The save timestamp is refreshed only when both the start and the signal succeed.
869 if( startVM( 0 ) == 0 )
871 if( sendSaveOkVM() == 0 )
873 date_last_save = System.currentTimeMillis() ;
/**
 * Tells the program running in the guest that the save is done by writing "ok" to
 * /tmp/vm_save_ok through a runScriptInGuest command, executed from the generated
 * script saveOk.sh and retried while the retry counter stays below maxRetryVM.
 * NOTE(review): the return statements are not visible in this excerpt; callers in
 * this file compare the result with 0 (success) and 1 (failure).
 */
887 private int sendSaveOkVM()
892 /** Informing the program that it's ok **/
893 System.out.print( "Sending OK signal to the program ... " ) ;
// NOTE(review): the guest password is concatenated into this command string.
895 String cmd2 = VmRunCommand + " -T " + " player " + " -gu " + machine.getVmUser() +
896 " -gp " + machine.getVmUserPasswd() + " runScriptInGuest " +
897 working_directory + "/" + machine.getDirectory()
898 + "/" + machine.getVmx_name() + " " + ushell +
899 " \"echo ok > /tmp/vm_save_ok\"" ;// + " -noWait " ;
902 FileWriter fw = new FileWriter( new File( working_directory + "/saveOk.sh" ) ) ;
906 } catch( IOException e1 ) {
907 e1.printStackTrace() ;
// The generated script is executed with the configured shell.
910 String[] command = new String[]{ ushell, working_directory + "/saveOk.sh"} ;
914 Process p = Runtime.getRuntime().exec( command ) ;
917 if( p.exitValue() == 0 )
919 System.out.println( "Signal successfully sent." ) ;
923 System.err.println( "Signal not sent!" ) ;
924 // printProcessError( p.getErrorStream() ) ;
925 ret = printProcessError( p ) ;
932 if( retry >= maxRetryVM )
934 System.err.println( "Unable to send ok signal to VM!" ) ;
937 System.out.println( "Retrying (" + retry + ") ... " ) ;
938 Thread.sleep( timeRetryVM ) ;
942 } catch( IOException e ) {
943 System.err.println( "Error during ok save signal send command: " ) ;
944 e.printStackTrace() ;
946 } catch( InterruptedException e ) {
947 e.printStackTrace() ;
// Save sequence of the virtual machine.
// NOTE(review): the method header is not present in this excerpt; the name and
// signature are presumably those of the save-VM service — confirm against the file.
// Visible steps: wait until no other save is flagged on saveProcess, flag the save,
// report "saving", suspend the VM, delete the previous "_new_" archives, build and
// gzip a tar archive of the VM directory, restart the VM, send it the save-ok signal,
// copy the archive to every save neighbor with scp, inform the server, and finally
// clear the save flag and notify waiting threads.
958 synchronized( saveProcess ){
959 while( saveProcess.getStatus() )
963 } catch( InterruptedException e ) {
964 e.printStackTrace() ;
968 System.out.println( "Saving VM ..." ) ;
969 saveProcess.setStatus( true ) ;
971 machine.setStatus( "saving" ) ;
973 LocalHost.Instance().getServerStub().changeStatus(
974 LocalHost.Instance().getIP(), "saving" ) ;
975 } catch( RemoteException e ) {
976 System.err.println( "Unable to inform the server of the VM status!" ) ;
977 e.printStackTrace() ;
981 String saveName = "" ;
// error: any step failed; errorVM: the VM could not be suspended.
982 boolean error = false ;
983 boolean errorVM = false ;
985 if( suspendVM( 1 ) == 1 ) { error = true ; errorVM = true ; }
989 System.out.println( "Deletion of last nok archive ... " ) ;
// Remove leftovers of a previous unfinished save (both .tar and .tar.gz).
991 command = new String[]{ "/bin/rm", "-rf",
992 working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar",
993 working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar.gz", } ;
996 procSave = Runtime.getRuntime().exec( command ) ;
999 if( procSave.exitValue() == 0 )
1001 System.out.println( "Last nok archive successfully deleted." ) ;
1003 System.err.println( "Last nok archive not deleted!" ) ;
1004 // printProcessError( p.getErrorStream() ) ;
1005 printProcessError( procSave ) ;
1009 } catch( IOException e ) {
1010 System.err.println( "Error during nok archive deletion command: " ) ;
1012 e.printStackTrace() ;
1013 } catch( InterruptedException e ) {
1014 e.printStackTrace() ;
1021 System.out.print( "Creation of the archive ... " ) ;
1022 /** Archive creation **/
1023 // command = new String[]{ "/bin/tar", "-cf",
1024 // working_directory + "/" + machine.getName() + "_new_" + machine.getComputationId() + ".tar",
1025 // working_directory + "/" + machine.getDirectory() } ;
// NOTE(review): the archive name and the VM directory are relative paths, and "-C"
// comes after the member name; the gzip step below uses an absolute path under
// working_directory. Confirm that tar really runs where those paths match.
1027 command = new String[]{ "/bin/tar", "-cf",
1028 machine.getName() + "_new_" + machine.getComputationId() + ".tar",
1029 machine.getDirectory(), "-C", working_directory } ;
1037 procSave = Runtime.getRuntime().exec( command ) ;
1038 procSave.waitFor() ;
1040 if( procSave.exitValue() == 0 )
1042 System.out.println( "Archive successfully created." ) ;
// NOTE(review): lastSaveOk is set to false right after a successful archive
// creation; the surrounding lines are not visible — confirm the intent.
1044 lastSaveOk = false ;
1047 System.err.println( "Archive not created!" ) ;
1048 // printProcessError( p.getErrorStream() ) ;
1049 printProcessError( procSave ) ;
1053 } catch( IOException e ) {
1054 System.err.println( "Error during archive creation command: " ) ;
1056 e.printStackTrace() ;
1057 } catch( InterruptedException e ) {
1058 e.printStackTrace() ;
1062 /** Compression of the archive **/
1065 System.out.print( "Compression of the archive ... " ) ;
1066 command = new String[]{ "/bin/gzip",
1067 working_directory + "/" + machine.getName()
1068 + "_new_" + machine.getComputationId() + ".tar" } ;
1076 procSave = Runtime.getRuntime().exec( command ) ;
1077 procSave.waitFor() ;
1079 if( procSave.exitValue() == 0 )
1081 System.out.println( "Archive successfully compressed." ) ;
1083 System.err.println( "Archive not compressed!" ) ;
1084 // printProcessError( p.getErrorStream() ) ;
1085 printProcessError( procSave ) ;
1089 } catch( IOException e ) {
1090 System.err.println( "Error during archive compression command: " ) ;
1091 e.printStackTrace() ;
1093 } catch( InterruptedException e ) {
1094 e.printStackTrace() ;
1100 /** Restarting VM **/
// The VM is not restarted when the earlier suspend failed (errorVM short-circuits).
1101 if( errorVM || startVM( 0 ) == 1 ) { error = true ; }
1104 /** Sending ok save signal **/
1107 if( sendSaveOkVM() == 1 ) { error = true ; }
1112 // /** Deletion of the tar archive **/
1115 // command = new String[]{ "/bin/rm",
1116 // working_directory + "/" + machine.getName()
1117 // + "_new_" + machine.getComputationId() + ".tar" } ;
1120 // Process p = Runtime.getRuntime().exec( command ) ;
1123 // if( p.exitValue() == 0 )
1125 // System.out.println( "Archive (not compressed) successfully deleted." ) ;
1127 // System.err.println( "Archive (not compressed) not deleted!" ) ;
1128 // printProcessError( p.getErrorStream() ) ;
1132 // } catch( IOException e ) {
1133 // System.err.println( "Error during archive (not compressed) deletion command: " ) ;
1134 // e.printStackTrace() ;
1136 // } catch( InterruptedException e ) {
1137 // e.printStackTrace() ;
1142 saveName = machine.getName() + "_new_" + machine.getComputationId() + ".tar.gz" ;
1144 /** Sending save to neighbor **/
1152 ArrayList<ServicesClient> sn = machine.getSaveNeighbors() ;
1154 for( int i = 0 ; i < sn.size() ; i++ )
// Name, working directory and host IP of the neighbor are fetched through RMI.
1161 name = sn.get( i ).getName() ;
1162 wd = sn.get( i ).getWorkingDirectory() ;
1163 snIP = sn.get( i ).getIPHost() ;
1164 } catch (RemoteException e2) {
1165 System.err.println( "Unable to retrieve information on one save neighbor!" ) ;
1166 e2.printStackTrace() ;
1170 System.out.print( "Sending save to " + name + " ... " ) ;
// NOTE(review): the scp destination argument is not visible in this excerpt.
1172 command = new String[]{ "/usr/bin/scp", working_directory + "/" + saveName,
1176 procSave = Runtime.getRuntime().exec( command ) ;
1177 procSave.waitFor() ;
1179 if( procSave.exitValue() == 0 )
1181 System.out.println( "Archive successfully sent." ) ;
1183 System.err.println( "Archive not sent!" ) ;
1184 // printProcessError( p.getErrorStream() ) ;
1185 printProcessError( procSave ) ;
1189 } catch( IOException e ) {
1190 System.err.println( "Error during archive send command: " ) ;
1191 e.printStackTrace() ;
1193 } catch( InterruptedException e ) {
1194 e.printStackTrace() ;
1201 /** Informing the server the save is done **/
1205 LocalHost.Instance().getServerStub().saveOk( LocalHost.Instance().getIP(), saveName ) ;
1206 } catch( RemoteException e ) {
1207 System.err.println( "Problem while informing the server about the save state!" ) ;
1208 e.printStackTrace() ;
// Release the save flag and wake waiting threads. The same release appears twice;
// presumably one per exit path — the branching lines are not visible.
// NOTE(review): both catch clauses discard any Exception silently.
1211 synchronized( saveProcess ) {
1212 saveProcess.setStatus( false ) ;
1214 saveProcess.notifyAll() ;
1215 } catch( Exception e ) {}}
1220 synchronized( saveProcess ) {
1221 saveProcess.setStatus( false ) ;
1223 saveProcess.notifyAll() ;
1224 } catch( Exception e ) {}}
/**
 * Configuration reload entry point; the only visible action is a console message.
 * NOTE(review): the rest of the body and the returned value are not visible in
 * this excerpt.
 */
1231 public int reloadConfig()
1233 System.out.println( "Reloading configuration ... " ) ;
/**
 * Initialises the client: stores the connection parameters, sets the hard-coded
 * defaults (vmrun path, shell, working directory, timings), looks up the server
 * stub through RMI and registers it on LocalHost.
 *
 * @param _server_ip   address of the RMI server
 * @param _server_port port of the RMI server registry
 * @param _client_port port of the local RMI registry
 * @param _dialog_port currently ignored: dialog_port is derived from _server_port
 */
1239 public void init( String _server_ip, int _server_port, int _client_port, int _dialog_port )
1241 System.out.println( "Initialisation Client ... " ) ;
1242 System.out.println( "IP " + LocalHost.Instance().getIP() ) ;
1244 server_ip = _server_ip ;
1245 server_port = _server_port ;
1246 client_port = _client_port ;
// The _dialog_port argument is not used; the value is _server_port + 1.
1247 dialog_port = _server_port + 1 ; // _dialog_port ;
1250 saveProcess = new SaveProcess() ;
1252 machine = new VirtualMachine() ;
1254 VmRunCommand = "/usr/bin/vmrun" ;
1255 // VmRunCommandArg = "-T player" ;
1257 // vm_user = "mpi" ;
1258 // vm_user_passwd = "mpi" ;
1259 ushell = "/bin/bash" ;
1260 working_directory = "/localhome/vmware" ;
// 15 s pause before each start check, at most 10 checks.
1262 wait_start = 15000 ;
1263 max_start_try = 10 ;
// Single permit: one VM state change at a time.
1265 sema = new Semaphore( 1 ) ;
1266 emergencyStop = false ;
// 10 s pause between retries of an external command.
// NOTE(review): the assignment of maxRetryVM is not visible in this excerpt.
1269 timeRetryVM = 10000 ;
// 30 minutes, expressed in milliseconds.
1271 save_interleave = 30 * 60 * 1000 ;
1272 date_last_save = 0 ;
1274 isRestartedSave = false ;
1277 /** Connection to server **/
1279 serverStub = (ServicesServer) Naming.lookup( "rmi://"
1280 + server_ip + ":" + server_port + "/Server" ) ;
1281 } catch (MalformedURLException e) {
1282 e.printStackTrace();
1283 } catch (RemoteException e) {
1284 e.printStackTrace();
1285 } catch (NotBoundException e) {
1286 e.printStackTrace();
// A failed lookup leaves serverStub null; that case is reported here.
1289 if( serverStub == null )
1291 System.err.println( "Unable to connect to server!!" ) ;
1292 System.err.println( "Server IP: " + server_ip + " -- server port: " + server_port ) ;
1297 System.out.println( "Connected to server " + server_ip + " on port " + server_port + "." ) ;
1299 // LocalHost.Instance().setServerIP( server_ip ) ;
1300 LocalHost.Instance().setServerStub( serverStub ) ;
1303 /** Creating the local server **/
1306 /** Starting all threads **/
/**
 * Publishes this client in a local RMI registry on client_port under the name
 * "Client", after trying to unexport the entries of a registry already present on
 * that port, and stores the resulting stub on LocalHost.
 */
1310 private void exportObject()
1312 // ServicesClient ref = null ;
1313 Registry reg = null ;
// Look for an existing registry; list() fails with RemoteException when none answers.
1319 reg = LocateRegistry.getRegistry( client_port ) ;
1321 String tab[] = reg.list() ;
1323 System.out.println( "There is an existing RMI Registry on port " +
1324 client_port + " with " + tab.length + " entries!" ) ;
1325 for( int i = 0 ; i < tab.length ; i++ )
// Forced unexport (second argument true) of each listed entry.
1328 if( UnicastRemoteObject.unexportObject( Naming.lookup(tab[i]), true ) )
1330 System.out.println( "Register successfuly deleted!" ) ;
1332 System.err.println( "Register undeleted !!!" ) ;
1334 } catch( Exception e ) {
1335 e.printStackTrace() ;
1339 } catch( RemoteException e ) {
// Install a security manager only when none is set yet.
1343 if ( System.getSecurityManager() == null )
1345 System.setSecurityManager( new SecurityManager() ) ;
1348 LocateRegistry.createRegistry( client_port ) ;
1349 LocateRegistry.getRegistry( client_port ).rebind( "Client", this ) ;
// NOTE(review): the end of this lookup URL is not visible in this excerpt.
1350 myStub = (ServicesClient) Naming.lookup( "rmi://"
1351 + LocalHost.Instance().getIP() + ":" + client_port
1353 } catch( Exception e ) {
1354 System.err.println( "Error in Client.exportObject() when creating local services:" + e ) ;
1355 System.err.println( "Exit from Client.exportObject" ) ;
1359 LocalHost.Instance().setStub( myStub ) ;
/**
 * Prints the exit value and the standard-error output of a finished process on
 * System.err, and inspects each line for the text "egmentation".
 * NOTE(review): the statement executed on a match and the return statement are not
 * visible in this excerpt; presumably ret reports that such a line was found.
 *
 * @param _p process whose exit value and error stream are printed
 */
1363 private boolean printProcessError( Process _p )
1365 boolean ret = false ;
1369 System.err.println( "Error: " + _p.exitValue() ) ;
// NOTE(review): the close() of this reader is not visible in this excerpt.
1370 BufferedReader br = new BufferedReader( new InputStreamReader( _p.getErrorStream() ) ) ;
1373 while( (line = br.readLine()) != null )
1375 System.err.println( line ) ;
// The leading letter is left out so both "Segmentation" and "segmentation" match.
1376 if( line.contains( "egmentation" ) )
1381 } catch( IOException e ) {
1382 e.printStackTrace() ;
// Registration and start-up sequence.
// NOTE(review): the method header is not present in this excerpt, so its name and
// signature are unknown here.
// Visible steps: register this client's stub on the server, fetch the IP assigned to
// the VM, then start the ping thread and the dialog socket server.
1393 /** Registering on server **/
1396 ret = LocalHost.Instance().getServerStub().register( LocalHost.Instance().getStub() );
1397 } catch (RemoteException e1) {
1398 e1.printStackTrace();
// Result codes handled below: 0 registered, 1 server-side problem (returns 1),
// 2 already registered.
1404 case 0: System.out.println( "Successfully registered on server." ) ; break ;
1405 case 1: System.err.println( "Problem on server while registreting!" ) ; return 1 ;
1406 case 2: System.out.println( "Already registered on server!" ) ; break ;
1409 /** Retrieving VM assigned IP **/
1410 String vmIP = null ;
1413 vmIP = LocalHost.Instance().getServerStub().getAssociatedIP(
1414 LocalHost.Instance().getIP() ) ;
1415 } catch (RemoteException e) {
1416 System.err.println( "Problem while retrieving the VM assigned IP!!" ) ;
1417 e.printStackTrace() ;
// NOTE(review): vmIP stays null when the remote call fails; whether that case is
// filtered before this point is not visible in this excerpt.
1421 machine.setIp( vmIP ) ;
1423 System.out.println( "Assigned IP address for the VM: " + vmIP ) ;
1426 /** Starting alive ping to server **/
1427 pingServer = new PingServer() ;
1428 pingServer.start() ;
1430 /** Starting socket server for VM dialog **/
1431 dialogVmServer = new DialogVMServer() ;
1432 dialogVmServer.start() ;
/**
 * Thread that pings the server with this host's IP and sleeps 2 seconds between
 * pings; stopPing() clears the run flag.
 * NOTE(review): the loop header and the constructor are not visible in this
 * excerpt; the run flag is not declared volatile.
 */
1438 private class PingServer extends Thread
1440 private boolean run ;
1447 protected void stopPing() { run = false ; }
1455 LocalHost.Instance().getServerStub().ping( LocalHost.Instance().getIP() ) ;
1456 } catch (RemoteException e1) {
1457 e1.printStackTrace();
1461 Thread.sleep( 2000 ) ;
1462 } catch( InterruptedException e ) {
1463 e.printStackTrace() ;
/**
 * Thread that opens a server socket on a system-chosen port, publishes that port in
 * dialog_port, and starts one DialogVM thread per accepted connection.
 * NOTE(review): the loop header and the constructor are not visible in this excerpt.
 */
1470 private class DialogVMServer extends Thread
1472 private boolean run ;
1473 private Socket socket ;
// Every dialog thread started so far, kept so they can be stopped together.
1474 private ArrayList<DialogVM> dialogs = new ArrayList<DialogVM>() ;
// Closes the server socket and stops every dialog thread.
1481 protected void stopDialogVMServer()
1485 if( serverSocket != null )
1488 serverSocket.close() ;
1489 // socket = serverSocket.accept() ;
1491 for( int i = 0 ; i < dialogs.size() ; i++ )
1493 dialogs.get( i ).stopDialogVM() ;
1496 } catch( IOException e ) {
1497 e.printStackTrace() ;
// Port 0 lets the system pick a free port; the chosen port replaces dialog_port.
1507 serverSocket = new ServerSocket( 0 ) ;
1508 dialog_port = serverSocket.getLocalPort() ;
1510 System.out.println( "SocketServer listening on port " + dialog_port ) ;
1511 } catch( IOException e ) {
1512 System.err.println( "Unable to launch the SocketServer on port " + dialog_port + "!" ) ;
1513 e.printStackTrace() ;
// Each accepted connection gets its own DialogVM thread.
1521 socket = serverSocket.accept() ;
1523 dialogs.add( new DialogVM( socket ) ) ;
1524 dialogs.get( dialogs.size() - 1 ).start() ;
1525 } catch( IOException e ) {
1526 System.err.println( "Problem with the accept function!" ) ;
1527 e.printStackTrace() ;
/**
 * Thread handling one socket connection from the VM. It reads a text line and reacts
 * to three keywords (case-insensitive): "infos" (VM name and IP follow), "save"
 * (computation id follows) and "quit" (computation finished).
 * NOTE(review): several statements of each branch are not visible in this excerpt.
 */
1534 private class DialogVM extends Thread
1536 private boolean run ;
1537 private Socket socket ;
1538 private BufferedReader reader ;
1539 private String line ;
1541 DialogVM( Socket _socket ) { run = true ; socket = _socket ; }
// Closes the reader and the socket.
// NOTE(review): reader is only assigned in the thread body; any null check made
// before these close() calls is not visible in this excerpt.
1543 protected void stopDialogVM()
1548 reader.close() ; reader = null ;
1549 socket.close() ; socket = null ;
1550 } catch( IOException e ) {
1551 e.printStackTrace() ;
1559 reader = new BufferedReader( new InputStreamReader( socket.getInputStream() ) ) ;
1560 } catch( IOException e ) {
1561 System.err.println( "Unable to open a dialog socket with the VM!" ) ;
1562 e.printStackTrace();
1571 if( reader != null )
1573 line = reader.readLine() ;
1576 /** VM is starting -- retrieving informations **/
1577 if( run && line != null && line.equalsIgnoreCase( "infos" ) )
1579 /* Receiving name */
1580 machine.setName( reader.readLine() ) ;
// NOTE(review): readLine() returns null at end of stream, in which case the
// comparison below would throw a NullPointerException.
1583 String ip = reader.readLine() ;
1584 if( ! ip.equalsIgnoreCase( machine.getIp() ) )
1586 System.err.println( "VM IP not well configured!!" ) ;
1590 reader.close() ; reader = null ;
1591 socket.close() ; socket = null ;
1596 /** It's time to do a save **/
1597 if( run && line != null && line.equalsIgnoreCase( "save" ) )
1600 machine.setComputationId( Integer.parseInt( reader.readLine() ) ) ;
1601 } catch( Exception e ) {
1602 System.err.println( "Problem while reading the computation id!" ) ;
1603 e.printStackTrace() ;
// The save is considered only when more than save_interleave ms have elapsed
// since the last recorded save time.
1606 if( (System.currentTimeMillis() - date_last_save) > save_interleave )
1608 date_last_save = System.currentTimeMillis() ;
1611 reader.close() ; reader = null ;
1612 socket.close() ; socket = null ;
1616 /* Starting the VM save */
1619 date_last_save = System.currentTimeMillis() ;
1626 /** Computation is done, we can shutdown the VM **/
1627 if( run && line != null && line.equalsIgnoreCase( "quit" ) )
1630 Thread.sleep( 5000 ) ;
1631 } catch( InterruptedException e ) {
1632 e.printStackTrace() ;
1636 reader.close() ; reader = null ;
1637 socket.close() ; socket = null ;
1644 } catch( IOException e ) {
1645 e.printStackTrace() ;
// Shutdown of the background threads: stops the server ping, then the dialog socket
// server. NOTE(review): the enclosing method header is not present in this excerpt;
// the remote object itself is not unexported here (see the open question below).
1656 pingServer.stopPing() ;
1658 dialogVmServer.stopDialogVMServer() ;
1660 // unexportObject ??
/** Returns the IP of this host as reported by LocalHost. */
1666 public String getIPHost()
1668 return LocalHost.Instance().getIP() ;
/** Returns the name of this host as reported by LocalHost. */
1672 public String getName()
1674 return LocalHost.Instance().getName() ;
/**
 * Validates the latest save: renames the "_new_" archive to its "_last_" name in the
 * working directory with /bin/mv, asks every save neighbor to perform the same
 * rename, and tells the server the new save name.
 */
1679 public void saveOk()
// NOTE(review): the end of this expression is not visible in this excerpt.
1681 String save_name = machine.getName() + "_last_" + machine.getComputationId() +
1684 String save_new = machine.getName() + "_new_"
1685 + machine.getComputationId() + ".tar.gz" ;
1687 String[] command = new String[]{ "/bin/mv",
1688 working_directory + "/" + save_new,
1689 working_directory + "/" + save_name } ;
1692 Process p = Runtime.getRuntime().exec( command ) ;
1695 if( p.exitValue() == 0 )
// The machine remembers the new name only when the rename succeeded.
1697 machine.setSave_last( save_name ) ;
1698 System.out.println( "Last save OK" ) ;
1700 System.err.println( "Last save NOK!" ) ;
1701 System.err.println( "Error: " ) ;
1702 // printProcessError( p.getErrorStream() ) ;
1703 printProcessError( p ) ;
1705 } catch( IOException e ) {
1706 System.err.println( "Error during last archive move:" ) ;
1707 e.printStackTrace() ;
1708 } catch( InterruptedException e ) {
1709 e.printStackTrace() ;
1712 // Changing on save neighbors
1713 for( int i = 0 ; i < machine.getSaveNeighbors().size() ; i++ )
1716 machine.getSaveNeighbors().get( i ).changeSaveName( save_new, save_name, machine.getComputationId() ) ;
1717 } catch( RemoteException e ) {
// The error message needs the neighbor's name, which is itself a remote call and
// may fail too, hence the second catch.
1719 System.err.println( "Unable to change save name on " + machine.getSaveNeighbors().get( i ).getName() + "!" ) ;
1720 } catch( RemoteException e1 ) {
1721 System.err.println( "Unable to change save name on an unamed save neighbor!" ) ;
1722 e1.printStackTrace() ;
1724 e.printStackTrace() ;
1728 // Informing the server
1731 ret = LocalHost.Instance().getServerStub().changeSaveName( LocalHost.Instance().getIP(), save_name ) ;
1732 } catch( RemoteException e ) {
1733 System.err.println( "Unable to inform the server about the new save name!" ) ;
1734 e.printStackTrace() ;
1739 System.out.println( "Successfully informing the server about the new save name." ) ;
1741 System.err.println( "Problem on the server while informing it about the new save name!" ) ;
1749 public void changeSaveName( String _n1, String _n2, int _id )
1751 if( _n1 != null && _n1.length() > 0 )
1753 System.out.println( "Changing save name for processus " + _id + " ... " ) ;
1755 String[] command = new String[]{ "/bin/mv",
1756 working_directory + "/" + _n1,
1757 working_directory + "/" + _n2 } ;
1760 Process p = Runtime.getRuntime().exec( command ) ;
1763 if( p.exitValue() == 0 )
1765 System.out.println( "Change save name OK" ) ;
1767 System.err.println( "Change save name NOK!" ) ;
1768 System.err.println( "Error: " ) ;
1769 // printProcessError( p.getErrorStream() ) ;
1770 printProcessError( p ) ;
1772 } catch( IOException e ) {
1773 System.err.println( "Error during save renaming:" ) ;
1774 e.printStackTrace() ;
1775 } catch( InterruptedException e ) {
1776 e.printStackTrace() ;
1783 public void setSavingNeighbor( ServicesClient _sn )
1787 ArrayList<ServicesClient> as = new ArrayList<ServicesClient>() ;
1791 System.out.println( "Save neighbor: " + _sn.getName() ) ;
1792 } catch( RemoteException e ) {
1793 System.err.println( "Unable to retrieve the name of the save neighbor!" ) ;
1794 e.printStackTrace() ;
1797 machine.setSaveNeighbors( as ) ;
1803 public void setSavingNeighbors( ArrayList<ServicesClient> _sn )
1805 if( _sn != null && _sn.size() > 0 )
1807 System.out.print( "Save neighbors: " ) ;
1808 for( int i = 0 ; i < _sn.size() ; i++ )
1811 System.out.print( _sn.get( i ).getName() ) ;
1812 } catch( RemoteException e ) {
1813 System.err.println( "Unable to retrieve the name of a save neighbor!" ) ;
1814 e.printStackTrace() ;
1817 if( i != _sn.size() - 1 )
1819 System.out.print( ", " ) ;
1821 System.out.println( "." ) ;
1825 machine.setSaveNeighbors( _sn ) ;
1831 public void addSavingNeighbor( ServicesClient _sn )
1836 System.out.println( "Adding save neighbor: " + _sn.getName() ) ;
1837 } catch( RemoteException e ) {
1838 System.err.println( "Unable to retrieve the name of a save neighbor!" ) ;
1839 e.printStackTrace() ;
1842 machine.getSaveNeighbors().add( _sn ) ;
1848 public void addSavingNeighbors( ArrayList<ServicesClient> _sn )
1850 if( _sn != null && _sn.size() > 0 )
1852 System.out.print( "Adding save neighbors: " ) ;
1853 for( int i = 0 ; i < _sn.size() ; i++ )
1856 System.out.print( _sn.get( i ).getName() ) ;
1857 } catch( RemoteException e ) {
1858 System.err.println( "Unable to retrieve the name of a save neighbor!" ) ;
1859 e.printStackTrace() ;
1862 if( i != _sn.size() - 1 )
1864 System.out.print( ", " ) ;
1866 System.out.println( "." ) ;
1869 machine.getSaveNeighbors().add( _sn.get( i ) ) ;
1876 public void replaceSavingNeighbor( String _old, ServicesClient _new )
1878 System.out.print( "Replacing a save neihgbor ... " ) ;
1879 if( _old != null && _new != null )
1883 for( i = 0 ; i < machine.getSaveNeighbors().size() ; i++ )
1886 if( machine.getSaveNeighbors().get( i ).getIPHost().equalsIgnoreCase( _old ) )
1888 machine.getSaveNeighbors().set( i, _new ) ;
1889 System.out.println( "Save neighbor successfully changed." ) ;
1892 } catch( RemoteException e ) {
1893 System.err.println( "Unable to retrieve the IP address of a save neighbor!" ) ;
1894 e.printStackTrace() ;
1898 if( i == machine.getSaveNeighbors().size() )
1900 System.out.println( "I am not concerned by the modification." ) ;
/**
 * Records the name of the save to restore and tries to copy that archive
 * from the save neighbors into the working directory with /usr/bin/scp.
 *
 * Neighbors are tried while no copy has succeeded and the index stays below
 * the number of save neighbors. A successful copy sets isRestartedSave.
 *
 * NOTE(review): several short lines of this method (loop counter handling,
 * the assignment of `ok`, and every return statement) are not visible in
 * this excerpt, so the meaning of the returned int is not documented here.
 *
 * @param _saveName name of the archive to retrieve; "none" (any case) is
 *                  tested separately from a real archive name
 */
1907 public int retrieveSave( String _saveName )
1909 if( _saveName != null )
1911 if( ! _saveName.equalsIgnoreCase( "none" ) )
1913 machine.setSave_last( _saveName ) ;
1915 System.err.println( "I have no save to retrieve!!" ) ;
1919 // TODO NEIGHBORS !!!!
1920 //System.out.println( "!!!! NEIGHBORS !!!!!" ) ;
1921 boolean ok = false ;
1925 while( ! ok && i < machine.getSaveNeighbors().size() )
// Name the neighbor actually being tried (index i), the same one the scp command below reads from.
1928 System.out.print( "Retrieving a save on " + machine.getSaveNeighbors().get( i ).getName() + " ... " ) ;
1929 } catch( RemoteException e1 ) {
1930 System.err.println( "Unable to retrieve the name of a save neighbor!" ) ;
1931 e1.printStackTrace() ;
1934 String command[] = {} ;
// Source is <neighbor ip>:<neighbor working directory>/<last save name>; destination is the local working directory.
1938 command = new String[]{ "/usr/bin/scp",
1939 machine.getSaveNeighbors().get( i ).getIPHost() + ":" +
1940 machine.getSaveNeighbors().get( i ).getWorkingDirectory() + "/" +
1941 machine.getSave_last(),
1942 working_directory } ;
1943 } catch( RemoteException e1 ) {
1944 System.err.println( "Unable to retrieve the name of a save neighbor!" ) ;
1945 e1.printStackTrace() ;
1951 Process p = Runtime.getRuntime().exec( command ) ;
// Exit value 0 from scp is treated as a successful copy.
1954 if( p.exitValue() == 0 )
1956 System.out.println( "Archive successfully retrieved." ) ;
1957 isRestartedSave = true ;
1960 System.err.println( "Archive not retrieved!" ) ;
1961 System.err.println( "Error: " ) ;
1962 // printProcessError( p.getErrorStream() ) ;
1963 printProcessError( p ) ;
1966 } catch( IOException e ) {
1967 System.err.println( "Error during archive retrieve command: " ) ;
1968 e.printStackTrace() ;
1970 } catch( InterruptedException e ) {
1971 e.printStackTrace() ;
1982 System.err.println( "Unable to retrieve a save archive from any neighbor!" ) ;
/**
 * Returns the IP address of the managed VM, read from machine.getIp()
 * when a machine is set.
 *
 * NOTE(review): the value returned when machine is null is not visible in
 * this excerpt -- confirm before relying on it.
 *
 * @throws RemoteException declared for remote invocation
 */
1991 public String getIPVM() throws RemoteException
1993 if( machine != null )
1995 return machine.getIp() ;
2003 public void setIPVM( String _ipVM ) throws RemoteException
2005 if( _ipVM != null && ! _ipVM.isEmpty() )
2007 System.out.println( "The VM IP is now: " + _ipVM ) ;
2008 machine.setIp( _ipVM ) ;
2013 public String getWorkingDirectory()
2015 return working_directory ;
/**
 * Deploys the initial VM archive.
 *
 * Checks that the archive exists in the working directory and is not a
 * directory, then records the name, initial archive name and directory on
 * the machine object and calls machine.deployInitialVM(). A result of 1
 * from that call is reported as a deployment failure.
 *
 * NOTE(review): the return statements of this method are not visible in
 * this excerpt, so the meaning of the returned Integer is not documented here.
 *
 * @param _name      name given to the VM; must be non-empty
 * @param _archive   file name of the archive inside the working directory; must be non-empty
 * @param _directory directory recorded on the machine object
 */
2019 public Integer deployVM( String _name, String _archive, String _directory )
// Both the name and the archive must be non-empty (the second length test previously re-checked _name).
2021 if( _name != null && _name.length() > 0 && _archive != null && _archive.length() > 0 )
2023 File file = new File( working_directory + "/" + _archive ) ;
2024 if( ! file.exists() )
2026 System.err.println( "There is no archive named " + _archive + " in my working directory!" ) ;
2029 } else if( file.isDirectory() ) {
2030 System.err.println( _archive + " is a directory!" ) ;
2037 machine.setName( _name ) ;
2038 machine.setInitial_archive_name( _archive ) ;
2039 machine.setDirectory( _directory ) ;
2041 if( machine.deployInitialVM() == 1 )
2043 System.err.println( "Unable to deploy the initial VM archive!" ) ;
/**
 * Small holder of a boolean status flag.
 *
 * The enclosing client synchronizes on an instance of this class and calls
 * notifyAll() on it after setting the status to false (see emergencyStop()).
 *
 * NOTE(review): the field declaration and any constructor are not visible
 * in this excerpt.
 */
2053 private class SaveProcess
// Returns the current value of the status flag.
2062 protected boolean getStatus() { return status ; }
// Overwrites the status flag with the given value.
2064 protected void setStatus( boolean _b ) { status = _b ; }
2070 /** Programming is an art; let us respect those who practice it!! **/