#!/usr/bin/perl -w
# DO NOT EDIT
# This file was automatically generated from the .plin file, edit that
#this script takes a file (either on stdin or a fn on the command line) which contains
# a list of experiments to run
# the format is:
# <M> <A> <C> <N> <L> <reps>
# where
# <M>: number of machines to use (the first <M> entries of @machinelist)
# <A>: number of agents
# <C>: computation speed of the agents {slow, medium (or med), fast, soccer}
# <N>: migration [on/off]
# <L>: bit vector (octal, or hex with a 0x prefix) marking which machines are heavily loaded; the least-significant bit is the first machine in @machinelist
# <reps>: number of trials to run of this type

# All machines which are to run parts of the system must have a directory /home/pfr/spades
# which is the root of the spades CVS checkout. Symlinks should be fine.

##############
#Config stuff

#IMPORTANT: the local host must be the first one listed here!
#@machinelist = qw(shamu.coral.cs.cmu.edu reef.coral.cs.cmu.edu gs219.sp.cs.cmu.edu gs18.sp.cs.cmu.edu tenages.tasks.cs.cmu.edu);
#@machinelist = qw(shamu.coral.cs.cmu.edu reef.coral.cs.cmu.edu gs219.sp.cs.cmu.edu gs18.sp.cs.cmu.edu school.coral.cs.cmu.edu tenages.tasks.cs.cmu.edu);
#@machinelist = qw(shamu.coral.cs.cmu.edu reef.coral.cs.cmu.edu);
#@machinelist = qw(shamu.coral.cs.cmu.edu);
# note no f14g, f6g
#@machinelist = qw(f2g f3g f4g f5g f7g f8g f9g f10g f11g f12g f13g f15g f16g);
@machinelist = ('zebrafish.coral.cs.cmu.edu');

# Programs used to run commands on, and copy files from, the machines above.
($SSH, $SCP) = ('ssh', 'scp');

# A trial is polled at most $max_waits times for completion, with a sleep of
# $sleep_time seconds between two polls.
($max_waits, $sleep_time) = (30, 30);
##############

# Run a shell command in a forked child so that the caller does not block.
#
# Arguments: the pieces of the command line; they are joined with single
#            spaces and run through the shell with backticks.
# Returns:   in the parent, the pid of the forked child.  This is the pid of
#            the perl child that runs the backticks, not of the command
#            itself.
# Dies:      if fork() fails.
#
# The child throws away whatever the command prints on stdout and always
# exits with status 0, however the command itself finished.
sub bgexecute {
  #print ('bgexec: '.(join ' ', @_)."\n");
  # Lexical on purpose: a package-global $pid/$command would be overwritten
  # by every call and could collide with other code using the same names.
  my $pid = fork;
  die "Could not execute process in the background: $!" unless defined $pid;

  #parent, just return
  return $pid if $pid;

  #child
  my $command = join ' ', @_;
  #print "$$: about to run '$command'\n";
  `$command`;
  #print "$$: command finished '$command'\n";
  exit 0;
}

# This collects the many children that we fork
use POSIX "sys_wait_h";
# SIGCHLD handler: collect, without blocking, every child that has already
# exited.  The exit statuses are deliberately ignored.
sub REAPER {
  # The handler can run between any two statements of the main program, and
  # waitpid() overwrites both $! and $?.  Localize them so that code such as
  #   mkdir(...) || die "...: $!"
  # still reports its own error rather than ours.
  local ($!, $?);
  #print "Collecting children\n";
  # Re-arm the handler (some platforms reset a handler once it has fired).
  $SIG{CHLD} = \&REAPER;
  my $child;
  # waitpid gives 0 when children exist but none has exited yet, and -1 when
  # there is no child left at all; in both cases there is nothing to collect.
  while ( ($child = waitpid(-1, WNOHANG)) != -1 && $child != 0) {
    # I don't care what the status was
    #print "Got a child\n";
  }
  #print "Done collecting children\n";
}
$SIG{CHLD} = \&REAPER;

##############


use File::Basename;
use Cwd qw(abs_path);
# Absolute directory holding this script; the spades tree is taken to be its
# parent directory.
$scriptdir = abs_path(dirname($0));

#$spadesdir = '/cluster/ferrari2/pfr/sim/spades';
$spadesdir = "$scriptdir/..";

# Results end up in $final_outputdir on this machine.  While the trials run,
# every machine logs under $temp_outputdir instead.  Both names carry our pid
# so that separate runs of this script get separate directories.
$final_outputdir = "$spadesdir/data/run_exp.$$";
$temp_outputdir = "/tmp/run_exp.$$";
mkdir ($final_outputdir, 0777) || die "Could not create final_outputdir '$final_outputdir': $!";
foreach $m (@machinelist) {
  # Without its scratch directory a machine cannot log anything, so report
  # the failure here instead of letting it show up later as missing data.
  system("$SSH $m mkdir $temp_outputdir") == 0
    or warn "Could not create temp_outputdir '$temp_outputdir' on $m\n";
}
# Main loop.  Every input line that is not blank or a comment describes one
# experiment as "<M> <A> <C> <N> <L> <reps>".  For each experiment we create
# its output directories, sanity check the parameters (a bad line gets an
# ERROR file in its output directory and is skipped) and then run <reps>
# trials.  A trial starts the ballworld engine on the first machine, loads
# the machines selected by <L>, starts one comm server on each of the first
# <M> machines, waits for the engine to finish, and finally compresses the
# logs and kills leftover busybee/client processes.
#
# Creating a file named EXIT next to this script stops the whole run before
# the next trial starts.
EXPERIMENT:
while (<>) {
  chomp;

  # Strip comments, then skip lines that have nothing left on them
  s/\#.*//;
  next EXPERIMENT if (/^\s*$/);

  ($M, $A, $C, $N, $L, $reps) = split;

  $final_expoutputdir = "$final_outputdir/${M}_${A}_${C}_${N}_${L}";
  $temp_expoutputdir = "$temp_outputdir/${M}_${A}_${C}_${N}_${L}";
  mkdir ($final_expoutputdir, 0777) || die "Could not create final_expoutputdir '$final_expoutputdir': $!";
  foreach $m (@machinelist) {
    system("$SSH $m mkdir $temp_expoutputdir") == 0
      or warn "Could not create temp_expoutputdir '$temp_expoutputdir' on $m\n";
  }

  #sanity check this exp trial!
  # When several checks fail, the message of the last failing one is kept.
  $errmsg = 0;

  if ($M < 1 || $M > @machinelist) { $errmsg = "Weird machine number" }
  if ($A < 1) { $errmsg = "Weird agent number" }
  if ($reps < 1) { $errmsg = "Weird reps" }
  if ($M > $A) { $errmsg = "More machines that agents!" }
  if ($C ne "fast" && $C ne "medium" && $C ne "med" && $C ne "slow" && $C ne "soccer") { $errmsg = "Don't understand the speed" }
  if ($N ne "on" && $N ne "off") { $errmsg = "Weird migration value" }
  # oct() reads a leading 0x as hex and anything else as octal
  $loadval = oct $L;

  if ($errmsg) {
    #Sanity check failed
    # Lexical handle and explicit close, so the report is flushed to disk
    # right away instead of whenever the handle happens to be reused.
    open (my $errfh, '>', "$final_expoutputdir/ERROR") || die "Could not open error file: $!";
    print $errfh "$_\n";
    print $errfh "$errmsg\n";
    close ($errfh) || warn "Could not close error file: $!";
    next EXPERIMENT;
  }

  for ($count = 0; $count < $reps; $count++) {

    if (-e "$scriptdir/EXIT") {
      print "Got an EXIT request!\n";
      # Leave the outer loop as well: a plain "last" would only end this
      # experiment and then go on to set up every remaining one.
      last EXPERIMENT;
    }

    $final_trialoutputdir = "$final_expoutputdir/$count";
    $temp_trialoutputdir = "$temp_expoutputdir/$count";
    mkdir ($final_trialoutputdir, 0777) || die "Could not create final_trialoutputdir '$final_trialoutputdir': $!";
    foreach $m (@machinelist[0..($M-1)]) {
      system("$SSH $m mkdir $temp_trialoutputdir") == 0
        or warn "Could not create temp_trialoutputdir '$temp_trialoutputdir' on $m\n";
    }

    $engine_host = $machinelist[0];

    #First, start up the engine machine
    $cmdline = "$spadesdir/sample_world_model/ballworld --file $spadesdir/sample_world_model/ballworld.conf ";
    $cmdline .= "--logfile_dir $temp_trialoutputdir --action_log_fn \%D/worldactions.log ";
    #$cmdline .= "--action_log_level 10 ";
    $cmdline .= "--action_log_level 200 ";
    $cmdline .= "--monitor_log_fn \%D/monitor.log ";
    $cmdline .= "--use_text_event_log off ";
    $cmdline .= "--agent_db_fn $spadesdir/sample_agent/agentdb.xml ";
    $cmdline .= "--num_comm_servers_wanted $M --num_agents $A ";
    $cmdline .= "--agent_speed $C ";
    $cmdline .= "--use_migration $N ";
    $world_pid = bgexecute("$SSH $engine_host $cmdline > $temp_trialoutputdir/worldoutput.log 2>&1");

    sleep 2; # We have to give the engine time to start!

    #Now load the machines
    for ($i = 0; $i < $M; $i++) {
      # Bit $i of the load vector decides whether machine $i gets loaded
      if ($loadval & (1<<$i)) {
        #Let's load this machine
        $m = $machinelist[$i];
        # we have to exec in the background because the child processes won't finish now.
        bgexecute ("$SSH $m $spadesdir/bin/load_machine.sh");
      }
    }

    #Now start up the comm servers
    $basecmdline = "$spadesdir/commserver/commserver --file $spadesdir/commserver/commserver.conf ";
    $basecmdline .= "--logfile_dir $temp_trialoutputdir ";
    $basecmdline .= "--action_log_level 10 ";
    $basecmdline .= "--create_agent_logfiles off ";
    #$basecmdline .= "--create_agent_logfiles on ";
    $basecmdline .= "--engine_host $engine_host ";
    $basecmdline .= "--agent_db_fn $spadesdir/sample_agent/agentdb.xml ";
    for ($i=0; $i<$M; $i++) {
      $cmdline = $basecmdline."--action_log_fn \%D/cs${i}actions.log ";
      $m = $machinelist[$i];
      bgexecute ("$SSH $m $cmdline > $temp_trialoutputdir/cs${i}output.log 2>&1");
      sleep(1);
    }

    # Wait for the simulation to finish
    #print "starting to wait\n";
    for ($i = 0; $i < $max_waits; $i++) {
      $res = waitpid ($world_pid, WNOHANG);
      #print "$res\n";
      # -1 means the child is already gone, e.g. collected by the SIGCHLD
      # handler before we asked
      last if ($res == $world_pid || $res == -1);
      sleep $sleep_time;
    }
    if ($i >= $max_waits) {
      warn "Ballworld did not finish like it was supposed to! Killing it";
      kill 'TERM', $world_pid;
      # $world_pid is only the local child forked by bgexecute; signalling it
      # does not stop the engine started through ssh.  Stop that one by name,
      # the same way busybee and client are stopped below.
      # NOTE(review): assumes the engine process is called "ballworld", as
      # the command path suggests - confirm.
      system "$SSH $engine_host killall ballworld";
    }
    #print "done wait\n";

    #Let's zip up files and kill all busybee processes
    foreach $m (@machinelist[0..($M-1)]) {
      # The backslash keeps the local shell away from the *, so the pattern
      # is expanded on $m
      system ("$SSH $m gzip $temp_trialoutputdir/".'\*.log');
      system "$SSH $m killall busybee";
      system "$SSH $m killall client";
    }

  }

  #temp stuff to shut down at a particular time
  #(undef,undef,$hour) = localtime(time);
  #last if ($hour > 8 and $hour < 9);
}


#Now we need to go collect the distributed data and bring it to the local machine
# Copy each machine's scratch tree into $final_outputdir, then remove it from
# that machine.
foreach $m (@machinelist) {
  if (system("$SCP -Cr $m:$temp_outputdir/* $final_outputdir") == 0) {
    system "$SSH $m rm -rf $temp_outputdir";
  } else {
    # The scratch tree is the only copy of that machine's data, so never
    # delete it unless the copy went through.
    warn "Could not copy the results from $m; leaving $temp_outputdir in place there\n";
  }
}
