Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
better support of parallel typed tasks in dump and dotty functions
[simgrid.git] / src / simdag / sd_task.c
index 0b7e382..71d9205 100644 (file)
@@ -201,7 +201,7 @@ SD_task_t SD_task_create_comp_par_amdahl(const char *name, void *data,
  * \return the new task
  */
 SD_task_t SD_task_create_comm_par_mxn_1d_block(const char *name, void *data,
-                                                                                          double amount)
+                                               double amount)
 {
   SD_task_t res = SD_task_create(name, data, amount);
   res->workstation_list=NULL;
@@ -239,13 +239,13 @@ void SD_task_destroy(SD_task_t task)
   xbt_free(task->communication_amount);
   xbt_free(task->computation_amount);
 
-  xbt_mallocator_release(sd_global->task_mallocator,task);
-  sd_global->task_number--;
-
 #ifdef HAVE_TRACING
   if (task->category) xbt_free(task->category);
 #endif
 
+  xbt_mallocator_release(sd_global->task_mallocator,task);
+  sd_global->task_number--;
+
   XBT_DEBUG("Task destroyed.");
 }
 
@@ -279,14 +279,22 @@ void SD_task_set_data(SD_task_t task, void *data)
 /**
  * \brief Sets the rate of a task
  *
- * This will change the percentage of the available power or network bandwidth
- * a task can use.
+ * This will change the network bandwidth a task can use. This rate
+ * depends on both the nominal bandwidth on the route onto which the task is
+ * scheduled (\see SD_task_get_current_bandwidth) and the amount of data to
+ * transfer.
  *
- * \param task a task
- * \param rate the new rate you want to associate with this task
+ * To divide the nominal bandwidth by 2, the rate then has to be :
+ *    rate = bandwidth/(2*amount)
+ *
+ * \param task a \see SD_TASK_COMM_E2E task (end-to-end communication)
+ * \param rate the new rate you want to associate with this task.
  */
 void SD_task_set_rate(SD_task_t task, double rate)
 {
+  xbt_assert(task->kind == SD_TASK_COMM_E2E,
+             "The rate can be modified for end-to-end communications only.");
+
   task->rate = rate;
 }
 
@@ -496,12 +504,17 @@ void SD_task_dump(SD_task_t task)
     case SD_TASK_COMP_PAR_AMDAHL:
       XBT_INFO("  - kind: parallel computation following Amdahl's law");
       break;
+    case SD_TASK_COMM_PAR_MXN_1D_BLOCK:
+      XBT_INFO("  - kind: MxN data redistribution assuming 1D block distribution");
+      break;
     default:
       XBT_INFO("  - (unknown kind %d)", task->kind);
     }
   }
   XBT_INFO("  - amount: %.0f", SD_task_get_amount(task));
-  XBT_INFO("  - Dependencies to satisfy: %u", task->unsatisfied_dependencies);
+  if (task->kind == SD_TASK_COMP_PAR_AMDAHL)
+    XBT_INFO("  - alpha: %.2f", task->alpha);
+  XBT_INFO("  - Dependencies to satisfy: %d", task->unsatisfied_dependencies);
   if (!xbt_dynar_is_empty(task->tasks_before)) {
     XBT_INFO("  - pre-dependencies:");
     xbt_dynar_foreach(task->tasks_before, counter, dependency) {
@@ -524,9 +537,11 @@ void SD_task_dotty(SD_task_t task, void *out)
   fprintf(out, "  T%p [label=\"%.20s\"", task, task->name);
   switch (task->kind) {
   case SD_TASK_COMM_E2E:
+  case SD_TASK_COMM_PAR_MXN_1D_BLOCK:
     fprintf(out, ", shape=box");
     break;
   case SD_TASK_COMP_SEQ:
+  case SD_TASK_COMP_PAR_AMDAHL:
     fprintf(out, ", shape=circle");
     break;
   default:
@@ -624,9 +639,6 @@ void SD_task_dependency_add(const char *name, void *data, SD_task_t src,
          SD_task_get_name(dst));
     __SD_task_set_state(dst, SD_SCHEDULED);
   }
-
-  /*  __SD_print_dependencies(src);
-     __SD_print_dependencies(dst); */
 }
 
 /**
@@ -965,8 +977,12 @@ void SD_task_unschedule(SD_task_t task)
            SD_task_get_name(task));
 
   if (__SD_task_is_scheduled_or_runnable(task)  /* if the task is scheduled or runnable */
-      &&task->kind == SD_TASK_NOT_TYPED)        /* Don't free scheduling data for typed tasks */
+      && ((task->kind == SD_TASK_COMP_PAR_AMDAHL) ||
+          (task->kind == SD_TASK_COMM_PAR_MXN_1D_BLOCK))) { /* Don't free scheduling data for typed tasks */
     __SD_task_destroy_scheduling_data(task);
+    task->workstation_list=NULL;
+    task->workstation_nb = 0;
+  }
 
   if (__SD_task_is_running(task))       /* the task should become SD_FAILED */
     surf_workstation_model->action_cancel(task->surf_action);
@@ -980,7 +996,8 @@ void SD_task_unschedule(SD_task_t task)
   task->start_time = -1.0;
 }
 
-/* Destroys the data memorized by SD_task_schedule. Task state must be SD_SCHEDULED or SD_RUNNABLE.
+/* Destroys the data memorized by SD_task_schedule.
+ * Task state must be SD_SCHEDULED or SD_RUNNABLE.
  */
 static void __SD_task_destroy_scheduling_data(SD_task_t task)
 {
@@ -995,9 +1012,9 @@ static void __SD_task_destroy_scheduling_data(SD_task_t task)
   task->computation_amount = task->communication_amount = NULL;
 }
 
-/* Runs a task. This function is directly called by __SD_task_try_to_run if the task
- * doesn't have to wait in fifos. Otherwise, it is called by __SD_task_just_done when
- * the task gets out of its fifos.
+/* Runs a task. This function is directly called by __SD_task_try_to_run if
+ * the task doesn't have to wait in FIFOs. Otherwise, it is called by
+ * __SD_task_just_done when the task gets out of its FIFOs.
  */
 void __SD_task_really_run(SD_task_t task)
 {
@@ -1012,12 +1029,11 @@ void __SD_task_really_run(SD_task_t task)
               "Task '%s': workstation_list is NULL!",
               SD_task_get_name(task));
 
-
-
   XBT_DEBUG("Really running task '%s'", SD_task_get_name(task));
+  int workstation_nb = task->workstation_nb;
 
   /* set this task as current task for the workstations in sequential mode */
-  for (i = 0; i < task->workstation_nb; i++) {
+  for (i = 0; i < workstation_nb; i++) {
     if (SD_workstation_get_access_mode(task->workstation_list[i]) ==
         SD_WORKSTATION_SEQUENTIAL_ACCESS) {
       task->workstation_list[i]->current_task = task;
@@ -1033,71 +1049,29 @@ void __SD_task_really_run(SD_task_t task)
 
   /* we have to create a Surf workstation array instead of the SimDag
    * workstation array */
-  surf_workstations = xbt_new(void *, task->workstation_nb);
+  surf_workstations = xbt_new(void *, workstation_nb);
 
-  for (i = 0; i < task->workstation_nb; i++)
+  for (i = 0; i < workstation_nb; i++)
     surf_workstations[i] = task->workstation_list[i]->surf_workstation;
 
-  /* It's allowed to pass a NULL vector as cost to mean vector of 0.0 (easing
-   * user's life). Let's deal with it */
-#define cost_or_zero(array,pos) ((array)?(array)[pos]:0.0)
-
-  task->surf_action = NULL;
-  if ((task->workstation_nb == 1)
-      && (cost_or_zero(task->communication_amount, 0) == 0.0)) {
-    task->surf_action =
-        surf_workstation_model->extension.
-        workstation.execute(surf_workstations[0],
-                            cost_or_zero(task->computation_amount, 0));
-  } else if ((task->workstation_nb == 1)
-             && (cost_or_zero(task->computation_amount, 0) == 0.0)) {
-
-    task->surf_action =
-        surf_workstation_model->extension.
-        workstation.communicate(surf_workstations[0], surf_workstations[0],
-                                cost_or_zero(task->communication_amount,
-                                             0), task->rate);
-  } else if ((task->workstation_nb == 2)
-             && (cost_or_zero(task->computation_amount, 0) == 0.0)
-             && (cost_or_zero(task->computation_amount, 1) == 0.0)) {
-    int nb = 0;
-    double value = 0.0;
-
-    for (i = 0; i < task->workstation_nb * task->workstation_nb; i++) {
-      if (cost_or_zero(task->communication_amount, i) > 0.0) {
-        nb++;
-        value = cost_or_zero(task->communication_amount, i);
-      }
-    }
-    if (nb == 1) {
-      task->surf_action =
-          surf_workstation_model->extension.
-          workstation.communicate(surf_workstations[0],
-                                  surf_workstations[1], value, task->rate);
-    }
-  }
-#undef cost_or_zero
+  double *computation_amount = xbt_new0(double, workstation_nb);
+  double *communication_amount = xbt_new0(double, workstation_nb * workstation_nb);
 
-  if (!task->surf_action) {
-    double *computation_amount = xbt_new(double, task->workstation_nb);
-    double *communication_amount = xbt_new(double, task->workstation_nb *
-                                           task->workstation_nb);
 
+  if(task->computation_amount)
     memcpy(computation_amount, task->computation_amount, sizeof(double) *
-           task->workstation_nb);
+           workstation_nb);
+  if(task->communication_amount)
     memcpy(communication_amount, task->communication_amount,
-           sizeof(double) * task->workstation_nb * task->workstation_nb);
+           sizeof(double) * workstation_nb * workstation_nb);
 
-    task->surf_action =
+  task->surf_action =
         surf_workstation_model->extension.
-        workstation.execute_parallel_task(task->workstation_nb,
+        workstation.execute_parallel_task(workstation_nb,
                                           surf_workstations,
                                           computation_amount,
                                           communication_amount,
                                           task->rate);
-  } else {
-    xbt_free(surf_workstations);
-  }
 
   surf_workstation_model->action_data_set(task->surf_action, task);
 
@@ -1115,10 +1089,11 @@ void __SD_task_really_run(SD_task_t task)
 
 }
 
-/* Tries to run a task. This function is called by SD_simulate() when a scheduled task becomes SD_RUNNABLE
- * (ie when its dependencies are satisfied).
- * If one of the workstations where the task is scheduled on is busy (in sequential mode),
- * the task doesn't start.
+/* Tries to run a task. This function is called by SD_simulate() when a
+ * scheduled task becomes SD_RUNNABLE (i.e., when its dependencies are
+ * satisfied).
+ * If one of the workstations where the task is scheduled on is busy (in
+ * sequential mode), the task doesn't start.
  * Returns whether the task has started.
  */
 int __SD_task_try_to_run(SD_task_t task)
@@ -1140,11 +1115,11 @@ int __SD_task_try_to_run(SD_task_t task)
 
   XBT_DEBUG("Task '%s' can start: %d", SD_task_get_name(task), can_start);
 
-  if (!can_start) {             /* if the task cannot start and is not in the fifos yet */
+  if (!can_start) {             /* if the task cannot start and is not in the FIFOs yet */
     for (i = 0; i < task->workstation_nb; i++) {
       workstation = task->workstation_list[i];
       if (workstation->access_mode == SD_WORKSTATION_SEQUENTIAL_ACCESS) {
-        XBT_DEBUG("Pushing task '%s' in the fifo of workstation '%s'",
+        XBT_DEBUG("Pushing task '%s' in the FIFO of workstation '%s'",
                SD_task_get_name(task),
                SD_workstation_get_name(workstation));
         xbt_fifo_push(workstation->task_fifo, task);
@@ -1163,7 +1138,7 @@ int __SD_task_try_to_run(SD_task_t task)
 
 /* This function is called by SD_simulate when a task is done.
  * It updates task->state and task->action and executes if necessary the tasks
- * which were waiting in fifos for the end of `task'
+ * which were waiting in FIFOs for the end of `task'
  */
 void __SD_task_just_done(SD_task_t task)
 {
@@ -1193,14 +1168,14 @@ void __SD_task_just_done(SD_task_t task)
   XBT_DEBUG("Looking for candidates");
 
   /* if the task was executed on sequential workstations,
-     maybe we can execute the next task of the fifo for each workstation */
+     maybe we can execute the next task of the FIFO for each workstation */
   for (i = 0; i < task->workstation_nb; i++) {
     workstation = task->workstation_list[i];
     XBT_DEBUG("Workstation '%s': access_mode = %d",
               SD_workstation_get_name(workstation), (int)workstation->access_mode);
     if (workstation->access_mode == SD_WORKSTATION_SEQUENTIAL_ACCESS) {
       xbt_assert(workstation->task_fifo != NULL,
-                  "Workstation '%s' has sequential access but no fifo!",
+                  "Workstation '%s' has sequential access but no FIFO!",
                   SD_workstation_get_name(workstation));
       xbt_assert(workstation->current_task =
                   task, "Workstation '%s': current task should be '%s'",
@@ -1210,7 +1185,7 @@ void __SD_task_just_done(SD_task_t task)
       /* the task is over so we can release the workstation */
       workstation->current_task = NULL;
 
-      XBT_DEBUG("Getting candidate in fifo");
+      XBT_DEBUG("Getting candidate in FIFO");
       candidate =
           xbt_fifo_get_item_content(xbt_fifo_get_first_item
                                     (workstation->task_fifo));
@@ -1228,9 +1203,10 @@ void __SD_task_just_done(SD_task_t task)
       /* if there was a task waiting for my place */
       if (candidate != NULL) {
         /* Unfortunately, we are not sure yet that we can execute the task now,
-           because the task can be waiting more deeply in some other workstation's fifos...
-           So we memorize all candidate tasks, and then we will check for each candidate
-           whether or not all its workstations are available. */
+           because the task can be waiting more deeply in some other
+           workstation's FIFOs ...
+           So we memorize all candidate tasks, and then we will check for each
+           candidate whether or not all its workstations are available. */
 
         /* realloc if necessary */
         if (candidate_nb == candidate_capacity) {
@@ -1265,7 +1241,7 @@ void __SD_task_just_done(SD_task_t task)
       workstation = candidate->workstation_list[j];
 
       /* I can start on this workstation if the workstation is shared
-         or if I am the first task in the fifo */
+         or if I am the first task in the FIFO */
       can_start = workstation->access_mode == SD_WORKSTATION_SHARED_ACCESS
           || candidate ==
           xbt_fifo_get_item_content(xbt_fifo_get_first_item
@@ -1280,14 +1256,14 @@ void __SD_task_just_done(SD_task_t task)
       for (j = 0; j < candidate->workstation_nb && can_start; j++) {
         workstation = candidate->workstation_list[j];
 
-        /* update the fifo */
+        /* update the FIFO */
         if (workstation->access_mode == SD_WORKSTATION_SEQUENTIAL_ACCESS) {
           candidate = xbt_fifo_shift(workstation->task_fifo);   /* the return value is stored just for debugging */
-          XBT_DEBUG("Head of the fifo: '%s'",
+          XBT_DEBUG("Head of the FIFO: '%s'",
                  (candidate !=
                   NULL) ? SD_task_get_name(candidate) : "NULL");
           xbt_assert(candidate == candidates[i],
-                      "Error in __SD_task_just_done: bad first task in the fifo");
+                      "Error in __SD_task_just_done: bad first task in the FIFO");
         }
       }                         /* for each workstation */
 
@@ -1395,21 +1371,23 @@ void SD_task_distribute_comp_amdhal(SD_task_t task, int ws_count)
 /** @brief Auto-schedules a task.
  *
  * Auto-scheduling mean that the task can be used with SD_task_schedulev(). This
- * allows to specify the task costs at creation, and decorelate them from the
+ * allows to specify the task costs at creation, and decouple them from the
  * scheduling process where you just specify which resource should deliver the
  * mandatory power.
  *
- * To be auto-schedulable, a task must be created with SD_task_create_comm_e2e() or
- * SD_task_create_comp_seq(). Check their definitions for the exact semantic of each
- * of them.
+ * To be auto-schedulable, a task must be created with SD_task_create_comm_e2e()
+ * or SD_task_create_comp_seq(). Check their definitions for the exact semantic
+ * of each of them.
  *
  * @todo
  * We should create tasks kind for the following categories:
  *  - Point to point communication (done)
  *  - Sequential computation       (done)
  *  - group communication (redistribution, several kinds)
- *  - parallel tasks with no internal communication (one kind per speedup model such as amdal)
- *  - idem+ internal communication. Task type not enough since we cannot store comm cost alongside to comp one)
+ *  - parallel tasks with no internal communication (one kind per speedup
+ *    model such as Amdahl)
+ *  - idem+ internal communication. Task type not enough since we cannot store
+ *    comm cost alongside to comp one)
  */
 void SD_task_schedulev(SD_task_t task, int count,
                        const SD_workstation_t * list)
@@ -1425,9 +1403,16 @@ void SD_task_schedulev(SD_task_t task, int count,
     SD_task_distribute_comp_amdhal(task, count);
   case SD_TASK_COMM_E2E:
   case SD_TASK_COMP_SEQ:
-    xbt_assert(task->workstation_nb == count,"Got %d locations, but were expecting %d locations",count,task->workstation_nb);
+    xbt_assert(task->workstation_nb == count,
+               "Got %d locations, but were expecting %d locations",
+               count,task->workstation_nb);
     for (i = 0; i < count; i++)
       task->workstation_list[i] = list[i];
+    if (SD_task_get_kind(task)== SD_TASK_COMP_SEQ && !task->computation_amount){
+      /*This task has failed and is rescheduled. Reset the computation amount*/
+      task->computation_amount = xbt_new0(double, 1);
+      task->computation_amount[0] = task->remains;
+    }
     SD_task_do_schedule(task);
     break;
   default:
@@ -1443,7 +1428,8 @@ void SD_task_schedulev(SD_task_t task, int count,
 
   }
 
-  /* Iterate over all childs and parent being COMM_E2E to say where I am located (and start them if runnable) */
+  /* Iterate over all children and parents being COMM_E2E to say where I am
+   * located (and start them if runnable) */
   if (task->kind == SD_TASK_COMP_SEQ) {
     XBT_VERB("Schedule computation task %s on %s. It costs %.f flops",
           SD_task_get_name(task),
@@ -1487,7 +1473,8 @@ void SD_task_schedulev(SD_task_t task, int count,
       }
     }
   }
-  /* Iterate over all childs and parent being MXN_1D_BLOC to say where I am located (and start them if runnable) */
+  /* Iterate over all children and parents being MXN_1D_BLOCK to say where I am
+   * located (and start them if runnable) */
   if (task->kind == SD_TASK_COMP_PAR_AMDAHL) {
     XBT_VERB("Schedule computation task %s on %d workstations. %.f flops"
              " will be distributed following Amdahl'Law",