1 /* context_raw - fast context switching inspired by System V ucontexts */
3 /* Copyright (c) 2009-2014. The SimGrid Team.
4 * All rights reserved. */
6 /* This program is free software; you can redistribute it and/or modify it
7 * under the terms of the license (GNU LGPL) which comes with this package. */
8 #include "smx_private.h"
9 #include "xbt/parmap.h"
10 #include "xbt/dynar.h"
13 typedef char * raw_stack_t; /* saved stack pointer of a context: the value raw_swapcontext() stores when switching out and loads when switching in */
14 typedef void (*rawctx_entry_point_t)(void *); /* type of the entry point given to raw_makecontext(); it receives one void* argument */
16 typedef struct s_smx_ctx_raw {
17 s_smx_ctx_base_t super; /* Fields of super implementation; this file casts freely between smx_context_t and smx_ctx_raw_t */
18 char *malloced_stack; /* malloced area containing the stack (from SIMIX_context_stack_new(), released by SIMIX_context_stack_delete()) */
19 raw_stack_t stack_top; /* pointer to stack top (within previous area); written by raw_swapcontext() when this context is switched out */
20 #ifdef TIME_BENCH_PER_SR
21 unsigned int thread; /* Just for measuring purposes: index of the simulated thread this context is charged to in smx_ctx_raw_runall_serial() */
23 } s_smx_ctx_raw_t, *smx_ctx_raw_t;
25 #ifdef CONTEXT_THREADS
26 static xbt_parmap_t raw_parmap; /* parmap created in SIMIX_ctx_raw_factory_init() and applied to the runnable processes in smx_ctx_raw_runall_parallel() */
27 static smx_ctx_raw_t* raw_workers_context; /* space to save the worker context in each thread (array indexed by worker id) */
28 static unsigned long raw_threads_working; /* number of threads that have started their work; reset to 0 before each parallel round and used to hand out worker ids */
29 static xbt_os_thread_key_t raw_worker_id_key; /* thread-specific storage for the thread id */
32 #ifdef ADAPTIVE_THRESHOLD
33 #define SCHED_ROUND_LIMIT 5 /* the per-process costs below are recomputed when the matching round counter is a multiple of this value */
34 xbt_os_timer_t round_time; /* timer started and stopped around each scheduling round in smx_ctx_raw_runall() */
35 double par_time,seq_time; /* elapsed time accumulated by parallel / serial rounds; reset when the matching ratio is recomputed */
36 double par_ratio,seq_ratio; /* accumulated time divided by the number of processes that ran (0 when none ran) */
37 static unsigned int par_proc_that_ran = 0,seq_proc_that_ran = 0; /* Counters of processes that have run in SCHED_ROUND_LIMIT scheduling rounds */
38 static unsigned int seq_sched_round, par_sched_round; /* Number of scheduling rounds (SR) that ran serially / in parallel */
41 static unsigned long raw_process_index = 0; /* index of the next process to run in the
42 * list of runnable processes */
43 static smx_ctx_raw_t raw_maestro_context; /* context of maestro: serial scheduling switches back to it once every runnable process has run */
/* Prepares a stack inside malloced_stack (stack_size bytes) so that the new
 * context starts by calling entry_point(arg). The returned raw_stack_t is
 * presumably the initial stack_top to hand to raw_swapcontext() - TODO confirm. */
45 extern raw_stack_t raw_makecontext(char* malloced_stack, int stack_size,
46 rawctx_entry_point_t entry_point, void* arg);
/* Saves the current stack pointer into *old, then switches to the stack `new`. */
47 extern void raw_swapcontext(raw_stack_t* old, raw_stack_t new);
53 ".globl _raw_makecontext\n"
57 ".globl raw_makecontext\n"
61 ".globl raw_makecontext\n"
62 ".type raw_makecontext,@function\n"
63 "raw_makecontext:\n"/* Calling convention sets the arguments in rdi, rsi, rdx and rcx, respectively */
65 " mov %rdi,%rax\n" /* stack */
66 " add %rsi,%rax\n" /* size */
67 " andq $-16, %rax\n" /* align stack */
68 " movq $0, -8(%rax)\n" /* @return for func */
69 " mov %rdx,-16(%rax)\n" /* func */
70 " mov %rcx,-24(%rax)\n" /* arg/rdi */
71 " movq $0, -32(%rax)\n" /* rsi */
72 " movq $0, -40(%rax)\n" /* rdx */
73 " movq $0, -48(%rax)\n" /* rcx */
74 " movq $0, -56(%rax)\n" /* r8 */
75 " movq $0, -64(%rax)\n" /* r9 */
76 " movq $0, -72(%rax)\n" /* rbp */
77 " movq $0, -80(%rax)\n" /* rbx */
78 " movq $0, -88(%rax)\n" /* r12 */
79 " movq $0, -96(%rax)\n" /* r13 */
80 " movq $0, -104(%rax)\n" /* r14 */
81 " movq $0, -112(%rax)\n" /* r15 */
89 ".globl _raw_swapcontext\n"
93 ".globl raw_swapcontext\n"
97 ".globl raw_swapcontext\n"
98 ".type raw_swapcontext,@function\n"
99 "raw_swapcontext:\n" /* Calling convention sets the arguments in rdi and rsi, respectively */
113 " mov %rsp,(%rdi)\n" /* old */
114 " mov %rsi,%rsp\n" /* new */
131 #if defined(APPLE) || defined(_WIN32)
133 ".globl _raw_makecontext\n"
134 "_raw_makecontext:\n"
137 ".globl raw_makecontext\n"
138 ".type raw_makecontext,@function\n"
141 " movl 4(%esp),%eax\n" /* stack */
142 " addl 8(%esp),%eax\n" /* size */
143 " andl $-16, %eax\n" /* align stack */
144 " movl 12(%esp),%ecx\n" /* func */
145 " movl 16(%esp),%edx\n" /* arg */
146 " movl %edx, -4(%eax)\n"
147 " movl $0, -8(%eax)\n" /* @return for func */
148 " movl %ecx,-12(%eax)\n"
149 " movl $0, -16(%eax)\n" /* ebp */
150 " movl $0, -20(%eax)\n" /* ebx */
151 " movl $0, -24(%eax)\n" /* esi */
152 " movl $0, -28(%eax)\n" /* edi */
158 #if defined(APPLE) || defined(_WIN32)
160 ".globl _raw_swapcontext\n"
161 "_raw_swapcontext:\n"
164 ".globl raw_swapcontext\n"
165 ".type raw_swapcontext,@function\n"
168 " movl 4(%esp),%eax\n" /* old */
169 " movl 8(%esp),%edx\n" /* new */
174 " movl %esp,(%eax)\n"
185 /* If you implement raw contexts for other processors, don't forget to
186 update the definition of HAVE_RAWCTX in buildtools/Cmake/CompleteInFiles.cmake */
188 raw_stack_t raw_makecontext(char* malloced_stack, int stack_size,
189 rawctx_entry_point_t entry_point, void* arg) {
193 void raw_swapcontext(raw_stack_t* old, raw_stack_t new) {
199 XBT_LOG_EXTERNAL_DEFAULT_CATEGORY(simix_context);
201 #ifdef TIME_BENCH_PER_SR
202 #include "xbt/xbt_os_time.h"
203 #define NUM_THREADS 4 /* number of simulated threads the runnable processes are split across when benchmarking */
204 static xbt_os_timer_t timer; /* times each process resumption in smx_ctx_raw_runall_serial() */
205 static double time_thread_sr[NUM_THREADS]; /* per-thread time accumulated over a scheduling round; reported by smx_ctx_raw_new_sr() */
206 static double time_thread_ssr[NUM_THREADS]; /* per-thread time accumulated during one call to smx_ctx_raw_runall_serial() */
207 static double time_wasted_sr = 0; /* running sum of (max thread time - thread time), updated by smx_ctx_raw_new_sr() */
208 static double time_wasted_ssr = 0; /* same sum, updated at the end of smx_ctx_raw_runall_serial() */
209 static unsigned int sr_count = 0; /* counter printed with the SR totals */
210 static unsigned int ssr_count = 0; /* counter printed with the SSR totals */
211 static char new_sr = 0;
214 #ifdef TIME_BENCH_ENTIRE_SRS
215 static unsigned int sr_count = 0; /* counter printed by smx_ctx_raw_runall() with each round's elapsed time */
216 static xbt_os_timer_t timer; /* times a whole scheduling round in smx_ctx_raw_runall() */
219 #ifdef ADAPTIVE_THRESHOLD
220 int reached_seq_limit, reached_par_limit; /* set by smx_ctx_raw_runall() when seq_sched_round / par_sched_round is a multiple of SCHED_ROUND_LIMIT */
/* Forward declarations of the factory hooks and scheduling helpers defined below. */
223 static void smx_ctx_raw_wrapper(smx_ctx_raw_t context);
224 static int smx_ctx_raw_factory_finalize(smx_context_factory_t *factory);
225 static smx_context_t smx_ctx_raw_create_context(xbt_main_func_t code, int argc,
226 char **argv, void_pfn_smxprocess_t cleanup_func, smx_process_t process);
227 static void smx_ctx_raw_free(smx_context_t context);
228 static void smx_ctx_raw_wrapper(smx_ctx_raw_t context); /* NOTE(review): repeats the prototype already given at the top of this list */
229 static void smx_ctx_raw_stop(smx_context_t context);
230 static void smx_ctx_raw_suspend_serial(smx_context_t context);
231 static void smx_ctx_raw_resume_serial(smx_process_t first_process);
232 #ifdef TIME_BENCH_PER_SR
233 static void smx_ctx_raw_runall_serial(xbt_dynar_t processes); /* benchmarking variant: takes the list of processes to resume */
234 void smx_ctx_raw_new_sr(void);
236 static void smx_ctx_raw_runall_serial(void); /* regular variant: reads simix_global->process_to_run itself */
238 static void smx_ctx_raw_suspend_parallel(smx_context_t context);
239 static void smx_ctx_raw_resume_parallel(smx_process_t first_process);
240 static void smx_ctx_raw_runall_parallel(void);
241 static void smx_ctx_raw_runall(void);
244 * \brief Initializes the raw context factory.
245 * \param factory where to initialize the factory
247 void SIMIX_ctx_raw_factory_init(smx_context_factory_t *factory)
250 XBT_VERB("Using raw contexts. Because the glibc is just not good enough for us.");
251 smx_ctx_base_factory_init(factory); /* base initialization first; the raw-specific hooks are installed below */
253 (*factory)->finalize = smx_ctx_raw_factory_finalize;
254 (*factory)->create_context = smx_ctx_raw_create_context;
255 /* Do not overload that method (*factory)->finalize */
/* NOTE(review): the comment above contradicts the finalize assignment made
 * two statements earlier - confirm which one is intended. */
256 (*factory)->free = smx_ctx_raw_free;
257 (*factory)->stop = smx_ctx_raw_stop;
258 (*factory)->name = "smx_raw_context_factory";
260 if (SIMIX_context_is_parallel()) {
261 #ifdef CONTEXT_THREADS
262 int nthreads = SIMIX_context_get_nthreads();
263 xbt_os_thread_key_create(&raw_worker_id_key); /* key under which each worker thread stores its worker id */
264 raw_parmap = xbt_parmap_new(nthreads, SIMIX_context_get_parallel_mode());
265 raw_workers_context = xbt_new(smx_ctx_raw_t, nthreads); /* one saved-context slot per worker thread */
266 raw_maestro_context=NULL; /* lets smx_ctx_raw_create_context() record the next eligible context as maestro */
269 if (SIMIX_context_get_parallel_threshold() > 1) {
270 /* choose dynamically */
271 (*factory)->runall = smx_ctx_raw_runall;
272 (*factory)->suspend = NULL; /* smx_ctx_raw_runall() installs the serial or parallel suspend hook before each round */
275 /* always parallel */
276 (*factory)->runall = smx_ctx_raw_runall_parallel;
277 (*factory)->suspend = smx_ctx_raw_suspend_parallel;
282 (*factory)->runall = smx_ctx_raw_runall_serial;
283 (*factory)->suspend = smx_ctx_raw_suspend_serial;
285 #ifdef TIME_BENCH_ENTIRE_SRS
286 (*factory)->runall = smx_ctx_raw_runall; /* benchmarking whole rounds always goes through the timed smx_ctx_raw_runall() */
287 (*factory)->suspend = NULL;
288 timer = xbt_os_timer_new();
291 #ifdef ADAPTIVE_THRESHOLD
292 round_time = xbt_os_timer_new();
293 reached_seq_limit = 0;
294 reached_par_limit = 0;
297 #ifdef TIME_BENCH_PER_SR
298 timer = xbt_os_timer_new();
303 * \brief Finalizes the raw context factory.
304 * \param factory the raw context factory
306 static int smx_ctx_raw_factory_finalize(smx_context_factory_t *factory)
308 #ifdef TIME_BENCH_PER_SR
309 XBT_VERB("Total wasted time in %u SR: %f", sr_count, time_wasted_sr); /* report the benchmarking totals before tearing down */
310 XBT_VERB("Total wasted time in %u SSR: %f", ssr_count, time_wasted_ssr);
313 #ifdef CONTEXT_THREADS
315 xbt_parmap_destroy(raw_parmap); /* releases the parmap created in SIMIX_ctx_raw_factory_init() */
316 xbt_free(raw_workers_context); /* releases the per-worker context array */
318 return smx_ctx_base_factory_finalize(factory); /* the base finalizer provides the return value */
322 * \brief Creates a new raw context.
323 * \param code main function of this context or NULL to create the maestro
325 * \param argc argument number
326 * \param argv arguments to pass to the main function
327 * \param cleanup_func a function to call to free the user data when the
329 * \param process SIMIX process
332 smx_ctx_raw_create_context(xbt_main_func_t code, int argc, char **argv,
333 void_pfn_smxprocess_t cleanup_func,
334 smx_process_t process)
337 smx_ctx_raw_t context =
338 (smx_ctx_raw_t) smx_ctx_base_factory_create_context_sized(
339 sizeof(s_smx_ctx_raw_t), /* ask the base factory for an object large enough to hold the raw-specific fields */
346 /* if the user provided a function for the process then use it,
347 otherwise it is the context for maestro */
349 context->malloced_stack = SIMIX_context_stack_new(); /* stack area owned by this context; freed in smx_ctx_raw_free() */
351 raw_makecontext(context->malloced_stack,
352 smx_context_usable_stack_size,
353 (void_f_pvoid_t)smx_ctx_raw_wrapper, context); /* the new context will start in smx_ctx_raw_wrapper(context) */
356 if(process != NULL && raw_maestro_context==NULL) /* only the first such context is recorded as maestro */
357 raw_maestro_context = context;
360 MC_ignore_heap(&(raw_maestro_context->stack_top), sizeof(raw_maestro_context->stack_top)); /* NOTE(review): presumably hides maestro's saved stack pointer from the model checker - confirm */
364 return (smx_context_t) context;
368 * \brief Destroys a raw context.
369 * \param context a raw context
371 static void smx_ctx_raw_free(smx_context_t context)
374 SIMIX_context_stack_delete(((smx_ctx_raw_t) context)->malloced_stack); /* release the stack obtained from SIMIX_context_stack_new() */
376 smx_ctx_base_free(context); /* then let the base implementation dispose of the context itself */
380 * \brief Wrapper for the main function of a context.
381 * \param context a raw context
383 static void smx_ctx_raw_wrapper(smx_ctx_raw_t context)
385 (context->super.code) (context->super.argc, context->super.argv); /* run the user's main function with the arguments stored in the context */
387 smx_ctx_raw_stop((smx_context_t) context); /* once it returns, stop this context and switch to another one */
391 * \brief Stops a raw context.
393 * This function is called when the main function of the context is finished.
395 * \param context the current context
397 static void smx_ctx_raw_stop(smx_context_t context)
399 smx_ctx_base_stop(context); /* base-class part of stopping the context */
400 simix_global->context_factory->suspend(context); /* leave this context through whichever suspend hook (serial or parallel) is currently installed */
404 * \brief Suspends a running context and resumes another one or returns to
406 * \param context the current context
408 static void smx_ctx_raw_suspend_serial(smx_context_t context)
410 /* determine the next context */
411 smx_context_t next_context;
413 #ifdef TIME_BENCH_PER_SR
414 i = ++raw_process_index; /* benchmarking build: increments first, then uses the new value */
416 i = raw_process_index++; /* regular build: uses the current index, then advances it */
418 if (i < xbt_dynar_length(simix_global->process_to_run)) {
419 /* execute the next process */
420 XBT_DEBUG("Run next process");
421 next_context = xbt_dynar_get_as(
422 simix_global->process_to_run, i, smx_process_t)->context;
425 /* all processes were run, return to maestro */
426 XBT_DEBUG("No more process to run");
427 next_context = (smx_context_t) raw_maestro_context;
429 SIMIX_context_set_current(next_context); /* publish the new current context before actually switching stacks */
430 raw_swapcontext(&((smx_ctx_raw_t) context)->stack_top,
431 ((smx_ctx_raw_t) next_context)->stack_top); /* save our stack pointer in `context`, continue on next_context's stack */
435 * \brief Resumes sequentially all processes ready to run.
436 * \param first_process the first process to resume
438 static void smx_ctx_raw_resume_serial(smx_process_t first_process)
440 smx_ctx_raw_t context = (smx_ctx_raw_t) first_process->context;
441 SIMIX_context_set_current((smx_context_t) context); /* the process becomes the current context before the switch */
442 raw_swapcontext(&raw_maestro_context->stack_top,
443 ((smx_ctx_raw_t) context)->stack_top); /* save maestro's stack pointer and jump onto the process's stack */
446 #ifdef TIME_BENCH_PER_SR
447 static void smx_ctx_raw_runall_serial(xbt_dynar_t processes)
449 smx_process_t process;
453 unsigned long num_proc = xbt_dynar_length(simix_global->process_to_run);
455 unsigned int data_size = (num_proc / NUM_THREADS) + ((num_proc % NUM_THREADS) ? 1 : 0); /* ceil(num_proc / NUM_THREADS): processes charged to each simulated thread */
458 time_thread_ssr[0] = 0;
459 xbt_dynar_foreach(processes, cursor, process){
460 XBT_VERB("Schedule item %u of %lu",cursor,num_proc);
461 if(cursor >= t * data_size + data_size){ /* cursor has moved past the slice of simulated thread t */
462 if(time_thread_ssr[t] > tmax)
463 tmax = time_thread_ssr[t];
465 time_thread_ssr[t] = 0;
469 ((smx_ctx_raw_t)process->context)->thread = t; /* remember which simulated thread this process is charged to */
470 time_thread_sr[t] = 0;
473 xbt_os_cputimer_start(timer); /* time this single resumption */
474 smx_ctx_raw_resume_serial(process);
475 xbt_os_cputimer_stop(timer);
476 elapsed = xbt_os_timer_elapsed(timer);
477 time_thread_ssr[t] += elapsed;
478 time_thread_sr[((smx_ctx_raw_t)process->context)->thread] += elapsed;
484 if(time_thread_ssr[t] > tmax)
485 tmax = time_thread_ssr[t];
487 for(cursor=0; cursor <= t; cursor++){
488 XBT_VERB("Time SSR thread %u = %f (max %f)", cursor, time_thread_ssr[cursor], tmax);
489 time_wasted_ssr += tmax - time_thread_ssr[cursor]; /* wasted time = gap between this thread and the slowest one */
493 void smx_ctx_raw_new_sr(void)
/* Reports the per-thread times of the scheduling round that just ended and
 * adds each thread's gap to the slowest thread to time_wasted_sr. */
499 for(i=0; i < NUM_THREADS; i++){
500 if(time_thread_sr[i] > tmax)
501 tmax = time_thread_sr[i];
504 for(i=0; i < NUM_THREADS; i++){
505 XBT_CRITICAL("Time SR thread %u = %f (max %f)", i, time_thread_sr[i], tmax);
506 time_wasted_sr += tmax - time_thread_sr[i];
/* xbt_dynar_length() yields an unsigned long (it is stored in one elsewhere
 * in this file), so it must be printed with %lu rather than %d. */
509 XBT_CRITICAL("Total time SR %u = %f, %lu", sr_count, tmax, xbt_dynar_length(simix_global->process_that_ran));
510 XBT_CRITICAL("New scheduling round");
514 * \brief Resumes sequentially all processes ready to run.
516 static void smx_ctx_raw_runall_serial(void)
518 smx_process_t first_process =
519 xbt_dynar_get_as(simix_global->process_to_run, 0, smx_process_t);
520 raw_process_index = 1; /* index 0 is resumed right below; smx_ctx_raw_suspend_serial() continues from index 1 */
522 /* execute the first process */
523 smx_ctx_raw_resume_serial(first_process);
528 * \brief Suspends a running context and resumes another one or returns to
529 * the main function of the current worker thread.
530 * \param context the context of the current worker thread
532 static void smx_ctx_raw_suspend_parallel(smx_context_t context)
534 #ifdef CONTEXT_THREADS
535 /* determine the next context */
536 smx_process_t next_work = xbt_parmap_next(raw_parmap); /* next process handed out by the parmap; NULL is treated below as "nothing left" */
537 smx_context_t next_context;
538 raw_stack_t next_stack;
540 if (next_work != NULL) {
541 /* there is a next process to resume */
542 XBT_DEBUG("Run next process");
543 next_context = next_work->context;
544 next_stack = ((smx_ctx_raw_t) next_context)->stack_top;
547 /* all processes were run, go to the barrier */
548 XBT_DEBUG("No more processes to run");
550 unsigned long worker_id =
551 (unsigned long) xbt_os_thread_get_specific(raw_worker_id_key); /* id stored for this thread by smx_ctx_raw_resume_parallel() */
553 next_context = (smx_context_t)raw_workers_context[worker_id]; /* context the worker thread was running before it resumed processes */
554 XBT_DEBUG("Restoring worker stack %lu (working threads = %lu)",
555 worker_id, raw_threads_working);
556 next_stack = ((smx_ctx_raw_t)next_context)->stack_top;
559 SIMIX_context_set_current(next_context);
560 raw_swapcontext(&((smx_ctx_raw_t) context)->stack_top, next_stack); /* save our stack pointer in `context` and continue on next_stack */
565 * \brief Resumes sequentially in the current worker thread the processes ready
567 * \param first_process the first process to resume
569 static void smx_ctx_raw_resume_parallel(smx_process_t first_process)
571 #ifdef CONTEXT_THREADS
572 unsigned long worker_id = __sync_fetch_and_add(&raw_threads_working, 1); /* atomically take the next free worker id */
573 xbt_os_thread_set_specific(raw_worker_id_key, (void*) worker_id); /* the id itself is stored in the thread-specific slot, cast to a pointer */
574 smx_ctx_raw_t worker_context = (smx_ctx_raw_t)SIMIX_context_self();
575 raw_workers_context[worker_id] = worker_context; /* so smx_ctx_raw_suspend_parallel() can switch back to this worker */
576 XBT_DEBUG("Saving worker stack %lu", worker_id);
577 raw_stack_t* worker_stack = &(worker_context)->stack_top;
580 smx_context_t context = first_process->context;
581 SIMIX_context_set_current(context);
582 raw_swapcontext(worker_stack, ((smx_ctx_raw_t) context)->stack_top); /* save the worker's stack pointer and jump onto the process's stack */
587 * \brief Resumes in parallel all processes ready to run.
589 static void smx_ctx_raw_runall_parallel(void)
591 #ifdef CONTEXT_THREADS
592 raw_threads_working = 0; /* worker ids are handed out again from 0 for this round */
593 xbt_parmap_apply(raw_parmap, (void_f_pvoid_t) smx_ctx_raw_resume_parallel,
594 simix_global->process_to_run); /* smx_ctx_raw_resume_parallel() is the function applied to the runnable processes */
596 xbt_die("You asked for a parallel execution, but you don't have any threads."); /* NOTE(review): presumably the branch compiled when CONTEXT_THREADS is not defined - confirm */
601 * \brief Resumes all processes ready to run.
603 #ifdef ADAPTIVE_THRESHOLD
604 static void smx_ctx_raw_runall(void)
/* Adaptive variant: keeps an average time per process for serial and for
 * parallel rounds, moves the parallel threshold by one towards whichever
 * mode was cheaper, then runs this round in the mode the threshold selects. */
606 unsigned long nb_processes = xbt_dynar_length(simix_global->process_to_run);
607 reached_seq_limit = (seq_sched_round % SCHED_ROUND_LIMIT == 0);
608 reached_par_limit = (par_sched_round % SCHED_ROUND_LIMIT == 0);
610 if(reached_par_limit){
612 par_ratio = (par_proc_that_ran != 0) ? (par_time / (double)par_proc_that_ran) : 0;
613 par_time = 0; par_proc_that_ran = 0;
616 if(reached_seq_limit){
618 seq_ratio = (seq_proc_that_ran != 0) ? (seq_time / (double)seq_proc_that_ran) : 0;
619 seq_time = 0; seq_proc_that_ran = 0;
622 if(reached_seq_limit && reached_par_limit){
623 if(seq_ratio > par_ratio){ /* serial rounds cost more per process: lower the threshold */
624 SIMIX_context_set_parallel_threshold(SIMIX_context_get_parallel_threshold() - 1);
626 SIMIX_context_set_parallel_threshold(SIMIX_context_get_parallel_threshold() + 1);
/* nb_processes is an unsigned long, so it is printed with %lu (was %d). */
630 XBT_CRITICAL("Adaptive Algorithm. Parallel Threshold is: %d. Processes: %lu", SIMIX_context_get_parallel_threshold(), nb_processes);
631 if (nb_processes >= SIMIX_context_get_parallel_threshold()) {
632 XBT_DEBUG("Runall // %lu", nb_processes);
633 simix_global->context_factory->suspend = smx_ctx_raw_suspend_parallel; /* the suspend hook must match the mode used for this round */
634 xbt_os_cputimer_start(round_time);
635 smx_ctx_raw_runall_parallel();
636 xbt_os_cputimer_stop(round_time);
637 par_time += xbt_os_timer_elapsed(round_time);
638 par_proc_that_ran += nb_processes;
641 XBT_DEBUG("Runall serial %lu", nb_processes);
642 simix_global->context_factory->suspend = smx_ctx_raw_suspend_serial;
643 xbt_os_cputimer_start(round_time);
644 #ifdef TIME_BENCH_PER_SR
645 smx_ctx_raw_runall_serial(simix_global->process_to_run);
647 smx_ctx_raw_runall_serial();
649 xbt_os_cputimer_stop(round_time);
650 seq_time += xbt_os_timer_elapsed(round_time);
651 seq_proc_that_ran += nb_processes;
658 static void smx_ctx_raw_runall(void)
/* Runs the round in parallel when parallel execution is enabled and there are
 * more runnable processes than the parallel threshold, serially otherwise.
 * The matching suspend hook is installed first in both cases. */
660 #ifdef TIME_BENCH_ENTIRE_SRS
/* NOTE(review): SIMIX_ctx_raw_factory_init() already creates `timer` in this
 * configuration; creating another one on every round looks like a leak - confirm. */
662 timer = xbt_os_timer_new();
665 unsigned long nb_processes = xbt_dynar_length(simix_global->process_to_run);
666 if (SIMIX_context_is_parallel() && SIMIX_context_get_parallel_threshold()<nb_processes) {
667 XBT_DEBUG("Runall // %lu", nb_processes);
668 simix_global->context_factory->suspend = smx_ctx_raw_suspend_parallel;
670 #ifdef TIME_BENCH_ENTIRE_SRS
671 xbt_os_cputimer_start(timer);
674 smx_ctx_raw_runall_parallel();
676 #ifdef TIME_BENCH_ENTIRE_SRS
677 xbt_os_cputimer_stop(timer);
678 elapsed = xbt_os_timer_elapsed(timer);
681 XBT_DEBUG("Runall serial %lu", nb_processes);
682 simix_global->context_factory->suspend = smx_ctx_raw_suspend_serial;
684 #ifdef TIME_BENCH_PER_SR
685 smx_ctx_raw_runall_serial(simix_global->process_to_run);
688 #ifdef TIME_BENCH_ENTIRE_SRS
689 xbt_os_cputimer_start(timer);
692 smx_ctx_raw_runall_serial();
694 #ifdef TIME_BENCH_ENTIRE_SRS
695 xbt_os_cputimer_stop(timer);
696 elapsed = xbt_os_timer_elapsed(timer);
701 #ifdef TIME_BENCH_ENTIRE_SRS
/* nb_processes is an unsigned long, so it is printed with %lu (was %d). */
702 XBT_CRITICAL("Total time SR %u = %f, %lu", sr_count, elapsed, nb_processes);