From 7baa77e301b3581d4107cbbaab44183aa6b9b918 Mon Sep 17 00:00:00 2001 From: Gabriel Corona Date: Fri, 21 Mar 2014 12:35:13 +0100 Subject: [PATCH] Increase the MC heap size as a quickfix for the heap overflow bug When the model-checker is used, two heaps are used: * one for the application and the simulator; * one for the model-checker. The heaps are initially of size STD_HEAP_SIZE (currently about 19-20MiB) and are next to each other. In some cases, the first heap overflows into the second one, leading to corruption of the second heap. One possible symptom of this is a freeze in malloc/sem_wait: In strace: futex(0x998c000, FUTEX_WAIT, 0, NULL The application heap overflows into the model-checker heap and zeroes (via memset) the semaphore/futex at the beginning of the second heap. In gdb (with "watch 0x998c000"): Old value = 1 New value = 0 memset () at ../sysdeps/x86_64/memset.S:98 98 ../sysdeps/x86_64/memset.S: Aucun fichier ou dossier de ce type. (gdb) bt #0 memset () at ../sysdeps/x86_64/memset.S:98 #1 0x00007ffff7a5419b in mmalloc (mdp=0x8603000, size=8389608) at /home/corona/workspace/simgrid/src/xbt/mmalloc/mmalloc.c:140 #2 0x00007ffff7a55d4f in malloc (n=8389608) at /home/corona/workspace/simgrid/src/xbt/mmalloc/mm_legacy.c:71 #3 0x00007ffff7a55e3a in calloc (nmemb=8389608, size=1) at /home/corona/workspace/simgrid/src/xbt/mmalloc/mm_legacy.c:110 #4 0x00007ffff7948224 in xbt_malloc0 (n=8389608) at /home/corona/workspace/simgrid/include/xbt/sysdep.h:119 #5 smx_ctx_base_factory_create_context_sized (size=8389608, code=0x401299 , argc=1, argv=0x8604900, cleanup_func=0x7ffff7927b78 , process=0x865e400) at /home/corona/workspace/simgrid/src/simix/smx_context_base.c:46 #6 0x00007ffff7a1b64d in smx_ctx_sysv_create_context_sized (size=8389608, code=0x401299 , argc=1, argv=0x8604900, cleanup_func=0x7ffff7927b78 , process=0x865e400) at /home/corona/workspace/simgrid/src/simix/smx_context_sysv.c:127 #7 0x00007ffff7a1b80a in smx_ctx_sysv_create_context (code=0x401299 , argc=1, 
argv=0x8604900, cleanup_func=0x7ffff7927b78 , process=0x865e400) at /home/corona/workspace/simgrid/src/simix/smx_context_sysv.c:186 #8 0x00007ffff795749d in SIMIX_context_new (code=0x401299 , argc=1, argv=0x8604900, cleanup_func=0x7ffff7927b78 , simix_process=0x865e400) at /home/corona/workspace/simgrid/src/simix/smx_private.h:249 #9 0x00007ffff7958787 in SIMIX_process_create (process=0x7fffffffddc0, name=0x8604a00 "coordinator", code=0x401299 , data=0x8605000, hostname=0x906ec00 "Tremblay", kill_time=-1, argc=1, argv=0x8604900, properties=0x0, auto_restart=0, parent_process=0x865e200) at /home/corona/workspace/simgrid/src/simix/smx_process.c:272 #10 0x00007ffff79580db in SIMIX_pre_process_create (simcall=0x865e298, process=0x7fffffffddc0, name=0x8604a00 "coordinator", code=0x401299 , data=0x8605000, hostname=0x906ec00 "Tremblay", kill_time=-1, argc=1, argv=0x8604900, properties=0x0, auto_restart=0) at /home/corona/workspace/simgrid/src/simix/smx_process.c:208 #11 0x00007ffff795c1d2 in SIMIX_simcall_pre (simcall=0x865e298, value=0) at /home/corona/workspace/simgrid/src/simix/simcalls_generated_case.c:218 #12 0x00007ffff7965842 in simcall_BODY_process_create (process=0x7fffffffddc0, name=0x8604a00 "coordinator", code=0x401299 , data=0x8605000, hostname=0x906ec00 "Tremblay", kill_time=-1, argc=1, argv=0x8604900, properties=0x0, auto_restart=0) at /home/corona/workspace/simgrid/src/simix/simcalls_generated_body.c:682 #13 0x00007ffff796f758 in simcall_process_create (process=0x7fffffffddc0, name=0x8604a00 "coordinator", code=0x401299 , data=0x8605000, hostname=0x906ec00 "Tremblay", kill_time=-1, argc=1, argv=0x8604900, properties=0x0, auto_restart=0) at /home/corona/workspace/simgrid/src/simix/smx_user.c:581 #14 0x00007ffff7927fff in MSG_process_create_with_environment (name=0x8604a00 "coordinator", code=0x401299 , data=0x0, host=0x906eb00, argc=1, argv=0x8604900, properties=0x0) at /home/corona/workspace/simgrid/src/msg/msg_process.c:171 #15 0x00007ffff7927c8c in 
MSG_process_create_from_SIMIX (process=0x7fffffffdf00, name=0x8604a00 "coordinator", code=0x401299 , data=0x0, hostname=0x906ec00 "Tremblay", kill_time=-1, argc=1, argv=0x8604900, properties=0x0, auto_restart=0, parent_process=0x0) at /home/corona/workspace/simgrid/src/msg/msg_process.c:66 #16 0x00007ffff7949fd8 in parse_process (process=0x7fffffffe000) at /home/corona/workspace/simgrid/src/simix/smx_deployment.c:74 #17 0x00007ffff79bb589 in sg_platf_new_process (process=0x7fffffffe000) at /home/corona/workspace/simgrid/src/surf/sg_platf.c:265 #18 0x00007ffff79fa331 in ETag_surfxml_process () at /home/corona/workspace/simgrid/src/surf/surfxml_parse.c:946 #19 0x00007ffff79f2fcc in surf_parse_lex () at src/surf/simgrid_dtd.c:9166 #20 0x00007ffff79fac4e in _surf_parse () at /home/corona/workspace/simgrid/src/surf/surfxml_parse.c:1045 #21 0x00007ffff794a220 in SIMIX_launch_application (file=0x7fffffffe63b "deploy_bugged1_liveness.xml") at /home/corona/workspace/simgrid/src/simix/smx_deployment.c:125 #22 0x00007ffff7921cb1 in MSG_launch_application (file=0x7fffffffe63b "deploy_bugged1_liveness.xml") at /home/corona/workspace/simgrid/src/msg/msg_deployment.c:32 #23 0x0000000000401cf2 in main (argc=3, argv=0x7fffffffe348) at /home/corona/workspace/simgrid/examples/msg/mc/bugged1_liveness.c:150 The bug is triggered by commit 5a37cda which increases the default stack size (the stacks are allocated in the application heap). Things to change: Solution a: 1) add an (optional) "hard address" to a given heap in order to prevent the heap from overflowing into something else; 2) and do not place the heaps next to each other. The problem is that someone might mmap something in the area of the heaps. Solution b: mmap a very big area of memory (say 1TiB) for each heap and never grow it again. Let the system allocate memory lazily. It might be necessary to add a parameter to choose this value in order to be able to handle very large problems. Solution c: Use segmented heaps. 
But we would need to change both the model-checker and the malloc implementation to do this (i.e. We don't want to do this). Temporary workarounds: * either increase STD_HEAP_SIZE (in the source code); * or use a smaller value for the stack size (--cfg:contexts/stack_size=128). --- src/include/mc/mc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/mc/mc.h b/src/include/mc/mc.h index 494b6516cd..84169bdc77 100644 --- a/src/include/mc/mc.h +++ b/src/include/mc/mc.h @@ -17,7 +17,7 @@ #include "xbt/automaton.h" #include "xbt/dynar.h" -#define STD_HEAP_SIZE 20480000 /* Maximum size of the system's heap */ +#define STD_HEAP_SIZE (50*1024*1024) /* Maximum size of the system's heap */ SG_BEGIN_DECL() -- 2.20.1