1 -- A SimGrid Lua implementation of the Chord DHT
3 -- Copyright (c) 2011-2012, 2014. The SimGrid Team.
4 -- All rights reserved.
6 -- This program is free software; you can redistribute it and/or modify it
7 -- under the terms of the license (GNU LGPL) which comes with this package.
16 max_simulation_time = 1000
18 fix_fingers_delay = 120
19 check_predecessor_delay = 120
22 -- current node (don't worry, globals are duplicated in each simulated process)
24 -- FIXME: my_id does not exist.
26 next_finger_to_fix = 1,
32 -- Main function of each Chord process
35 -- - the id of a guy I know in the system (except for the first node)
38 simgrid.debug("Hi! This is my first message; I just entered the program!")
39 -- TODO simplify the deployment file
42 my_node.id = math.tointeger(args[1])
43 simgrid.debug("My id is now " .. my_node.id)
45 known_id = math.tointeger(args[2])
46 simgrid.info("Updated my known_id to " .. known_id)
49 -- initialize the node and the fingertable;
50 -- at the beginning, this node only knows itself (we need to discover others)
51 for i = 1, nb_bits, 1 do
52 my_node.fingers[i] = my_node.id
55 -- Let's make sure we can receive messages!
56 my_node.comm_recv = simgrid.task.irecv(my_node.id)
59 local join_success = false
60 --simgrid.info(known_id)
62 if known_id == nil then
63 -- only the first node ("Jacqueline") will enter here
64 -- as configured in file ../../msg/chord/chord.xml
65 simgrid.debug("I'm the node that is in charge. Going to create everything.")
69 -- Communicate to the first node and join the ring
70 -- This will also initialize
71 -- my_node.predecessor and my_node.successor
72 join_success = join(known_id)
75 -- At this point, finger[1] does not necessarily actually point
76 -- to the *real* successor; it might be that the first node still
77 -- didn't notify us that another node joined with
78 -- an ID that satisfies my_id <= ID <= current_successorId
80 -- TODO Remove this, but make sure my_node.predecessor is initialized somewhere
81 --if my_node.id == 1 then
82 --my_node.predecessor = 1
88 local now = simgrid.get_clock()
89 local next_stabilize_date = now + stabilize_delay
90 local next_fix_fingers_date = now + fix_fingers_delay
91 local next_check_predecessor_date = now + check_predecessor_delay
92 local next_lookup_date = now + lookup_delay
96 simgrid.debug("I'm now entering the main loop.")
98 while now < max_simulation_time do
100 task, err = my_node.comm_recv:test()
101 simgrid.info(now .. " " .. next_stabilize_date .. " " .. now + stabilize_delay .. " " .. next_fix_fingers_date .. " " .. next_check_predecessor_date .. " " .. next_lookup_date)
104 -- I received a task: answer it
105 simgrid.info("I received a task of type '" .. task.type .."'! My id is " .. my_node.id)
106 my_node.comm_recv = simgrid.task.irecv(my_node.id)
109 -- the communication has failed: nevermind, we'll try again
110 simgrid.info("Error while receiving a task! My id is " .. my_node.id)
111 my_node.comm_recv = simgrid.task.irecv(my_node.id)
113 -- no task was received: do periodic calls
115 if now >= next_stabilize_date then
116 simgrid.debug("Stabilizing...")
118 simgrid.debug("Finished stabilizing!")
119 next_stabilize_date = simgrid.get_clock() + stabilize_delay
121 --elseif now >= next_fix_fingers_date then
123 --next_fix_fingers_date = simgrid.get_clock() + fix_fingers_delay
125 --elseif now >= next_check_predecessor_date then
126 --check_predecessor()
127 --next_check_predecessor_date = simgrid.get_clock() + check_predecessor_delay
129 --elseif now >= next_lookup_date then
131 --simgrid.debug("I'm now executing a lookup, as lookup_delay makes me do this. " .. simgrid.get_clock())
132 --next_lookup_date = simgrid.get_clock() + lookup_delay
135 ---- nothing to do: sleep for a while
136 simgrid.debug("Didn't have to stabilize, update my fingers, check my predecessors or do a random lookup; hence, I'm starting to sleep now...")
137 simgrid.process.sleep(5)
138 simgrid.debug("Slept for 5s")
141 now = simgrid.get_clock()
149 -- Makes the current node leave the ring
152 simgrid.info("Leaving the ring, because max_simulation_time was reached.")
153 -- TODO: notify others
156 -- This function is called when the current node receives a task
157 -- and can not immediately deal with it; for instance, if the host
158 -- waits on a response for a 'find successor' query but receives a
159 -- 'get predecessor' message instead; we cannot just discard this
160 -- message so we deal with it here.
162 -- - task: the task received
163 function handle_task(task)
165 simgrid.debug("Handling task in handle_task()")
166 local type = task.type
168 if type == "find successor" then
170 task.answer_to = math.tointeger(task.answer_to)
171 task.request_id = math.tointeger(task.request_id)
173 simgrid.info("Received a 'find successor' request from " .. string.format("%d", task.answer_to) ..
174 " for id " .. string.format("%d", task.request_id))
176 -- Is my successor have the right host? This can happen if there are holes
177 -- in the ring; for instance, if my id is 13 and my successor is 17 and
178 -- 14,15,16 don't exist but I'm asked for 15, then yes, 17 is the right
179 -- answer to the request.
181 -- Test: my_node.id + 1 <= task.request_id <= my_node.fingers[1]
183 -- TODO: Why the +1? We could receive this message from a host that forwarded
184 -- this message (and the original sender doesn't know us),
185 -- so why do we exclude ourselves?
186 if is_in_interval(task.request_id, my_node.id + 1, my_node.fingers[1]) then
188 simgrid.info("Sending back a 'find successor answer' to " ..
189 string.format("%d", task.answer_to) .. ": the successor of " .. string.format("%d", task.request_id) ..
190 " is " .. string.format("%d", my_node.fingers[1]))
192 task.type = "find successor answer"
193 -- TODO: Can we remove the "" here?
194 task.answer = math.tointeger(my_node.fingers[1])
195 simgrid.info("Answer" .. task.answer)
196 task:dsend(math.tointeger(task.answer_to))
198 -- forward the request to the closest preceding finger in my table
200 simgrid.info("Forwarding the 'find successor' request to my closest preceding finger")
201 task:dsend(closest_preceding_node(task.request_id))
204 elseif type == "get predecessor" then
205 simgrid.info("Received a 'find predecessor' request from " .. string.format("%d", task.answer_to) ..
206 " for id. Sending back an answer.")
208 task.type = "get predecessor answer"
210 --for i,v in pairs(my_node) do
211 --print(my_node.id, i, v)
213 --print(my_node.predecessor)
214 if my_node.predecessor ~= nil then
215 task.answer = math.tointeger(my_node.predecessor)
216 --print(my_node.predecessor)
218 -- FIXME: This is completely wrong here. Fix this;
219 -- we need to figure out what to send if we don't know our
220 -- predecessor yet (this DOES happen and this means that task.answer
221 -- is initialised with nil and when task.answer is accessed (not here), it will
222 -- break in Lua 5.3 (because it is nil).
223 simgrid.critical("Don't know my predecessor yet!")
224 my_node.predecessor = remote_get_predecessor(my_node.fingers[1])
225 task.answer = my_node.predecessor
228 --print("It will break now, since task.answer is nil here.")
229 --task.answer = my_node.predecessor
232 task:dsend(math.tointeger(task.answer_to))
233 --print("After dsend returned")
235 elseif type == "notify" then
236 -- someone is telling me that he may be my new predecessor
237 simgrid.info("Host id " .. task.request_id .. " wants to be my predecessor")
238 notify(math.tointeger(task.request_id))
240 elseif type == "predecessor leaving" then
242 simgrid.debug("predecessor leaving")
244 elseif type == "successor_leaving" then
245 -- TODO: We could / should use something like table.remove(my_node.fingers, 1) here
248 elseif type == "find successor answer" then
249 -- ignoring, this is handled in remote_find_successor
252 elseif type == "get predecessor answer" then
253 -- ignoring, this is already handled in
256 error("Unknown type of task received: " .. task.type)
259 simgrid.info("I'm leaving handle_task() now.")
262 -- Returns whether an id belongs to the interval [a, b[.
263 -- The parameters are normalized to make sure they are between 0 and nb_keys - 1.
264 -- 1 belongs to [62, 3].
265 -- 1 does not belong to [3, 62].
266 -- 63 belongs to [62, 3].
267 -- 63 does not belong to [3, 62].
268 -- 24 belongs to [21, 29].
269 -- 24 does not belong to [29, 21].
270 function is_in_interval(id, a, b)
271 -- normalize the parameters
272 -- TODO: Currently, nb_bits = 24; so a,b,id < 24! Really?
277 -- make sure a <= b and a <= id
289 -- Returns whether the current node is in the ring.
290 function has_joined()
292 return my_node.fingers[my_node.id] ~= nil
295 -- Creates a new Chord ring.
297 simgrid.debug("I've now initialized my predecessor and fingertable.")
298 --my_node.predecessor = my_node.id
299 my_node.predecessor = nil
300 my_node.fingers[1] = my_node.id
303 -- Attemps to join the Chord ring.
304 -- - known_id: id of a node already in the ring
305 -- - return value: true if the join was successful
306 function join(known_id)
308 simgrid.info("Joining the ring with id " .. my_node.id .. ", knowing node " .. known_id)
310 local successor = remote_find_successor(known_id, my_node.id)
311 simgrid.info("Returned from remote_find_successor; my successor is " .. successor)
312 if successor == nil then
313 simgrid.critical("Cannot join the ring.")
317 -- We don't know the predecessor yet, so we initialize it with NULL
318 my_node.predecessor = nil
319 my_node.fingers[1] = successor
321 -- Everything was successfull!
325 -- Returns the closest preceding finger of an id with respect to the finger
326 -- table of the current node.
327 -- - id: the id to find
328 -- - return value: the closest preceding finger of that id
329 function closest_preceding_node(id)
331 for i = nb_bits, 1, -1 do
332 if is_in_interval(my_node.fingers[i], my_node.id + 1, id - 1) then
333 -- finger i is the first one before id
334 simgrid.info("fix_fingers: The closest preceding node for " .. id .. " is finger " .. i .. " (node " .. my_node.fingers[i] .. ")")
335 return my_node.fingers[i]
340 -- Finds the successor of an id
341 -- id: the id to find
342 -- return value: the id of the successor, or nil if the request failed
343 function find_successor(id)
345 if is_in_interval(id, my_node.id + 1, my_node.fingers[1]) then
346 -- my successor is the successor
347 simgrid.info("Looking for successor of " .. id .. ", but I determined it's my own successor: " .. my_node.fingers[1])
348 return my_node.fingers[1]
350 -- ask to the closest preceding finger in my table
351 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
352 local ask_to = closest_preceding_node(id)
353 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
354 return remote_find_successor(ask_to, id)
359 -- Asks a remote node the successor of an id.
360 -- ask_to: id of a remote node to ask to
361 -- id: the id to find
362 -- return value: the id of the successor, or nil if the request failed
363 function remote_find_successor(ask_to, id)
365 local task = simgrid.task.new("", comp_size, comm_size)
366 task.type = "find successor"
367 task.request_id = id -- This is the id we want to find
368 task.answer_to = my_node.id -- This is where the answer needs to go
371 simgrid.info("Sending a 'find successor' request to " .. ask_to .. " for id " .. id .. " (timeout=".. timeout .. ")")
372 if task:send(ask_to, timeout) then
373 -- request successfully sent: wait for an answer
376 simgrid.info("New iteration in while loop of remote_find_successor(); I'm still waiting for a response!")
377 --print(task.request_id)
378 simgrid.info("Starting to wait for a message; timeout=" .. timeout)
379 task = my_node.comm_recv:wait(timeout)
380 simgrid.info("Finished to wait")
381 -- TODO Do we need this?
382 --for i,v in pairs(task) do
385 --simgrid.info("I will crash!")
386 --task.answer = math.tointeger(task.answer)
387 --simgrid.info("Ich denke task.type ist leer")
388 --simgrid.info("Before irecv: " .. my_node.id)
390 -- Even if the recv above failed (timeout occurred) -- we want to be
391 -- able to receive a message if it comes in, even without us explicitly
392 -- calling the recv() method.
393 my_node.comm_recv = simgrid.task.irecv(my_node.id)
396 -- failed to receive the answer
399 -- a task was received: is it the expected answer (i.e., the response to
400 -- our query and for the id we're interested in)
401 if task.type ~= "find successor answer" or task.request_id ~= id then
402 -- this is not our answer, but we still need to handle it.
403 simgrid.info("Wrong request of type " .. task.type .. " received, expected 'find successor answer'")
407 -- this is our answer
408 simgrid.info("Received the answer to my 'find successor' request for id " ..
409 id .. ": the successor is " .. task.answer)
411 -- TODO: Do we need math.tointeger here?
412 return math.tointeger(task.answer)
417 simgrid.info("Failed to send the 'find successor' request to " .. ask_to ..
421 -- This can never be reached, because if this host finds the successor, it
422 -- will return it right away!
423 simgrid.info("Whooops! I should never reach this statement, because I didn't find a successor!")
425 -- We need to return the successor here
428 -- Asks a remote node its predecessor.
429 -- ask_to: id of a remote node to ask to
430 -- return value: the id of its predecessor, or nil if the request failed
431 function remote_get_predecessor(ask_to)
433 local task = simgrid.task.new("", comp_size, comm_size)
434 task.type = "get predecessor"
435 task.answer_to = math.tointeger(my_node.id)
436 -- TODO c.heinrich: Remove this
437 --task.note = "Bla " .. ask_to .. " at time " .. simgrid.get_clock()
439 simgrid.info("Sending request for '" .. task.type .."' to id '" .. ask_to .. "'")
440 if task:send(ask_to, timeout) then
441 simgrid.info("Done sending the request to " .. ask_to)
442 -- request successfully sent: wait for an answer
443 -- We need to iterate here because we might receive other
444 -- messages too (but not the answer to the request we just sent);
445 -- hence, we loop here.
447 simgrid.info("Starting to wait. My id: " .. my_node.id)
448 task = my_node.comm_recv:wait(timeout)
449 simgrid.info("Finished to wait. My id: " .. my_node.id .. " ask_to is " .. ask_to)
450 my_node.comm_recv = simgrid.task.irecv(my_node.id)
453 -- failed to receive the answer
454 simgrid.info("Task not received - is null?")
457 -- a task was received: is it the expected answer?
458 if task.type ~= "get predecessor answer" then
459 -- this is not our answer
460 simgrid.info("Task is NOT 'get predecessor answer'")
463 -- this is our answer
464 -- FIXME make sure the message answers to this particular request
465 --simgrid.info(task.answer)
466 for i,v in pairs(task) do
467 print(my_node.id, i, v)
469 simgrid.info("Task is answer for predecessor! The answer is: ")
470 if (task.answer) then print("NIL!\n") else print("Not NIL\n") end
480 -- Checks the immediate successor of the current node.
483 local successor = my_node.fingers[1]
484 if successor ~= my_node.id then
485 simgrid.info("Getting remote predecessor from ".. successor)
486 candidate = remote_get_predecessor(successor)
487 simgrid.info("Received ".. candidate .. " as candidate")
489 candidate = my_node.predecessor
492 simgrid.info("Still stabilizing")
493 -- candidate might become my new successor
494 if candidate ~= nil and is_in_interval(candidate, my_node.id + 1, successor - 1) then
495 simgrid.info("I'm updating my successor to " .. math.tointeger(candidate))
496 my_node.fingers[1] = math.tointeger(candidate)
498 -- If candidate is not my_node.id, then I should notify candidate that I'm here.
499 -- (So this node updates it's predecessor to me)
500 --remote_notify(candidate, my_node.id)
503 simgrid.info("Successor: " .. successor .. " and my id: " .. my_node.id)
504 -- If candidate is nil, this means that our successor has no predecessor.
505 -- So we should tell him about us...
506 -- TODO: I think a host that receives a message could automatically add
507 -- this other node as a predecessor if it doesn't have any... needs to
508 -- be implemented somewhere else, not here.
509 if candidate == nil and successor ~= my_node.id then
510 remote_notify(successor, my_node.id)
514 -- Notifies the current node that its predecessor may have changed
515 -- - candidate_predecessor: the possible new predecessor
516 function notify(candidate_predecessor)
517 if my_node.predecessor == nil or is_in_interval(candidate_predecessor,
518 my_node.predecessor + 1, my_node.id - 1) then
519 simgrid.info("Updated my predecessor to " .. candidate_predecessor)
520 my_node.predecessor = math.tointeger(candidate_predecessor)
524 -- Notifies a remote node that its predecessor my have changed.
526 -- - candidate the possible new predecessor
527 function remote_notify(notify_to, candidate_predecessor)
529 simgrid.info("Updating someone else's predecessor (id: " .. notify_to .. " predecessor to ".. candidate_predecessor .. ")")
530 local task = simgrid.task.new("", comp_size, comm_size)
532 task.request_id = candidate_predecessor
533 task:dsend(notify_to)
536 -- Refreshes the finger table of the current node,
537 -- one finger per call.
538 function fix_fingers()
540 local i = math.tointeger(my_node.next_finger_to_fix)
541 local id = find_successor(math.tointeger(my_node.id + 2^i))
542 simgrid.info("Called fix_fingers(). Next finger to fix: " .. i .. " and I will check " .. my_node.id + 2^i .. ". Request returned " .. id)
545 if id ~= my_node.fingers[i] then
546 my_node.fingers[i] = id
547 simgrid.info("fix_fingers: Updated finger " .. i .. " to " .. id)
549 simgrid.info("fix_fingers: id is " .. id)
551 my_node.next_finger_to_fix = (i % nb_bits) + 1
555 -- Checks whether the predecessor of the current node has failed.
556 function check_predecessor()
560 -- Performs a find successor request to an arbitrary id.
561 function random_lookup()
566 simgrid.platform(arg[1] or "../../msg/msg_platform.xml")
567 simgrid.application(arg[2] or "../../msg/chord/chord90.xml")