1 -- A SimGrid Lua implementation of the Chord DHT
3 -- Copyright (c) 2011-2012, 2014. The SimGrid Team.
4 -- All rights reserved.
6 -- This program is free software; you can redistribute it and/or modify it
7 -- under the terms of the license (GNU LGPL) which comes with this package.
16 max_simulation_time = 1000
18 fix_fingers_delay = 120
19 check_predecessor_delay = 120
22 -- current node (don't worry, globals are duplicated in each simulated process)
24 -- FIXME: my_id does not exist.
26 next_finger_to_fix = 1,
32 -- Main function of each Chord process
35 -- - the id of a guy I know in the system (except for the first node)
38 simgrid.debug("Hi! This is my first message; I just entered the program!")
39 -- TODO simplify the deployment file
42 my_node.id = math.tointeger(args[1])
43 simgrid.debug("My id is now " .. my_node.id)
45 known_id = math.tointeger(args[2])
46 simgrid.info("Updated my known_id to " .. known_id)
49 -- initialize the node and the fingertable;
50 -- at the beginning, this node only knows itself (we need to discover others)
51 for i = 1, nb_bits, 1 do
52 my_node.fingers[i] = my_node.id
55 -- Let's make sure we can receive messages!
56 my_node.comm_recv = simgrid.task.irecv(my_node.id)
59 local join_success = false
60 --simgrid.info(known_id)
62 if known_id == nil then
63 -- only the first node ("Jacqueline") will enter here
64 -- as configured in file ../../msg/chord/chord.xml
65 simgrid.debug("I'm the node that is in charge. Going to create everything.")
69 -- Communicate to the first node and join the ring
70 -- This will also initialize
71 -- my_node.predecessor and my_node.successor
72 join_success = join(known_id)
75 -- At this point, finger[1] does not necessarily actually point
76 -- to the *real* successor; it might be that the first node still
77 -- didn't notify us that another node joined with
78 -- an ID that satisfies my_id <= ID <= current_successorId
80 -- TODO Remove this, but make sure my_node.predecessor is initialized somewhere
81 --if my_node.id == 1 then
82 --my_node.predecessor = 1
88 local now = simgrid.get_clock()
89 local next_stabilize_date = now + stabilize_delay
90 local next_fix_fingers_date = now + fix_fingers_delay
91 local next_check_predecessor_date = now + check_predecessor_delay
92 local next_lookup_date = now + lookup_delay
96 simgrid.debug("I'm now entering the main loop.")
98 while now < max_simulation_time do
100 task, err = my_node.comm_recv:test()
101 simgrid.info(now .. " " .. next_stabilize_date .. " " .. now + stabilize_delay .. " " .. next_fix_fingers_date .. " " .. next_check_predecessor_date .. " " .. next_lookup_date)
104 -- I received a task: answer it
105 simgrid.info("I received a task of type '" .. task.type .."'! My id is " .. my_node.id)
106 my_node.comm_recv = simgrid.task.irecv(my_node.id)
109 -- the communication has failed: nevermind, we'll try again
110 simgrid.info("Error while receiving a task! My id is " .. my_node.id)
111 my_node.comm_recv = simgrid.task.irecv(my_node.id)
113 -- no task was received: do periodic calls
115 if now >= next_stabilize_date then
116 simgrid.debug("Stabilizing...")
118 simgrid.debug("Finished stabilizing!")
119 next_stabilize_date = simgrid.get_clock() + stabilize_delay
121 --elseif now >= next_fix_fingers_date then
123 --next_fix_fingers_date = simgrid.get_clock() + fix_fingers_delay
125 --elseif now >= next_check_predecessor_date then
126 --check_predecessor()
127 --next_check_predecessor_date = simgrid.get_clock() + check_predecessor_delay
129 --elseif now >= next_lookup_date then
131 --simgrid.debug("I'm now executing a lookup, as lookup_delay makes me do this. " .. simgrid.get_clock())
132 --next_lookup_date = simgrid.get_clock() + lookup_delay
135 ---- nothing to do: sleep for a while
136 simgrid.debug("Didn't have to stabilize, update my fingers, check my predecessors or do a random lookup; hence, I'm starting to sleep now...")
137 simgrid.process.sleep(5)
138 simgrid.debug("Slept for 5s")
141 now = simgrid.get_clock()
149 -- Makes the current node leave the ring
152 simgrid.info("Leaving the ring, because max_simulation_time was reached.")
153 -- TODO: notify others
156 -- This function is called when the current node receives a task
157 -- and can not immediately deal with it; for instance, if the host
158 -- waits on a response for a 'find successor' query but receives a
159 -- 'get predecessor' message instead; we cannot just discard this
160 -- message so we deal with it here.
162 -- - task: the task received
163 function handle_task(task)
165 simgrid.debug("Handling task in handle_task()")
166 local type = task.type
168 if type == "find successor" then
170 task.answer_to = math.tointeger(task.answer_to)
171 task.request_id = math.tointeger(task.request_id)
173 simgrid.info("Received a 'find successor' request from " .. string.format("%d", task.answer_to) ..
174 " for id " .. string.format("%d", task.request_id))
176 -- Is my successor have the right host? This can happen if there are holes
177 -- in the ring; for instance, if my id is 13 and my successor is 17 and
178 -- 14,15,16 don't exist but I'm asked for 15, then yes, 17 is the right
179 -- answer to the request.
181 -- Test: my_node.id + 1 <= task.request_id <= my_node.fingers[1]
183 -- TODO: Why the +1? We could receive this message from a host that forwarded
184 -- this message (and the original sender doesn't know us),
185 -- so why do we exclude ourselves?
186 if is_in_interval(task.request_id, my_node.id + 1, my_node.fingers[1]) then
188 simgrid.info("Sending back a 'find successor answer' to " ..
189 string.format("%d", task.answer_to) .. ": the successor of " .. string.format("%d", task.request_id) ..
190 " is " .. string.format("%d", my_node.fingers[1]))
192 task.type = "find successor answer"
193 -- TODO: Can we remove the "" here?
194 task.answer = math.tointeger(my_node.fingers[1])
195 simgrid.info("Answer" .. task.answer)
196 task:dsend(math.tointeger(task.answer_to))
198 -- forward the request to the closest preceding finger in my table
200 simgrid.info("Forwarding the 'find successor' request to my closest preceding finger")
201 task:dsend(closest_preceding_node(task.request_id))
204 elseif type == "get predecessor" then
205 simgrid.info("Received a 'find predecessor' request from " .. string.format("%d", task.answer_to) ..
206 " for id. Sending back an answer.")
208 task.type = "get predecessor answer"
210 --for i,v in pairs(my_node) do
211 --print(my_node.id, i, v)
213 --print(my_node.predecessor)
214 if my_node.predecessor ~= nil then
215 task.answer = math.tointeger(my_node.predecessor)
216 --print(my_node.predecessor)
218 -- FIXME: This is completely wrong here. Fix this;
219 -- we need to figure out what to send if we don't know our
221 simgrid.critical("Don't know my predecessor yet!")
222 my_node.predecessor = remote_get_predecessor(my_node.fingers[1])
223 task.answer = my_node.predecessor
226 --print("It will break now, since task.answer is nil here.")
227 --task.answer = my_node.predecessor
230 task:dsend(math.tointeger(task.answer_to))
231 --print("After dsend returned")
233 elseif type == "notify" then
234 -- someone is telling me that he may be my new predecessor
235 simgrid.info("Host id " .. task.request_id .. " wants to be my predecessor")
236 notify(math.tointeger(task.request_id))
238 elseif type == "predecessor leaving" then
240 simgrid.debug("predecessor leaving")
242 elseif type == "successor_leaving" then
243 -- TODO: We could / should use something like table.remove(my_node.fingers, 1) here
246 elseif type == "find successor answer" then
247 -- ignoring, this is handled in remote_find_successor
250 elseif type == "get predecessor answer" then
251 -- ignoring, this is already handled in
254 error("Unknown type of task received: " .. task.type)
257 simgrid.info("I'm leaving handle_task() now.")
260 -- Returns whether an id belongs to the interval [a, b[.
261 -- The parameters are normalized to make sure they are between 0 and nb_keys - 1.
262 -- 1 belongs to [62, 3].
263 -- 1 does not belong to [3, 62].
264 -- 63 belongs to [62, 3].
265 -- 63 does not belong to [3, 62].
266 -- 24 belongs to [21, 29].
267 -- 24 does not belong to [29, 21].
268 function is_in_interval(id, a, b)
269 -- normalize the parameters
270 -- TODO: Currently, nb_bits = 24; so a,b,id < 24! Really?
275 -- make sure a <= b and a <= id
287 -- Returns whether the current node is in the ring.
288 function has_joined()
290 return my_node.fingers[my_node.id] ~= nil
293 -- Creates a new Chord ring.
295 simgrid.debug("I've now initialized my predecessor and fingertable.")
296 --my_node.predecessor = my_node.id
297 my_node.predecessor = nil
298 my_node.fingers[1] = my_node.id
301 -- Attemps to join the Chord ring.
302 -- - known_id: id of a node already in the ring
303 -- - return value: true if the join was successful
304 function join(known_id)
306 simgrid.info("Joining the ring with id " .. my_node.id .. ", knowing node " .. known_id)
308 local successor = remote_find_successor(known_id, my_node.id)
309 simgrid.info("Returned from remote_find_successor; my successor is " .. successor)
310 if successor == nil then
311 simgrid.critical("Cannot join the ring.")
315 -- We don't know the predecessor yet, so we initialize it with NULL
316 my_node.predecessor = nil
317 my_node.fingers[1] = successor
319 -- Everything was successfull!
323 -- Returns the closest preceding finger of an id with respect to the finger
324 -- table of the current node.
325 -- - id: the id to find
326 -- - return value: the closest preceding finger of that id
327 function closest_preceding_node(id)
329 for i = nb_bits, 1, -1 do
330 if is_in_interval(my_node.fingers[i], my_node.id + 1, id - 1) then
331 -- finger i is the first one before id
332 simgrid.info("fix_fingers: The closest preceding node for " .. id .. " is finger " .. i .. " (node " .. my_node.fingers[i] .. ")")
333 return my_node.fingers[i]
338 -- Finds the successor of an id
339 -- id: the id to find
340 -- return value: the id of the successor, or nil if the request failed
341 function find_successor(id)
343 if is_in_interval(id, my_node.id + 1, my_node.fingers[1]) then
344 -- my successor is the successor
345 simgrid.info("Looking for successor of " .. id .. ", but I determined it's my own successor: " .. my_node.fingers[1])
346 return my_node.fingers[1]
348 -- ask to the closest preceding finger in my table
349 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
350 local ask_to = closest_preceding_node(id)
351 simgrid.info("fix_fingers: Looking for successor of " .. id .. ", checking closest preceding node")
352 return remote_find_successor(ask_to, id)
357 -- Asks a remote node the successor of an id.
358 -- ask_to: id of a remote node to ask to
359 -- id: the id to find
360 -- return value: the id of the successor, or nil if the request failed
361 function remote_find_successor(ask_to, id)
363 local task = simgrid.task.new("", comp_size, comm_size)
364 task.type = "find successor"
365 task.request_id = id -- This is the id we want to find
366 task.answer_to = my_node.id -- This is where the answer needs to go
369 simgrid.info("Sending a 'find successor' request to " .. ask_to .. " for id " .. id .. " (timeout=".. timeout .. ")")
370 if task:send(ask_to, timeout) then
371 -- request successfully sent: wait for an answer
374 simgrid.info("New iteration in while loop of remote_find_successor(); I'm still waiting for a response!")
375 --print(task.request_id)
376 simgrid.info("Starting to wait for a message; timeout=" .. timeout)
377 task = my_node.comm_recv:wait(timeout)
378 simgrid.info("Finished to wait")
379 -- TODO Do we need this?
380 --for i,v in pairs(task) do
383 --simgrid.info("I will crash!")
384 --task.answer = math.tointeger(task.answer)
385 --simgrid.info("Ich denke task.type ist leer")
386 --simgrid.info("Before irecv: " .. my_node.id)
388 -- Even if the recv above failed (timeout occurred) -- we want to be
389 -- able to receive a message if it comes in, even without us explicitly
390 -- calling the recv() method.
391 my_node.comm_recv = simgrid.task.irecv(my_node.id)
394 -- failed to receive the answer
397 -- a task was received: is it the expected answer (i.e., the response to
398 -- our query and for the id we're interested in)
399 if task.type ~= "find successor answer" or task.request_id ~= id then
400 -- this is not our answer, but we still need to handle it.
401 simgrid.info("Wrong request of type " .. task.type .. " received, expected 'find successor answer'")
405 -- this is our answer
406 simgrid.info("Received the answer to my 'find successor' request for id " ..
407 id .. ": the successor is " .. task.answer)
409 -- TODO: Do we need math.tointeger here?
410 return math.tointeger(task.answer)
415 simgrid.info("Failed to send the 'find successor' request to " .. ask_to ..
419 -- This can never be reached, because if this host finds the successor, it
420 -- will return it right away!
421 simgrid.info("Whooops! I should never reach this statement, because I didn't find a successor!")
423 -- We need to return the successor here
426 -- Asks a remote node its predecessor.
427 -- ask_to: id of a remote node to ask to
428 -- return value: the id of its predecessor, or nil if the request failed
429 function remote_get_predecessor(ask_to)
431 local task = simgrid.task.new("", comp_size, comm_size)
432 task.type = "get predecessor"
433 task.answer_to = math.tointeger(my_node.id)
434 -- TODO c.heinrich: Remove this
435 --task.note = "Bla " .. ask_to .. " at time " .. simgrid.get_clock()
437 simgrid.info("Sending request for '" .. task.type .."' to id '" .. ask_to .. "'")
438 if task:send(ask_to, timeout) then
439 simgrid.info("Done sending the request to " .. ask_to)
440 -- request successfully sent: wait for an answer
441 -- We need to iterate here because we might receive other
442 -- messages too (but not the answer to the request we just sent);
443 -- hence, we loop here.
445 simgrid.info("Starting to wait. My id: " .. my_node.id)
446 task = my_node.comm_recv:wait(timeout)
447 simgrid.info("Finished to wait. My id: " .. my_node.id .. " ask_to is " .. ask_to)
448 my_node.comm_recv = simgrid.task.irecv(my_node.id)
451 -- failed to receive the answer
452 simgrid.info("Task not received - is null?")
455 -- a task was received: is it the expected answer?
456 if task.type ~= "get predecessor answer" then
457 -- this is not our answer
458 simgrid.info("Task is NOT 'get predecessor answer'")
461 -- this is our answer
462 -- FIXME make sure the message answers to this particular request
463 --simgrid.info(task.answer)
464 for i,v in pairs(task) do
465 print(my_node.id, i, v)
467 simgrid.info("Task is answer for predecessor! The answer is: ")
468 if (task.answer) then print("NIL!\n") else print("Not NIL\n") end
478 -- Checks the immediate successor of the current node.
481 local successor = my_node.fingers[1]
482 if successor ~= my_node.id then
483 simgrid.info("Getting remote predecessor from ".. successor)
484 candidate = remote_get_predecessor(successor)
485 simgrid.info("Received ".. candidate .. " as candidate")
487 candidate = my_node.predecessor
490 simgrid.info("Still stabilizing")
491 -- candidate might become my new successor
492 if candidate ~= nil and is_in_interval(candidate, my_node.id + 1, successor - 1) then
493 simgrid.info("I'm updating my successor to " .. math.tointeger(candidate))
494 my_node.fingers[1] = math.tointeger(candidate)
496 -- If candidate is not my_node.id, then I should notify candidate that I'm here.
497 -- (So this node updates it's predecessor to me)
498 --remote_notify(candidate, my_node.id)
501 simgrid.info("Successor: " .. successor .. " and my id: " .. my_node.id)
502 -- If candidate is nil, this means that our successor has no predecessor.
503 -- So we should tell him about us...
504 -- TODO: I think a host that receives a message could automatically add
505 -- this other node as a predecessor if it doesn't have any... needs to
506 -- be implemented somewhere else, not here.
507 if candidate == nil and successor ~= my_node.id then
508 remote_notify(successor, my_node.id)
512 -- Notifies the current node that its predecessor may have changed
513 -- - candidate_predecessor: the possible new predecessor
514 function notify(candidate_predecessor)
515 if my_node.predecessor == nil or is_in_interval(candidate_predecessor,
516 my_node.predecessor + 1, my_node.id - 1) then
517 simgrid.info("Updated my predecessor to " .. candidate_predecessor)
518 my_node.predecessor = math.tointeger(candidate_predecessor)
522 -- Notifies a remote node that its predecessor my have changed.
524 -- - candidate the possible new predecessor
525 function remote_notify(notify_to, candidate_predecessor)
527 simgrid.info("Updating someone else's predecessor (id: " .. notify_to .. " predecessor to ".. candidate_predecessor .. ")")
528 local task = simgrid.task.new("", comp_size, comm_size)
530 task.request_id = candidate_predecessor
531 task:dsend(notify_to)
534 -- Refreshes the finger table of the current node,
535 -- one finger per call.
536 function fix_fingers()
538 local i = math.tointeger(my_node.next_finger_to_fix)
539 local id = find_successor(math.tointeger(my_node.id + 2^i))
540 simgrid.info("Called fix_fingers(). Next finger to fix: " .. i .. " and I will check " .. my_node.id + 2^i .. ". Request returned " .. id)
543 if id ~= my_node.fingers[i] then
544 my_node.fingers[i] = id
545 simgrid.info("fix_fingers: Updated finger " .. i .. " to " .. id)
547 simgrid.info("fix_fingers: id is " .. id)
549 my_node.next_finger_to_fix = (i % nb_bits) + 1
553 -- Checks whether the predecessor of the current node has failed.
554 function check_predecessor()
558 -- Performs a find successor request to an arbitrary id.
559 function random_lookup()
564 simgrid.platform(arg[1] or "../../msg/msg_platform.xml")
565 simgrid.application(arg[2] or "../../msg/chord/chord90.xml")