1 # Copyright (c) 2006-2022. The SimGrid Team. All rights reserved.
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the license (GNU LGPL) which comes with this package.
6 # This example shows how to build a torus cluster with multi-core hosts.
8 # However, each leaf in the torus is a StarZone, composed of several CPUs
10 # Each actor runs in a specific CPU. One sender broadcasts a message to all receivers.
19 Send a msg for each host in its host list
22 def __init__(self, hosts, msg_size=int(1e6)):
24 self.msg_size = msg_size
26 # Actors that are created as objects will execute their __call__ method.
27 # So, the following constitutes the main function of the Sender actor.
32 for host in self.hosts:
33 msg = "Hello, I'm alive and running on " + simgrid.this_actor.get_host().name
34 mbox = simgrid.Mailbox.by_name(host.name)
36 pending_comms.append(mbox.put_async(msg, self.msg_size))
38 simgrid.this_actor.info("Done dispatching all messages")
40 # Now that all message exchanges were initiated, wait for their completion in one single call
41 simgrid.Comm.wait_all(pending_comms)
43 simgrid.this_actor.info("Goodbye now!")
48 Receiver actor: wait for 1 message on the mailbox identified by the hostname
52 mbox = simgrid.Mailbox.by_name(simgrid.this_actor.get_host().name)
54 simgrid.this_actor.info("I got a '%s'." % received)
56 #####################################################################################################
59 def create_hostzone(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> typing.Tuple[simgrid.NetPoint, simgrid.NetPoint]:
61 Callback to set a cluster leaf/element
63 In our example, each leaf is a StarZone, composed of 8 CPUs.
64 Each CPU is modeled as a host, connected to the outer world through a high-speed PCI link.
65 Obs.: CPU0 is the gateway for this zone
73 / / \ \<-- 100GBps, 1ns link (1 link UP and 1 link DOWN for full-duplex)
78 :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
79 :param coord: Coordinates in the cluster
80 :param ident: Internal identifier in the torus (for information)
81 :return netpoint, gateway: the netpoint to the StarZone and CPU0 as gateway
83 num_cpus = 8 # Number of CPUs in the zone
84 speed = "1Gf" # Speed of each CPU
85 link_bw = "100GBps" # Link bw connecting the CPU
86 link_lat = "1ns" # Link latency
88 hostname = "host" + str(ident)
90 host_zone = simgrid.NetZone.create_star_zone(hostname)
91 # setting my Torus parent zone
92 host_zone.set_parent(zone)
96 for i in range(num_cpus):
97 cpu_name = hostname + "-cpu" + str(i)
98 host = host_zone.create_host(cpu_name, speed).seal()
99 # the first CPU is the gateway
102 # create split-duplex link
103 link = host_zone.create_split_duplex_link("link-" + cpu_name, link_bw)
104 link.set_latency(link_lat).seal()
105 # connecting CPU to outer world
106 host_zone.add_route(host.netpoint, None, None, None, [
107 simgrid.LinkInRoute(link, simgrid.LinkInRoute.Direction.UP)], True)
109 # seal newly created netzone
111 return host_zone.netpoint, gateway.netpoint
113 #####################################################################################################
def create_limiter(zone: simgrid.NetZone, coord: typing.List[int], ident: int) -> simgrid.Link:
    """
    Callback that creates the limiter link (1Gbs) of one netpoint.

    The content of ``coord`` depends on the cluster being created:
    - Torus: direct translation of the torus' dimensions, e.g. (0, 0, 0) for a 3-D torus
    - Fat-Tree: a pair (level in the tree, ident), e.g. (0, 0) for the first leaf in the tree and (1, 0) for the
      first switch at level 1
    - Dragonfly: a tuple (group, chassis, blades/routers, nodes), e.g. (0, 0, 0, 0) for the first node in the
      cluster. To identify the router inside a (group, chassis, blade), MAX_UINT is used in the last position,
      e.g. (0, 0, 0, MAX_UINT)

    :param zone: Cluster netzone being created (useful to create the hosts/links inside it)
    :param coord: Coordinates in the cluster (not read by this callback)
    :param ident: Internal identifier in the cluster, used to build the link name
    :return: Limiter link, already sealed
    """
    # One limiter per element, named after the element's identifier.
    limiter_name = "limiter-" + str(ident)
    limiter = zone.create_link(limiter_name, [1e9])
    return limiter.seal()
def create_torus_cluster():
    """
    Creates a TORUS cluster with dimensions 2x2x2.

    The cluster has 8 elements/leaves in total. Each element is a StarZone containing 8 hosts.
    Each pair in the torus is connected through 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

    Take three leaves A, B and C such that the route from A to C passes through B
    (B and C being neighbours along the X axis). A communication from A to C goes through:
    <tt> A->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C </tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-B)->limiter(B)->link(B-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    Note that there are no limiter links inside the StarZones (A, B, C); the limiters
    belong to the torus and are added to the links in the path (as shown in step 2).

    More details in the SimGrid documentation (Torus cluster):
    https://simgrid.org/doc/latest/Platform_examples.html?highlight=torus#torus-cluster
    """
    # Leaves are built by create_hostzone and limiters by create_limiter;
    # the middle callback slot is left unset (None).
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)

    # create the torus cluster, 10Gbs link between elements in the cluster
    torus = simgrid.NetZone.create_torus_zone(
        "cluster", None, [2, 2, 2], leaf_callbacks, 10e9, 10e-6,
        simgrid.Link.SharingPolicy.SPLITDUPLEX)
    torus.seal()
175 #####################################################################################################
def create_fat_tree_cluster():
    """
    Creates a Fat-Tree cluster with 2 levels and 6 nodes.

    The following parameters are used to create this cluster:
    - Levels: 2 - two levels of switches in the cluster
    - Down links: 2, 3 - L2 routers are connected to 2 elements, L1 routers to 3 elements
    - Up links: 1, 2 - each node (A-F) is connected to 1 L1 router, L1 routers are connected to 2 L2 routers
    - Link count: 1, 1 - use 1 link in each level

    The first parameter describes how many levels we have.
    The following ones describe the connection between the elements and must have exactly n_levels components.

    Layout: S3 and S4 are the level 2 routers, S1 and S2 are the level 1 routers and A to F are the
    level 0 nodes (A, B and C hang below S1; D, E and F hang below S2).

    Each element (A to F) is a StarZone containing 8 hosts.
    The connection uses 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C</tt>

    More precisely, considering that A and C are StarZones, a
    communication from A-CPU-3 to C-CPU-7 goes through:
    1) StarZone A: A-CPU-3 -> link-up-A-CPU-3 -> A-CPU-0
    2) A-CPU-0->limiter(A)->link(A-S1)->limiter(S1)->link(S1-C)->limiter(C)->C-CPU-0
    3) StarZone C: C-CPU-0-> link-down-C-CPU-7 -> C-CPU-7

    More details in the SimGrid documentation (Fat-Tree cluster):
    https://simgrid.org/doc/latest/Platform_examples.html#fat-tree-cluster
    """
    # (levels, down links, up links, link count) as listed in the docstring above.
    tree_shape = simgrid.FatTreeParams(2, [2, 3], [1, 2], [1, 1])
    # Leaves are built by create_hostzone and limiters by create_limiter;
    # the middle callback slot is left unset (None).
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)

    # create the fat tree cluster, 10Gbs link between elements in the cluster
    fat_tree = simgrid.NetZone.create_fatTree_zone(
        "cluster", None, tree_shape, leaf_callbacks, 10e9, 10e-6,
        simgrid.Link.SharingPolicy.SPLITDUPLEX)
    fat_tree.seal()
230 #####################################################################################################
def create_dragonfly_cluster():
    r"""
    Creates a Dragonfly cluster with 2 groups and 16 nodes.

    The following parameters are used to create this cluster:
    - Groups: 2 groups, connected with 2 links (blue links)
    - Chassis: 2 chassis, connected with a single link (black links)
    - Routers: 2 routers, connected with 2 links (green links)
    - Nodes: 2 leaves per router, single link

    The diagram below illustrates a group in the dragonfly cluster
    (the line joining the two chassis at the top is the black link):

    +------------------------------------------------+
    |     +------------------------+                 |
    | +---|--------------+     +---|--------------+  |
    | |   |  green       |     |   |  green       |  |
    | |   |  links (2)   |     |   |  links (2)   |  |   blue links(2)
    | |   R1 ====== R2   |     |   R3 -----  R4 ======================> "Group 2"
    | |  /  \      /  \  |     |  /  \      /  \  |  |
    | | A    B    C    D |     | E    F    G    H |  |
    | +------------------+     +------------------+  |
    |      Chassis 1                Chassis 2        |
    +------------------------------------------------+

    Each element (A, B, C, etc) is a StarZone containing 8 hosts.
    The connection between elements (e.g. A->R1) uses 2 links:
    1) limiter: a 1Gbs limiter link (set by user through the set_limiter callback)
    2) link: 10Gbs link connecting the components (created automatically)

    For example, a communication from A to C goes through:
    <tt> A->limiter(A)->link(A-R1)->limiter(R1)->link(R1-R2)->limiter(R2)->link(R2-C)->limiter(C)->C</tt>

    More details in the SimGrid documentation (Dragonfly cluster):
    https://simgrid.org/doc/latest/Platform_examples.html#dragonfly-cluster
    """
    # (groups, chassis, routers, nodes) as listed in the docstring above.
    dragonfly_shape = simgrid.DragonflyParams([2, 2], [2, 1], [2, 2], 2)
    # Leaves are built by create_hostzone and limiters by create_limiter;
    # the middle callback slot is left unset (None).
    leaf_callbacks = simgrid.ClusterCallbacks(create_hostzone, None, create_limiter)

    # create the dragonfly cluster, 10Gbs link between elements in the cluster
    dragonfly = simgrid.NetZone.create_dragonfly_zone(
        "cluster", None, dragonfly_shape, leaf_callbacks, 10e9, 10e-6,
        simgrid.Link.SharingPolicy.SPLITDUPLEX)
    dragonfly.seal()
275 ###################################################################################################
278 if __name__ == '__main__':
279 e = simgrid.Engine(sys.argv)
280 platform = sys.argv[1]
283 if platform == "torus":
284 create_torus_cluster()
285 elif platform == "fatTree":
286 create_fat_tree_cluster()
287 elif platform == "dragonfly":
288 create_dragonfly_cluster()
290 sys.exit("invalid param")
292 host_list = e.all_hosts
293 # create the sender actor running on first host
294 simgrid.Actor.create("sender", host_list[0], Sender(host_list))
295 # create receiver in every host
296 for host in host_list:
297 simgrid.Actor.create("receiver-" + host.name, host, Receiver())
299 # runs the simulation