| rfc8775xml2.original.xml | rfc8775.xml | |||
|---|---|---|---|---|
| <?xml version='1.0'?> | <?xml version="1.0" encoding="UTF-8"?> | |||
| <!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [ | <!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent"> | |||
| ]> | ||||
| <?rfc toc="yes"?> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="std" | |||
| <?rfc tocompact="no"?> | consensus="true" docName="draft-ietf-pim-drlb-15" number="8775" | |||
| <?rfc tocdepth="6"?> | ipr="trust200902" obsoletes="" updates="" submissionType="IETF" | |||
| <?rfc symrefs="yes"?> | xml:lang="en" tocInclude="true" tocDepth="6" symRefs="true" | |||
| <?rfc sortrefs="yes"?> | sortRefs="true" version="3"> | |||
| <?rfc compact="yes"?> | ||||
| <?rfc subcompact="no"?> | <!-- xml2rfc v2v3 conversion 2.39.0 --> | |||
| <?rfc strict="yes" ?> | ||||
| <rfc category="std" docName="draft-ietf-pim-drlb-15" | ||||
| ipr="trust200902"> | ||||
| <!-- ***** FRONT MATTER ***** --> | <!-- ***** FRONT MATTER ***** --> | |||
| <front> | <front> | |||
| <title abbrev="PIM Designated Router Load Balancing">PIM Designated Router | <title abbrev="PIM Designated Router Load Balancing">PIM Designated Router | |||
| Load Balancing</title> | Load Balancing</title> | |||
| <seriesInfo name="RFC" value="8775"/> | ||||
| <author fullname="Yiqun Cai" initials="Y" surname="Cai"> | <author fullname="Yiqun Cai" initials="Y" surname="Cai"> | |||
| <organization>Alibaba Group</organization> | <organization>Alibaba Group</organization> | |||
| <address> | <address> | |||
| <postal> | ||||
| <street>520 Almanor Avenue</street> | ||||
| <city>Sunnyvale</city><region>CA</region> | ||||
| <code>94085</code> | ||||
| <country>United States of America</country> | ||||
| </postal> | ||||
| <email>yiqun.cai@alibaba-inc.com</email> | <email>yiqun.cai@alibaba-inc.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="H" surname="Ou" fullname="Heidi Ou"> | <author initials="H" surname="Ou" fullname="Heidi Ou"> | |||
| <organization>Alibaba Group</organization> | <organization>Alibaba Group</organization> | |||
| <address> | <address> | |||
| <email>heidi.ou@alibaba-inc.com</email> | <postal> | |||
| <street>520 Almanor Avenue</street> | ||||
| <city>Sunnyvale</city><region>CA</region> | ||||
| <code>94085</code> | ||||
| <country>United States of America</country> | ||||
| </postal> | ||||
| <email>heidi.ou@alibaba-inc.com</email> | ||||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="S" surname="Vallepalli" fullname="Sri Vallepalli"> | <author initials="S" surname="Vallepalli" fullname="Sri Vallepalli"> | |||
| <organization>Cisco Systems, Inc.</organization> | ||||
| <address> | <address> | |||
| <postal> | <email>vallepal@yahoo.com</email> | |||
| <street>3625 Cisco Way</street> | ||||
| <city>San Jose</city> | ||||
| <code>CA 95134</code> | ||||
| <country>USA</country> | ||||
| </postal> | ||||
| <email>svallepa@cisco.com</email> | ||||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="M" surname="Mishra" fullname="Mankamana Mishra"> | <author initials="M" surname="Mishra" fullname="Mankamana Mishra"> | |||
| <organization>Cisco Systems, Inc.</organization> | <organization>Cisco Systems, Inc.</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>821 Alder Drive,</street> | <street>821 Alder Drive,</street> | |||
| <city>Milpitas</city> | <city>Milpitas</city> | |||
| <code>CA 95035</code> | <region>CA</region> | |||
| <country>USA</country> | <code>95035</code> | |||
| </postal> | <country>United States of America</country> | |||
| <email>mankamis@cisco.com</email> | </postal> | |||
| <email>mankamis@cisco.com</email> | ||||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="S" surname="Venaas" fullname="Stig Venaas"> | <author initials="S" surname="Venaas" fullname="Stig Venaas"> | |||
| <organization>Cisco Systems, Inc.</organization> | <organization>Cisco Systems, Inc.</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>Tasman Drive</street> | <street>Tasman Drive</street> | |||
| <city>San Jose</city> | <city>San Jose</city> | |||
| <code>CA 95134</code> | <region>CA</region> | |||
| <country>USA</country> | <code>95134</code> | |||
| <country>United States of America</country> | ||||
| </postal> | </postal> | |||
| <email>stig@cisco.com</email> | <email>stig@cisco.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="A" surname="Green" fullname="Andy Green"> | <author initials="A" surname="Green" fullname="Andy Green"> | |||
| <organization>British Telecom</organization> | <organization>British Telecom</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>Adastral Park</street> | <street>Adastral Park</street> | |||
| <city>Ipswich</city> | <city>Ipswich</city> | |||
| <code>IP5 2RE</code> | <code>IP5 2RE</code> | |||
| <country>United Kingdom</country> | <country>United Kingdom</country> | |||
| </postal> | </postal> | |||
| <email>andy.da.green@bt.com</email> | <email>andy.da.green@bt.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <date year="2020" month="April" /> | ||||
| <date/> | ||||
| <area>Routing</area> | <area>Routing</area> | |||
| <keyword>Multicast</keyword> | <keyword>Multicast</keyword> | |||
| <abstract> | <abstract> | |||
| <t>On a multi-access network, one of the PIM-SM (PIM Sparse Mode) | <t>On a multi-access network, one of the PIM-SM (PIM Sparse Mode) | |||
| routers is elected as a | routers is elected as a | |||
| Designated Router. One of the responsibilities of the Designated Router | Designated Router. One of the responsibilities of the Designated Router | |||
| is to track local multicast listeners and forward data to these | is to track local multicast listeners and forward data to these | |||
| listeners if the group is operating in PIM-SM. This | listeners if the group is operating in PIM-SM. This | |||
| document specifies a modification to the PIM-SM protocol that | document specifies a modification to the PIM-SM protocol that | |||
| allows more than one of the PIM-SM routers to take on this responsibility | allows more than one of the PIM-SM routers to take on this responsibility | |||
| so that the forwarding load can be distributed among multiple routers. | so that the forwarding load can be distributed among multiple routers. | |||
| </t> | </t> | |||
| </abstract> | </abstract> | |||
| </front> | </front> | |||
| <!-- ***** MIDDLE MATTER ***** --> | <!-- ***** MIDDLE MATTER ***** --> | |||
| <middle> | <middle> | |||
| <section title="Introduction"> | <section numbered="true" toc="default"> | |||
| <t>On a multi-access LAN, such as an Ethernet, with one or more PIM-SM | <name>Introduction</name> | |||
| (PIM Sparse Mode) <xref target="RFC7761"/> routers, one of the PIM-SM | <t>On a multi-access LAN (such as an Ethernet) with one or more PIM-SM | |||
| (PIM Sparse Mode) <xref target="RFC7761" format="default"/> routers, one | ||||
| of the PIM-SM | ||||
| routers is elected as a Designated Router (DR). The PIM DR has two | routers is elected as a Designated Router (DR). The PIM DR has two | |||
| responsibilities in the PIM-SM protocol. For any active sources on a LAN, | responsibilities in the PIM-SM protocol. For any active sources on a LAN, | |||
| the PIM DR is responsible for registering with the Rendezvous Point (RP) | the PIM DR is responsible for registering with the Rendezvous Point (RP) | |||
| if the group is operating in PIM-SM. Also, the PIM DR is responsible for | if the group is operating in PIM-SM. Also, the PIM DR is responsible for | |||
| tracking local multicast listeners and forwarding to these listeners if | tracking local multicast listeners and forwarding data to these | |||
| the group is operating in PIM-SM. | listeners if the group is operating in PIM-SM. | |||
| </t> | ||||
| <t>Consider the following LAN in Figure 1: | ||||
| </t> | </t> | |||
| <figure > | <t>Consider the following LAN in <xref target="LAN-REC" | |||
| <preamble/> | format="default"/>:</t> | |||
| <artwork ><![CDATA[ | <figure anchor="LAN-REC"> | |||
| <name>LAN with Receivers</name> | ||||
| <artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
| (core networks) | (core networks) | |||
| | | | | | | | | |||
| | | | | | | | | |||
| R1 R2 R3 | R1 R2 R3 | |||
| | | | | | | | | |||
| ----(LAN)---- | ----(LAN)---- | |||
| | | | | |||
| | | | | |||
| (many receivers) | (many receivers) | |||
| ]]></artwork> | ||||
| Figure 1: LAN with receivers | </figure> | |||
| ]]></artwork> | ||||
| <postamble></postamble> | ||||
| </figure> | ||||
| <t>Assume R1 is elected as the DR. According to the | <t>Assume R1 is elected as the DR. According to the | |||
| PIM-SM protocol, R1 will be responsible for forwarding traffic | PIM-SM protocol, R1 will be responsible for forwarding traffic | |||
| to that LAN on behalf of all local members. In addition to keeping | to that LAN on behalf of all local members. In addition to keeping | |||
| track of membership reports, R1 is also responsible for | track of membership reports, R1 is also responsible for | |||
| initiating the creation of source and/or shared trees towards the | initiating the creation of source and/or shared trees towards the | |||
| senders or the RPs. The membership reports would be IGMP or MLD | senders or the RPs. The membership reports would be IGMP or Multicast | |||
| Listener Discovery (MLD) | ||||
| messages. This applies to any versions of the IGMP and MLD protocols. | messages. This applies to any versions of the IGMP and MLD protocols. | |||
| The most recent versions are IGMPv3 <xref target="RFC3376"/> and | The most recent versions are IGMPv3 <xref target="RFC3376" | |||
| MLDv2 <xref target="RFC3810"/>. | format="default"/> and | |||
| MLDv2 <xref target="RFC3810" format="default"/>. | ||||
| </t> | </t> | |||
| <t>Having a single router acting as DR and being responsible for | ||||
| <t>Having a single router acting as DR and being responsible for data | data-plane forwarding leads to several issues. One of the issues is | |||
| plane forwarding leads to several issues. One of the issues is that the | that the | |||
| aggregated bandwidth will be limited to what R1 can handle with | aggregated bandwidth will be limited to what R1 can handle with | |||
| regards to capacity of incoming links, the interface on the LAN, | regards to capacity of incoming links, the interface on the LAN, | |||
| and total forwarding capacity. It is very common that a LAN consists of | and total forwarding capacity. It is very common that a LAN consists of | |||
| switches that run IGMP/MLD or PIM snooping <xref target="RFC4541"/>. | switches that run IGMP/MLD or PIM snooping <xref target="RFC4541" | |||
| format="default"/>. | ||||
| This allows the forwarding of multicast packets to be | This allows the forwarding of multicast packets to be | |||
| restricted only to segments leading to receivers that have indicated | restricted only to segments leading to receivers that have indicated | |||
| their interest in multicast groups using either IGMP or MLD. The | their interest in multicast groups using either IGMP or MLD. The | |||
| emergence of the switched Ethernet allows the aggregated bandwidth to | emergence of the switched Ethernet allows the aggregated bandwidth to | |||
| exceed, sometimes by a large number, that of a single link. For | exceed, sometimes by a large number, that of a single link. For | |||
| example, let us modify Figure 1 and introduce an Ethernet switch in | example, let us modify <xref target="LAN-REC" format="default"/> and | |||
| Figure 2. | introduce an Ethernet switch in <xref target="LAN-SWITCH" | |||
| format="default"/>. | ||||
| </t> | </t> | |||
| <figure> | <figure anchor="LAN-SWITCH"> | |||
| <preamble/> | <name>LAN with Ethernet Switch</name> | |||
| <artwork> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| <![CDATA[ | ||||
| (core networks) | (core networks) | |||
| | | | | | | | | |||
| | | | | | | | | |||
| R1 R2 R3 | R1 R2 R3 | |||
| | | | | | | | | |||
| +=gi1===gi2===gi3=+ | +=gi1===gi2===gi3=+ | |||
| + + | + + | |||
| + switch + | + switch + | |||
| + + | + + | |||
| +=gi4===gi5===gi6=+ | +=gi4===gi5===gi6=+ | |||
| | | | | | | | | |||
| H1 H2 H3 | H1 H2 H3 | |||
| ]]></artwork> | ||||
| Figure 2: LAN with Ethernet Switch | ||||
| ]]> | ||||
| </artwork> | ||||
| <postamble></postamble> | ||||
| </figure> | </figure> | |||
| <t>Let us assume that each individual link is a Gigabit Ethernet. Each | <t>Let us assume that each individual link is a Gigabit Ethernet. Each | |||
| router, R1, R2 and R3, and the switch have enough forwarding capacity | router (R1, R2, and R3) and the switch have enough forwarding capacity | |||
| to handle hundreds of Gigabits of data. | to handle hundreds of gigabits of data. | |||
| </t> | </t> | |||
| <t>Let us further assume that each of the hosts requests 500 Mbps of | <t>Let us further assume that each of the hosts requests 500 Mbps of | |||
| unique multicast data. This totals to 1.5 Gbps of data, which is less | unique multicast data. This totals to 1.5 Gbps of data, which is less | |||
| than what each switch or the combined uplink bandwidth across the | than what each switch or the combined uplink bandwidth across the | |||
| routers can handle, even under failure of a single router. | routers can handle, even under failure of a single router. | |||
| </t> | </t> | |||
| <t> On the other hand, the link between R1 and switch, via port gi1, can | <t> On the other hand, the link between R1 and switch, via port gi1, can | |||
| only handle a throughput of 1Gbps. And if R1 is the only DR (the | only handle a throughput of 1 Gbps. And if R1 is the only DR (the | |||
| PIM DR elected using the procedure defined by <xref target="RFC7761"/>) | PIM DR elected using the procedure defined by <xref target="RFC7761" | |||
| format="default"/>), | ||||
| at least 500 Mbps worth of data will be lost because the only link that | at least 500 Mbps worth of data will be lost because the only link that | |||
| can be used to draw the traffic from the routers to the switch is via | can be used to draw the traffic from the routers to the switch is via | |||
| gi1. In other words, the entire network's throughput is limited by the | gi1. In other words, the entire network's throughput is limited by the | |||
| single connection between the PIM DR and the switch (or LAN as in | single connection between the PIM DR and the switch (or LAN, as in | |||
| Figure 1). | <xref target="LAN-REC" format="default"/>). | |||
| </t> | </t> | |||
| <t>Another important issue is related to failover. If R1 is the only | <t>Another important issue is related to failover. If R1 is the only | |||
| forwarder on a shared LAN, when R1 | forwarder on a shared LAN, when R1 | |||
| goes out of service, multicast forwarding for the entire LAN has | goes out of service, multicast forwarding for the entire LAN has | |||
| to be rebuilt by the newly elected PIM DR. However, if there were a | to be rebuilt by the newly elected PIM DR. However, if there were a | |||
| way that allowed multiple routers to forward to the LAN for | way that allowed multiple routers to forward to the LAN for | |||
| different groups, failure of one of the routers would only lead to | different groups, failure of one of the routers would only lead to | |||
| disruption to a subset of the flows, therefore improving the overall | disruption to a subset of the flows, therefore improving the overall | |||
| resilience of the network. | resilience of the network. | |||
| </t> | </t> | |||
| <t>This document specifies a modification to the PIM-SM protocol | <t>This document specifies a modification to the PIM-SM protocol | |||
| that allows more than one of these routers, called Group Designated | that allows more than one of these routers, called Group Designated | |||
| Routers (GDR) to be selected so that the forwarding load can be | Routers (GDRs), to be selected so that the forwarding load can be | |||
| distributed among a number of routers. | distributed among a number of routers. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Terminology"> | <name>Terminology</name> | |||
| <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL | <t> | |||
| NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
| "MAY", and "OPTIONAL" in this document are to be interpreted as | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL | |||
| described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", | |||
| when, and only when, they appear in all capitals, as shown here. | "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
| </t> | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are | |||
| to be interpreted as | ||||
| described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | ||||
| when, and only when, they appear in all capitals, as shown here. | ||||
| </t> | ||||
| <t>With respect to PIM-SM, this document follows the terminology that | <t>With respect to PIM-SM, this document follows the terminology that | |||
| has been defined in <xref target="RFC7761"/>. | has been defined in <xref target="RFC7761" format="default"/>. | |||
| </t> | </t> | |||
| <t> This document also introduces the following new acronyms: | <t> This document also introduces the following new acronyms: | |||
| </t> | </t> | |||
| <t> | <dl newline="false" spacing="normal"> | |||
| <list style="symbols"> | <dt> GDR: Group Designated Router.</dt> | |||
| <t> GDR: Group Designated Router. For each multicast | <dd>For each multicast | |||
| flow, either a (*,G) for Any-Source Multicast (ASM), or an (S,G) | flow, either a (*,G) for Any-Source Multicast (ASM) or an (S,G) | |||
| for Source-Specific Multicast (SSM) <xref target="RFC4607"/>, | for Source-Specific Multicast (SSM) <xref target="RFC4607" | |||
| a Hash Algorithm (described below) is used to select one of the | format="default"/>, | |||
| a hash algorithm (described below) is used to select one of the | ||||
| routers as a GDR. The GDR is responsible for initiating the | routers as a GDR. The GDR is responsible for initiating the | |||
| forwarding tree building process for the corresponding multicast | forwarding tree building process for the corresponding multicast | |||
| flow. | flow. | |||
| </t> | </dd> | |||
| <t>GDR Candidate: a router that has the potential to | <dt>GDR Candidate:</dt> | |||
| <dd>a router that has the potential to | ||||
| become a GDR. There might be multiple GDR Candidates on a LAN, | become a GDR. There might be multiple GDR Candidates on a LAN, | |||
| but only one can become the GDR for a specific multicast flow. | but only one can become the GDR for a specific multicast flow. | |||
| </t> | </dd> | |||
| </list> | </dl> | |||
| </t> | ||||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Applicability"> | <name>Applicability</name> | |||
| <t>The extension specified in this document applies to | <t>The extension specified in this document applies to | |||
| PIM-SM routers acting as last hop routers (there are directly connected | PIM-SM routers acting as last-hop routers (there are directly connected | |||
| receivers). It does not alter the behavior of a PIM DR, or any other | receivers). It does not alter the behavior of a PIM DR or any other | |||
| routers, on the first hop network (directly connected sources). | routers on the first-hop network (directly connected sources). | |||
| This is because the source tree is built using the IP address of the | This is because the source tree is built using the IP address of the | |||
| sender, not the IP address of the PIM DR that sends PIM registers | sender, not the IP address of the PIM DR that sends PIM registers | |||
| towards the RP. The load balancing between first hop routers can be | towards the RP. The load balancing between first-hop routers can be | |||
| achieved naturally if an IGP provides equal cost multiple paths | achieved naturally if an IGP provides equal cost multiple paths | |||
| (which it usually does in practice). Also distributing the load to do | (which it usually does in practice). Also, distributing the load to do | |||
| source registration does not justify the additional complexity required | source registration does not justify the additional complexity required | |||
| to support it. | to support it. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Functional Overview"> | <name>Functional Overview</name> | |||
| <t>In the PIM DR election as defined in <xref target="RFC7761"/>, when | <t>In the PIM DR election as defined in <xref target="RFC7761" | |||
| format="default"/>, when | ||||
| multiple routers are connected to a multi-access LAN (for | multiple routers are connected to a multi-access LAN (for | |||
| example, an Ethernet), one of them is elected to act as PIM DR. The | example, an Ethernet), one of them is elected to act as PIM DR. The | |||
| PIM DR is responsible for sending local Join/Prune messages towards the | PIM DR is responsible for sending local Join/Prune messages towards the | |||
| RP or source. In order to elect the PIM DR, each PIM router on the LAN | RP or source. In order to elect the PIM DR, each PIM router on the LAN | |||
| examines the received PIM Hello messages and compares its own DR | examines the received PIM Hello messages and compares its own DR | |||
| priority and IP address with those of its neighbors. The router with | priority and IP address with those of its neighbors. The router with | |||
| the highest DR priority is the PIM DR. If there are multiple such | the highest DR priority is the PIM DR. If there are multiple such | |||
| routers, their IP addresses are used as the tie-breaker, as described | routers, their IP addresses are used as the tiebreaker, as described | |||
| in <xref target="RFC7761"/>. | in <xref target="RFC7761" format="default"/>. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| In order to share forwarding load among last hop routers, besides the | In order to share forwarding load among last-hop routers, besides the | |||
| normal PIM DR election, one or more GDRs are elected on the | normal PIM DR election, one or more GDRs are elected on the | |||
| multi-access LAN. There is only one PIM DR on the multi-access | multi-access LAN. There is only one PIM DR on the multi-access | |||
| LAN, but there might be multiple GDR Candidates. | LAN, but there might be multiple GDR Candidates. | |||
| </t> | </t> | |||
| <t>For each multicast flow, that is, (*,G) for ASM and (S,G) for SSM, | <t>For each multicast flow, that is, (*,G) for ASM and (S,G) for SSM, | |||
| a Hash Algorithm [<xref target="maskalgo"/>] is used to select one of | a hash algorithm (<xref target="maskalgo" format="default"/>) is used to | |||
| the routers to be the GDR. | select one of the routers to be the GDR. | |||
| The new DR Load Balancing Capability (DRLB-Cap) PIM Hello Option is | The new DR Load-Balancing Capability (DRLB-Cap) PIM Hello Option is | |||
| used to announce the Capability as well as the Hash Algorithm type. | used to announce the Capability, as well as the hash algorithm type. | |||
| Routers with the new DRLB-Cap Option advertised in their PIM Hello, | Routers with the new DRLB-Cap Option advertised in their PIM Hello, | |||
| using the same GDR election Hash Algorithm and the same DR priority as | using the same GDR election hash algorithm and the same DR priority as | |||
| the PIM DR, are considered as GDR Candidates. | the PIM DR, are considered as GDR Candidates. | |||
| </t> | </t> | |||
| <t>Hash Masks are defined for Source, Group and RP separately, in | <t>Hash masks are defined for Source, Group, and RP, separately, in | |||
| order to handle PIM ASM/SSM. The masks, as well as a sorted list of | order to handle PIM ASM/SSM. The masks, as well as a sorted list of GDR | |||
| GDR Candidate Addresses, are announced by the DR in a new DR Load | Candidate addresses, are announced by the DR in a new DR Load-Balancing | |||
| Balancing List (DRLB-List) PIM Hello Option. | List (DRLB-List) PIM Hello Option. | |||
| </t> | </t> | |||
| <t>A Hash Algorithm based on the announced Source, Group, or RP masks | <t>A hash algorithm based on the announced Source, Group, or RP masks | |||
| allows one GDR to be assigned to a corresponding multicast state. | allows one GDR to be assigned to a corresponding multicast state. | |||
| That GDR is responsible for initiating the creation of the | That GDR is responsible for initiating the creation of the | |||
| multicast forwarding tree for multicast traffic. | multicast forwarding tree for multicast traffic. | |||
| </t> | </t> | |||
| <section title="GDR Candidates"> | <section numbered="true" toc="default"> | |||
| <name>GDR Candidates</name> | ||||
| <t>GDR is the new concept introduced by this specification. GDR | <t>GDR is the new concept introduced by this specification. GDR | |||
| Candidates are routers eligible for GDR election on the LAN. To | Candidates are routers eligible for GDR election on the LAN. To | |||
| become a GDR Candidate, a router must have the same DR priority and | become a GDR Candidate, a router must have the same DR priority and | |||
| run the same GDR election Hash Algorithm as the DR on the LAN. | run the same GDR election hash algorithm as the DR on the LAN. | |||
| </t> | </t> | |||
| <t>For example, assume there are 4 routers on the LAN: R1, R2, R3 and | <t>For example, assume there are 4 routers on the LAN: R1, R2, R3, and | |||
| R4, each announcing a DRLB-Cap option. R1, R2 and R3 have the same | R4, each announcing a DRLB-Cap Option. R1, R2, and R3 have the same | |||
| DR priority while R4's DR priority is less preferred. | DR priority, while R4's DR priority is less preferred. | |||
| In this example, R4 will not be eligible for GDR election, because R4 | In this example, R4 will not be eligible for GDR election, because R4 | |||
| will not become a PIM DR unless all of R1, R2 and R3 go out of | will not become a PIM DR unless all of R1, R2, and R3 go out of | |||
| service. | service. | |||
| </t> | </t> | |||
| <t>Furthermore, assume router R1 wins the PIM DR election, R1 and R2 | <t>Furthermore, assume router R1 wins the PIM DR election, R1 and R2 | |||
| advertise the same Hash Algorithm for GDR election, while R3 advertises | advertise the same hash algorithm for GDR election, while R3 advertises | |||
| a different one. In this case, only R1 and R2 will be eligible for GDR | a different one. In this case, only R1 and R2 will be eligible for GDR | |||
| election, while R3 will not. | election, while R3 will not. | |||
| </t> | </t> | |||
| <t>As a DR, R1 will include its own Load Balancing Hash Masks and | <t>As a DR, R1 will include its own Load-Balancing Hash Masks and | |||
| the identity of R1 and R2 (the GDR Candidates) in its DRLB-List Hello | the identity of R1 and R2 (the GDR Candidates) in its DRLB-List Hello | |||
| Option. | Option. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Protocol Specification"> | <name>Protocol Specification</name> | |||
| <section title="Hash Mask and Hash Algorithm" anchor="maskalgo"> | <section anchor="maskalgo" numbered="true" toc="default"> | |||
| <t>A Hash Mask is used to extract a number of bits from the | <name>Hash Mask and Hash Algorithm</name> | |||
| <t>A hash mask is used to extract a number of bits from the | ||||
| corresponding IP address field (32 for IPv4, 128 for IPv6) and | corresponding IP address field (32 for IPv4, 128 for IPv6) and | |||
| calculate a hash value. A hash value is used to select a GDR from GDR | calculate a hash value. A hash value is used to select a GDR from GDR | |||
| Candidates advertised by the PIM DR. Hash masks allow for certain flows | Candidates advertised by the PIM DR. Hash masks allow for certain flows | |||
| to always be forwarded by the same GDR, by ignoring certain bits in the | to always be forwarded by the same GDR, by ignoring certain bits in the | |||
| hash value calculation, so that the hash values are the same. For | hash value calculation, so that the hash values are the same. For | |||
| example, 0.0.255.0 defines a | example, 0.0.255.0 defines a | |||
| Hash Mask for an IPv4 address that masks the first, the second, and | hash mask for an IPv4 address that masks the first, second, and | |||
| the fourth octets, which means that only the third octet will | fourth octets, which means that only the third octet will | |||
| influence the hash value computed. Note that the masks need not | influence the hash value computed. Note that the masks need not | |||
| be a contiguous set of bits. E.g, for IPv4, 15.15.15.15 would be a | be a contiguous set of bits. For example, for IPv4, 15.15.15.15 would be a | |||
| valid mask. | valid mask. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| In the text below, a hash mask is in some places said to be zero. | In the text below, a hash mask is, in some places, said to be zero. | |||
| A hash mask is zero if no bits are set. That is, | A hash mask is zero if no bits are set, that is, | |||
| 0.0.0.0 for IPv4 and :: for IPv6. Also, a hash mask is said to be | 0.0.0.0 for IPv4 and :: for IPv6. Also, a hash mask is said to be | |||
| an all-bits-set mask if it is 255.255.255.255 for IPv4 or | an all-bits-set mask if it is 255.255.255.255 for IPv4 or | |||
| ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6. | ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6. | |||
| </t> | ||||
| <t>There are three Hash Masks defined: | ||||
| </t> | </t> | |||
| <t> | <t>There are three hash masks defined: | |||
| <list style="symbols"> | ||||
| <t>RP Hash Mask</t> | ||||
| <t>Source Hash Mask</t> | ||||
| <t>Group Hash Mask</t> | ||||
| </list> | ||||
| </t> | </t> | |||
| <ul spacing="normal"> | ||||
| <li>RP Hash Mask</li> | ||||
| <li>Source Hash Mask</li> | ||||
| <li>Group Hash Mask</li> | ||||
| </ul> | ||||
| <t>The hash masks need to be configured on the PIM routers that can | <t>The hash masks need to be configured on the PIM routers that can | |||
| potentially become a PIM DR, unless the implementation provides | potentially become a PIM DR, unless the implementation provides | |||
| default hash mask values. | default hash mask values. | |||
| An implementation SHOULD have default hash mask values as follows. | An implementation <bcp14>SHOULD</bcp14> have default hash mask values as | |||
| The default RP Hash Mask SHOULD be zero (no bits set). The default | follows. | |||
| Source and Group Hash Masks SHOULD both be all-bits-set masks. | The default RP Hash Mask <bcp14>SHOULD</bcp14> be zero (no bits set). The | |||
| These default values are likely acceptable for most deployments, and | default | |||
| Source and Group Hash Masks <bcp14>SHOULD</bcp14> both be all-bits-set | ||||
| masks. | ||||
| These default values are likely acceptable for most deployments and | ||||
| simplify configuration. There is only a need to use other masks if | simplify configuration. There is only a need to use other masks if | |||
| one needs to ensure that certain flows are forwarded by the same GDR. | one needs to ensure that certain flows are forwarded by the same GDR. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| The DRLB-List Hello Option contains a list of GDR Candidates. | The DRLB-List Hello Option contains a list of GDR Candidates. | |||
| The first one listed has ordinal number 0, the second listed | The first one listed has ordinal number 0, the second listed | |||
| ordinal number 1, and the last one has ordinal number N - 1 if | ordinal number 1, and the last one has ordinal number N - 1 if | |||
| there are N candidates listed. The hash value computed will be | there are N candidates listed. The hash value computed will be | |||
| the ordinal number of the GDR Candidate that is acting as GDR for | the ordinal number of the GDR Candidate that is acting as GDR for | |||
| the flow in question. | the flow in question. | |||
| </t> | </t> | |||
| <t>The input to be hashed is determined as follows: | <t>The input to be hashed is determined as follows: | |||
| <list style="symbols"> | </t> | |||
| <t>If the group is in ASM mode and the RP Hash Mask announced by | <ul spacing="normal"> | |||
| <li>If the group is in ASM mode and the RP Hash Mask announced by | ||||
| the PIM DR is not zero (at least one bit is set), calculate the | the PIM DR is not zero (at least one bit is set), calculate the | |||
| value of hashvalue_RP [<xref target="algorithm"/>] to determine | value of hashvalue_RP (<xref target="algorithm" format="default"/>) to determine | |||
| the GDR. | the GDR. | |||
| </t> | </li> | |||
| <t>If the group is in ASM mode and the RP Hash Mask announced by | <li>If the group is in ASM mode and the RP Hash Mask announced by | |||
| the PIM DR is zero (no bits are set), obtain the value of | the PIM DR is zero (no bits are set), obtain the value of | |||
| hashvalue_Group [<xref target="algorithm"/>] to determine the | hashvalue_Group (<xref target="algorithm" format="default"/>) to determine the | |||
| GDR. | GDR. | |||
| </t> | </li> | |||
| <t>If the group is in SSM mode, use | <li>If the group is in SSM mode, use | |||
| hashvalue_SG [<xref target="algorithm"/>] to determine the GDR. | hashvalue_SG (<xref target="algorithm" format="default"/>) to | |||
| </t> | determine the GDR. | |||
| </list> | </li> | |||
| </t> | </ul> | |||
| <t> | <t> | |||
| A simple Modulo Hash Algorithm is defined in this document. | A simple modulo hash algorithm is defined in this document. | |||
| However, to allow another Hash Algorithms to be used, a 1-octet | However, to allow another hash algorithm to be used, a 1-octet | |||
| "Hash Algorithm" field is included in the DRLB-Cap Hello Option to | "Hash Algorithm" field is included in the DRLB-Cap Hello Option to | |||
| specify the Hash Algorithm used by the router. | specify the hash algorithm used by the router. | |||
| </t> | </t> | |||
| <t>If different Hash Algorithms are advertised among the routers | <t>If different hash algorithms are advertised among the routers | |||
| on a LAN, only the routers advertising the same Hash Algorithm | on a LAN, only the routers advertising the same hash algorithm | |||
| as the DR (as well as having the same DR priority as the DR) are | as the DR (as well as having the same DR priority as the DR) are | |||
| eligible for GDR election. | eligible for GDR election. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="Modulo Hash Algorithm" anchor="algorithm"> | <section anchor="algorithm" numbered="true" toc="default"> | |||
| <t> | <name>Modulo Hash Algorithm</name> | |||
| <t> | ||||
| As part of computing the hash, the notation LSZC(hash_mask) is used | As part of computing the hash, the notation LSZC(hash_mask) is used | |||
| to denote the number of zeroes | to denote the number of zeroes | |||
| counted from the least significant bit of a Hash Mask | counted from the least significant bit of a hash mask | |||
| hash_mask. As an example, LSZC(255.255.255.128) is 7 and | hash_mask. As an example, LSZC(255.255.255.128) is 7 and | |||
| also LSZC(ffff:8000::) is 111. If all bits are set, LSZC will | LSZC(ffff:8000::) is 111. If all bits are set, LSZC will | |||
| be 0. If the mask is zero, then | be 0. If the mask is zero, then | |||
| LSZC will be 32 for IPv4, and 128 for IPv6. | LSZC will be 32 for IPv4 and 128 for IPv6. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| The number of GDR Candidates is denoted as GDRC. | The number of GDR Candidates is denoted as GDRC. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| The idea behind the Modulo Hash Algorithm is in simple terms | The idea behind the modulo hash algorithm is, in simple terms, | |||
| that the corresponding mask is applied to a value, then the result | that the corresponding mask is applied to a value, then the result | |||
| is shifted right LSZC(mask) bits so that the least significant bits | is shifted right LSZC(mask) bits so that the least significant bits | |||
| that were masked out are not considered. Then this result is masked | that were masked out are not considered. Then, this result is masked | |||
| by 0xffffffff, keeping only the last 32 bits of the result | by 0xffffffff, keeping only the last 32 bits of the result | |||
| (this only makes a difference for IPv6). Finally, the hash value is | (this only makes a difference for IPv6). Finally, the hash value is | |||
| this result modulo the number of GDR Candidates (GDRC). | this result modulo the number of GDR Candidates (GDRC). | |||
| </t> | </t> | |||
| <t> | <t> | |||
| The Modulo Hash Algorithm for computing the values hashvalue_RP, | The modulo hash algorithm, for computing the values hashvalue_RP, | |||
| hashvalue_Group and hashvalue_SG is defined as follows. | hashvalue_Group, and hashvalue_SG, is defined as follows. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| hashvalue_RP is calculated as: | hashvalue_RP is calculated as: | |||
| <list style = "empty"> | ||||
| <t> | ||||
| (((RP_address & RP_mask) >> LSZC(RP_mask)) | ||||
| & 0xffffffff) % GDRC | ||||
| </t> | ||||
| <t>RP_address is the address of the RP defined for the group | ||||
| and RP_mask is the RP Hash Mask. | ||||
| </t> | ||||
| </list> | ||||
| </t> | </t> | |||
| <t> | <artwork> | |||
| (((RP_address & RP_mask) >> LSZC(RP_mask)) & 0xffffffff) % GDRC | ||||
| </artwork> | ||||
| <ul empty="true"> | ||||
| <li>RP_address is the address of the RP defined for the group, | ||||
| and RP_mask is the RP Hash Mask.</li> | ||||
| </ul> | ||||
| <t> | ||||
| hashvalue_Group is calculated as: | hashvalue_Group is calculated as: | |||
| <list style = "empty"> | </t> | |||
| <t> | <artwork> | |||
| (((Group_address &amp; Group_mask) &gt;&gt; LSZC(Group_mask)) | (((Group_address &amp; Group_mask) &gt;&gt; LSZC(Group_mask)) &amp; | |||
| &amp; 0xffffffff) % GDRC | 0xffffffff) | |||
| </t> | % GDRC | |||
| <t> | </artwork> | |||
| Group_address is the group address and Group_mask is the | <ul empty="true"> | |||
| Group Hash Mask. | <li> | |||
| </t> | Group_address is the group address, and Group_mask is the | |||
| </list> | Group Hash Mask.</li> | |||
| </t> | </ul> | |||
| <t> | ||||
| <t> | ||||
| hashvalue_SG is calculated as: | hashvalue_SG is calculated as: | |||
| <list style = "empty"> | </t> | |||
| <t> | <artwork> | |||
| ((((Source_address & Source_mask) >> LSZC(Source_mask)) | ((((Source_address & Source_mask) >> LSZC(Source_mask)) & | |||
| & 0xffffffff) ^ | 0xffffffff) ^ (((Group_address & Group_mask) >> LSZC(Group_mask)) | |||
| (((Group_address & Group_mask) >> LSZC(Group_mask)) | & 0xffffffff)) % GDRC | |||
| & 0xffffffff)) % GDRC | </artwork> | |||
| </t> | <ul empty="true"> | |||
| <t> | <li> | |||
| Group_address is the group address and Group_mask is the | Group_address is the group address, and Group_mask is the | |||
| Group Hash Mask. | Group Hash Mask.</li> | |||
| </t> | </ul> | |||
| </list> | <section numbered="true" toc="default"> | |||
| </t> | <name>Modulo Hash Algorithm Examples</name> | |||
| <section title="Modulo Hash Algorithm Examples"> | <t>To help illustrate the algorithm, consider this example. | |||
| <t>To help illustrate the algorithm, consider this example. | ||||
| Router X with IPv4 address 203.0.113.1 receives a DRLB-List | Router X with IPv4 address 203.0.113.1 receives a DRLB-List | |||
| Hello Option from the DR, which announces RP Hash | Hello Option from the DR that announces RP Hash | |||
| Mask 0.0.255.0 and a list of GDR Candidates, sorted by IP | Mask 0.0.255.0 and a list of GDR Candidates, sorted by IP | |||
| addresses from high to low: 203.0.113.3, 203.0.113.2 and | addresses from high to low: 203.0.113.3, 203.0.113.2, and | |||
| 203.0.113.1. The ordinal number assigned to those addresses | 203.0.113.1. The ordinal number assigned to those addresses | |||
| would be: | would be: | |||
| </t> | </t> | |||
| <t>0 for 203.0.113.3; 1 for 203.0.113.2; 2 for 203.0.113.1 | <t> | |||
| (Router X). | 0 for 203.0.113.3; 1 for 203.0.113.2; 2 for 203.0.113.1 | |||
| </t> | (Router X).</t> | |||
| <t>Assume there are 2 RPs: RP1 192.0.2.1 for Group1 and RP2 | ||||
| 198.51.100.2 for Group2. Following the modulo Hash Algorithm: | <t>Assume there are 2 RPs: RP1 192.0.2.1 for Group1 and RP2 | |||
| </t> | 198.51.100.2 for Group2. Following the modulo hash algorithm: | |||
| <t>LSZC(0.0.255.0) is 8 and GDRC is 3. | </t> | |||
| <ul spacing="normal"> | ||||
| <li>LSZC(0.0.255.0) is 8, and GDRC is 3. | ||||
| The hashvalue_RP for Group1 with RP RP1 is: | The hashvalue_RP for Group1 with RP RP1 is: | |||
| </t> | </li> | |||
| <t>(((192.0.2.1 & 0.0.255.0) >> 8) & 0xffffffff % 3) = | </ul> | |||
| 2 % 3 = 2 | <ul empty="true"> | |||
| </t> | <li> | |||
| <t>which matches the ordinal number assigned to Router X. | <artwork> | |||
| Router X will be the GDR for Group1. | (((192.0.2.1 & 0.0.255.0) >> 8) & 0xffffffff % 3) | |||
| </t> | = 2 % 3 | |||
| <t>The hashvalue_RP for Group2 with RP RP2 is: | = 2 | |||
| </t> | </artwork> | |||
| <t>(((198.51.100.2 & 0.0.255.0) >> 8) & 0xffffffff % 3) = | </li> | |||
| 100 % 3 = 1 | <li>This matches the ordinal number assigned to Router X. | |||
| </t> | Router X will be the GDR for Group1.</li> | |||
| <t>which is different from the ordinal number of Router X (2). | </ul> | |||
| Hence, Router X will not be GDR for Group2. | <ul spacing="normal"> | |||
| </t> | <li>The hashvalue_RP for Group2 with RP RP2 is:</li> | |||
| <t>For IPv6 consider this example, similar to the above. | </ul> | |||
| <ul empty="true"> | ||||
| <li> | ||||
| <artwork> | ||||
| (((198.51.100.2 & 0.0.255.0) >> 8) & 0xffffffff % 3) | ||||
| = 100 % 3 | ||||
| = 1 | ||||
| </artwork> | ||||
| </li> | ||||
| <li>This is different from the ordinal number of Router X (2). | ||||
| Hence, Router X will not be GDR for Group2.</li> | ||||
| </ul> | ||||
| <t>For IPv6, consider this example, similar to the above. | ||||
| Router X with IPv6 address fe80::1 receives a DRLB-List | Router X with IPv6 address fe80::1 receives a DRLB-List | |||
| Hello Option from the DR, which announces RP Hash | Hello Option from the DR that announces RP Hash | |||
| Mask ::ffff:ffff:ffff:0 and a list of GDR Candidates, sorted by IP | Mask ::ffff:ffff:ffff:0 and a list of GDR Candidates, sorted by IP | |||
| addresses from high to low: fe80::3, fe80::2 and fe80::1. | addresses from high to low: fe80::3, fe80::2, and fe80::1. | |||
| The ordinal number assigned to those addresses would be: | The ordinal number assigned to those addresses would be: | |||
| </t> | </t> | |||
| <t>0 for fe80::3; 1 for fe80::2; 2 for fe80::1 (Router X). | <ul empty="true"> | |||
| </t> | <li>0 for fe80::3; 1 for fe80::2; 2 for fe80::1 (Router X).</li> | |||
| <t>Assume there are 2 RPs: RP1 2001:db8::1:0:5678:1 for Group1 and | </ul> | |||
| <t>Assume there are 2 RPs: RP1 2001:db8::1:0:5678:1 for Group1 and | ||||
| RP2 2001:db8::1:0:1234:2 for Group2. | RP2 2001:db8::1:0:1234:2 for Group2. | |||
| Following the modulo Hash Algorithm: | Following the modulo hash algorithm: | |||
| </t> | </t> | |||
| <t>LSZC(::ffff:ffff:ffff:0) is 16 and GDRC is 3. | <ul spacing="normal"> | |||
| The hashvalue_RP for Group1 with RP RP1 is: | <li>LSZC(::ffff:ffff:ffff:0) is 16, and GDRC is 3. | |||
| </t> | The hashvalue_RP for Group1 with RP RP1 is:</li> | |||
| <t>(((2001:db8::1:0:5678:1 & ::ffff:ffff:ffff:0) >> 16) & | </ul> | |||
| 0xffffffff % 3) = | <ul empty="true"> | |||
| ((::1:0:5678:0 >> 16) & 0xffffffff % 3) = | <li> | |||
| (::1:0:5678 & 0xffffffff % 3) = ::5678 % 3 = 2 | <artwork> | |||
| </t> | (((2001:db8::1:0:5678:1 & ::ffff:ffff:ffff:0) >> 16) & | |||
| <t>which matches the ordinal number assigned to Router X. | 0xffffffff % 3) | |||
| Router X will be the GDR for Group1. | = ((::1:0:5678:0 >> 16) & 0xffffffff % 3) | |||
| </t> | = (::1:0:5678 & 0xffffffff % 3) | |||
| <t>The hashvalue_RP for Group2 with RP RP2 is: | = ::5678 % 3 | |||
| </t> | = 2 | |||
| <t>(((2001:db8::1:0:1234:2 &amp; ::ffff:ffff:ffff:0) &gt;&gt; 16) &amp; | 0xffffffff % 3) | |||
| 0xffffffff % 3) = | </li> | |||
| ((::1:0:1234:0 >> 16) & 0xffffffff % 3) = | <li>This matches the ordinal number assigned to Router X. | |||
| (::1:0:1234 & 0xffffffff % 3) = ::1234 % 3 = 1 | Router X will be the GDR for Group1.</li> | |||
| </t> | </ul> | |||
| <t>which is different from the ordinal number of Router X (2). | <ul spacing="normal"> | |||
| Hence, Router X will not be GDR for Group2. | <li>The hashvalue_RP for Group2 with RP RP2 is:</li> | |||
| </t> | </ul> | |||
| </section> | <ul empty="true"> | |||
| <section title="Limitations"> | <li> | |||
| <artwork> | ||||
| (((2001:db8::1:0:1234:2 &amp; ::ffff:ffff:ffff:0) &gt;&gt; 16) &amp; | ||||
| 0xffffffff % 3) | ||||
| = ((::1:0:1234:0 >> 16) & 0xffffffff % 3) | ||||
| = (::1:0:1234 & 0xffffffff % 3) | ||||
| = ::1234 % 3 | ||||
| = 1 | ||||
| </artwork> | ||||
| </li> | ||||
| <li>This is different from the ordinal number of Router X (2). | ||||
| Hence, Router X will not be GDR for Group2.</li> | ||||
| </ul> | ||||
| </section> | ||||
| <section numbered="true" toc="default"> | ||||
| <name>Limitations</name> | ||||
| <t> | <t> | |||
| The Modulo Hash Algorithm has poor failover characteristics when | The modulo hash algorithm has poor failover characteristics when | |||
| a shared LAN has more than two GDRs. In the | a shared LAN has more than two GDRs. In the | |||
| case of more than two GDRs on a LAN, when one GDR fails, all | case of more than two GDRs on a LAN, when one GDR fails, all | |||
| of the groups may be reassigned to a different GDR, even if | of the groups may be reassigned to a different GDR, even if | |||
| they were not assigned to the failed GDR. However, many | they were not assigned to the failed GDR. However, many | |||
| deployments use only two routers on a shared LAN for redundancy | deployments use only two routers on a shared LAN for redundancy | |||
| purposes. Future work may define new Hash Algorithms where only | purposes. Future work may define new hash algorithms where only | |||
| groups assigned to the failed GDR get reassigned. | groups assigned to the failed GDR get reassigned. | |||
| </t> | </t> | |||
| <t>The Modulo Hash Algorithm will use at most 32 consecutive bits of | <t>The modulo hash algorithm will use, at most, 32 consecutive bits of | |||
| the input addresses for its computation. Exactly which bits are | the input addresses for its computation. Exactly which bits are | |||
| used of the source, group or RP addresses, depend on the respective | used of the source, group, or RP addresses depend on the respective | |||
| masks. This limitation may be an issue for IPv6 deployments, | masks. This limitation may be an issue for IPv6 deployments, | |||
| since not all bits of the IPv6 addresses are considered. If this | since not all bits of the IPv6 addresses are considered. If this | |||
| causes operational issues, a new hash algorithm would need to be | causes operational issues, a new hash algorithm would need to be | |||
| defined. | defined. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section title="PIM Hello Options"> | <section numbered="true" toc="default"> | |||
| <name>PIM Hello Options</name> | ||||
| <t>PIM routers include a new option, called | <t>PIM routers include a new option, called | |||
| "Load Balancing Capability (DRLB-Cap)" in their PIM Hello messages. | "Load-Balancing Capability (DRLB-Cap)", in their PIM Hello messages. | |||
| </t> | </t> | |||
| <t>Besides this DRLB-Cap Hello Option, the elected PIM DR also | <t>Besides this DRLB-Cap Hello Option, the elected PIM DR also | |||
| includes a new "DR Load Balancing List (DRLB-List) Hello Option". | includes a new "DR Load-Balancing List (DRLB-List) Hello Option". | |||
| The DRLB-List Hello Option consists of three Hash Masks as defined | The DRLB-List Hello Option consists of three hash masks, as defined | |||
| above and also a list of GDR Candidate addresses on the LAN. It is | above, and also a list of GDR Candidate addresses on the LAN. It is | |||
| recommended that the GDR Candidate addresses are sorted in descending | recommended that the GDR Candidate addresses are sorted in descending | |||
| order. This ensures that when using algorithms such as the Modulo | order. This ensures that when using algorithms, such as the modulo hash | |||
| algorithm in this document, it is predictable which GDR is | algorithm in this document, it is predictable which GDR is | |||
| responsible for which groups, regardless of the order the DR learned | responsible for which groups, regardless of the order the DR learned | |||
| about the candidates. | about the candidates. | |||
| </t> | </t> | |||
| <section title="PIM DR Load Balancing Capability (DRLB-Cap) Hello | <section numbered="true" toc="default"> | |||
| Option"> | <name>PIM DR Load-Balancing Capability (DRLB-Cap) Hello Option</name> | |||
| <figure align="center"> | <figure anchor="PIM-CAP"> | |||
| <artwork align="center"><![CDATA[ | <name>PIM DR Load-Balancing Capability Hello Option</name> | |||
| <artwork align="center" name="" type="" alt=""><![CDATA[ | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | Type = 34 | Length = 4 | | | Type = 34 | Length = 4 | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | Reserved |Hash Algorithm | | | Reserved |Hash Algorithm | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork> | ||||
| Figure 3: PIM DR Load Balancing Capability Hello Option | </figure> | |||
| ]]></artwork> | <dl newline="false" spacing="normal"> | |||
| <postamble></postamble> | <dt>Type:</dt> | |||
| </figure> | <dd>34</dd> | |||
| <t> | <dt>Length:</dt> | |||
| <list style="empty"> | <dd>4</dd> | |||
| <t>Type: 34 | <dt>Reserved:</dt> | |||
| </t> | <dd>Transmitted as zero, ignored on receipt.</dd> | |||
| <t>Length: 4 | <dt>Hash Algorithm:</dt> | |||
| </t> | <dd>Hash algorithm type. A value listed in the | |||
| <t>Reserved: Transmitted as zero, ignored on receipt. | IANA "PIM Designated Router Load-Balancing Hash Algorithms" | |||
| </t> | registry. 0 is used for the hash algorithm defined in this | |||
| <t>Hash Algorithm: Hash Algorithm type. A value listed in the | ||||
| IANA Designated Router Load Balancing Hash Algorithms | ||||
| registry. 0 is used for the Modulo algorithm defined in this | ||||
| document. | document. | |||
| </t> | </dd> | |||
| </list> | </dl> | |||
| </t> | <t>This DRLB-Cap Hello Option <bcp14>MUST</bcp14> be advertised by | |||
| <t>This DRLB-Cap Hello Option MUST be advertised by routers on | routers on | |||
| all interfaces where DR Load Balancing is enabled. Note that the | all interfaces where DR Load Balancing is enabled. Note that the | |||
| option is included at most once. | option is included, at most, once. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title = "PIM DR Load Balancing List (DRLB-List) Hello Option"> | <section numbered="true" toc="default"> | |||
| <figure align="center"> | <name>PIM DR Load-Balancing List (DRLB-List) Hello Option</name> | |||
| <artwork align="center"><![CDATA[ | <figure anchor="PIM-LIST"> | |||
| 0 1 2 3 | <name>PIM DR Load-Balancing List Hello Option</name> | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | <artwork align="center" name="" type="" alt=""><![CDATA[ | |||
| 0 1 2 3 | ||||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | Type = 35 | Length | | | Type = 35 | Length | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | Group Mask | | | Group Mask | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | Source Mask | | | Source Mask | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | RP Mask | | | RP Mask | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | GDR Candidate Address(es) | | | GDR Candidate Address(es) | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| Figure 4: PIM DR Load Balancing List Hello Option | ||||
| ]]></artwork> | ]]></artwork> | |||
| <postamble></postamble> | ||||
| </figure> | </figure> | |||
| <t> | <dl newline="false" spacing="normal"> | |||
| <list style="empty"> | <dt>Type:</dt> | |||
| <t>Type: 35</t> | <dd>35</dd> | |||
| <t>Length: (3 + n) x (4 or 16) bytes, where n is the number | <dt>Length:</dt> | |||
| of GDR candidates.</t> | <dd>(3 + n) x (4 or 16) bytes, where n is the number | |||
| <t>Group Mask (32/128 bits): Mask applied to group addresses | of GDR Candidates.</dd> | |||
| as part of hash computation.</t> | <dt>Group Mask (32/128 bits):</dt> | |||
| <t> Source Mask (32/128 bits): Mask applied to source addresses | <dd>Mask applied to group addresses | |||
| as part of hash computation.</t> | as part of hash computation.</dd> | |||
| <t>RP Mask (32/128 bits): Mask applied to RP addresses | <dt> Source Mask (32/128 bits):</dt> | |||
| as part of hash computation.</t> | <dd>Mask applied to source addresses | |||
| <t> | as part of hash computation.</dd> | |||
| <list style="empty"> | <dt>RP Mask (32/128 bits):</dt> | |||
| <t>All masks MUST have the same number of bits as the IP | <dd>Mask applied to RP addresses | |||
| source address in the PIM Hello IP header. | as part of hash computation.</dd> | |||
| </t> | </dl> | |||
| </list> | <t>All masks <bcp14>MUST</bcp14> have the same number of bits as | |||
| </t> | the IP | |||
| <t>GDR Candidate Address(es) (32/128 bits): List of GDR | source address in the PIM Hello IP header. | |||
| Candidate(s) | </t> | |||
| <list style="empty"> | <dl newline="false" spacing="normal"> | |||
| <t>All addresses MUST be in the same address family as the | <dt>GDR Candidate Address(es) (32/128 bits):</dt> | |||
| <dd><t>List of GDR Candidate(s)</t> | ||||
| <t>All addresses <bcp14>MUST</bcp14> be in the same address | ||||
| family as the | ||||
| PIM Hello IP header. It is recommended that the addresses are | PIM Hello IP header. It is recommended that the addresses are | |||
| sorted in descending order. | sorted in descending order. | |||
| </t> | </t> | |||
| <t>If the "Interface ID" option, as specified in | <t>If the "Interface ID" option, as specified in | |||
| <xref target="RFC6395"/>, is present in a GDR Candidate's | <xref target="RFC6395" format="default"/>, is present in a GDR | |||
| PIM Hello message, and the &quot;Router Identifier&quot; portion is | Candidate's | |||
| PIM Hello message and the "Router Identifier" portion is | ||||
| non-zero: | non-zero: | |||
| <list style="symbols"> | </t> | |||
| <t>For IPv4, the "GDR Candidate Address" will be set directly | <ul spacing="normal"> | |||
| <li>For IPv4, the &quot;GDR Candidate Address&quot; will be set | ||||
| directly | ||||
| to the "Router Identifier". | to the "Router Identifier". | |||
| </t> | </li> | |||
| <t>For IPv6, the "GDR Candidate Address" will be 96 bits of | <li>For IPv6, the "GDR Candidate Address" will be 96 bits of | |||
| zeroes followed by the 32-bit Router Identifier. | zeroes, followed by the 32-bit Router Identifier. | |||
| </t> | </li> | |||
| </list> | </ul> | |||
| </t> | <t>If the "Interface ID" option is not present in a GDR | |||
| <t>If the "Interface ID" option is not present in a GDR | Candidate's PIM Hello message or if the "Interface ID" | |||
| Candidate' PIM Hello message, or if the "Interface ID" | ||||
| option is present but the "Router Identifier" field is zero, | option is present but the "Router Identifier" field is zero, | |||
| the "GDR Candidate Address" will be the IPv4 or IPv6 source | the "GDR Candidate Address" will be the IPv4 or IPv6 source | |||
| address of the PIM Hello message. | address of the PIM Hello message. | |||
| </t> | </t> | |||
| <t>This DRLB-List Hello Option MUST only be advertised by the | <t>This DRLB-List Hello Option <bcp14>MUST</bcp14> only be | |||
| elected PIM DR. It MUST be ignored if received from a non-DR. | advertised by the | |||
| The option MUST also be ignored if the hash masks are not | elected PIM DR. It <bcp14>MUST</bcp14> be ignored if received | |||
| the correct number of bits, or GDR Candidate addresses are in | from a non-DR. | |||
| The option <bcp14>MUST</bcp14> also be ignored if the hash masks | ||||
| are not | ||||
| the correct number of bits or GDR Candidate addresses are in | ||||
| the wrong address family. | the wrong address family. | |||
| </t> | </t> | |||
| </list> | </dd></dl> | |||
| </t> | </section> | |||
| </list> | ||||
| </t> | ||||
| </section> | ||||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="PIM DR Operation"> | <name>PIM DR Operation</name> | |||
| <t>The DR election process is still the same as defined in | <t>The DR election process is still the same as defined in | |||
| <xref target="RFC7761"/>. The DR advertises the new DRLB-List Hello | <xref target="RFC7761" format="default"/>. The DR advertises the new DRLB-List Hello | |||
| Option, which contains mask values from user configuration (or default | Option, which contains mask values from user configuration (or default | |||
| values), followed by a list of GDR Candidate Addresses. Note that | values), followed by a list of GDR Candidate addresses. Note that | |||
| if a router included the "Interface ID" option in the hello message, | if a router included the "Interface ID" option in the hello message | |||
| and the Router ID is non-zero, the Router ID will be used to form the | and the Router ID is non-zero, the Router ID will be used to form the | |||
| GDR Candidate address of the router, as discussed in the previous | GDR Candidate address of the router, as discussed in the previous | |||
| section. It is recommended that the list be sorted, from the highest | section. It is recommended that the list be sorted from the highest | |||
| value to the lowest value. The reason for sorting the list is to | value to the lowest value. The reason for sorting the list is to | |||
| make the behavior deterministic, regardless of the order in which the | make the behavior deterministic, regardless of the order in which the | |||
| DR learns of new candidates. Note that, as for non-DR routers, the DR | DR learns of new candidates. Note that, as for non-DR routers, the DR | |||
| also advertises the DRLB-Cap Hello Option to indicate its ability to | also advertises the DRLB-Cap Hello Option to indicate its ability to | |||
| support the new functionality and the type of GDR election Hash | support the new functionality and the type of GDR election hash | |||
| Algorithm it uses. | algorithm it uses. | |||
| </t> | </t> | |||
| <t>If a PIM DR receives a neighbor DRLB-Cap Hello Option, which | <t>If a PIM DR receives a neighbor DRLB-Cap Hello Option that | |||
| contains the same Hash Algorithm as the DR, and the neighbor has the | contains the same hash algorithm as the DR and the neighbor has the | |||
| same DR priority as the DR, PIM DR SHOULD consider the neighbor as a | same DR priority as the DR, PIM DR <bcp14>SHOULD</bcp14> consider the | |||
| GDR Candidate and insert the GDR Candidate' Address into the | neighbor as a | |||
| GDR Candidate and insert the GDR Candidate's Address into the | ||||
| list of the DRLB-List Option. However, the DR may have policies | list of the DRLB-List Option. However, the DR may have policies | |||
| limiting which GDR Candidates, or the number of GDR Candidates to | limiting which or the number of GDR Candidates to | |||
| include. Likewise, the DR SHOULD include itself in the list of GDR | include. Likewise, the DR <bcp14>SHOULD</bcp14> include itself in the | |||
| Candidates, but it is permissible not to do so, if for instance there | list of GDR | |||
| Candidates, but it is permissible not to do so, for instance, if there | ||||
| is some policy restricting the candidate set. | is some policy restricting the candidate set. | |||
| </t> | </t> | |||
| <t>If a PIM neighbor included in the list expires, stops announcing | <t>If a PIM neighbor included in the list expires, stops announcing | |||
| the DRLB-Cap Hello Option, changes DR priority, changes Hash Algorithm | the DRLB-Cap Hello Option, changes DR priority, changes hash algorithm, | |||
| or otherwise becomes ineligible as a candidate, the DR SHOULD | or otherwise becomes ineligible as a candidate, the DR | |||
| <bcp14>SHOULD</bcp14> | ||||
| immediately send a triggered hello with a new list in the DRLB-List | immediately send a triggered hello with a new list in the DRLB-List | |||
| option, excluding the neighbor. | option, excluding the neighbor. | |||
| </t> | </t> | |||
| <t>If a new router becomes eligible as a candidate, there is no | <t>If a new router becomes eligible as a candidate, there is no | |||
| urgency in sending out an updated list. An updated list SHOULD be | urgency in sending out an updated list. An updated list | |||
| <bcp14>SHOULD</bcp14> be | ||||
| included in the next hello. | included in the next hello. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="PIM GDR Candidate Operation"> | <section numbered="true" toc="default"> | |||
| <t>When an IGMP/MLD report is received, a Hash Algorithm is used by | <name>PIM GDR Candidate Operation</name> | |||
| <t>When an IGMP/MLD report is received, a hash algorithm is used by | ||||
| the GDR Candidates to determine which router is going to be responsible | the GDR Candidates to determine which router is going to be responsible | |||
| for building forwarding trees on behalf of the host. | for building forwarding trees on behalf of the host. | |||
| </t> | </t> | |||
| <t>The router MUST include the DRLB-Cap Hello Option in all PIM Hello | <t>The router <bcp14>MUST</bcp14> include the DRLB-Cap Hello Option in all PIM Hello | |||
| messages sent on the interface. Note that the presence of the | messages sent on the interface. Note that the presence of the | |||
| DRLB-Cap Option in the PIM Hello does not guarantee that the router | DRLB-Cap Option in the PIM Hello does not guarantee that the router | |||
| will be considered as a GDR candidate. Once the DR election is done, | will be considered as a GDR Candidate. Once the DR election is done, | |||
| the DRLB-List Hello Option is received from the current PIM DR | the DRLB-List Hello Option is received from the current PIM DR | |||
| containing a list of the selected GDRs Candidates. | containing a list of the selected GDR Candidates. | |||
| </t> | </t> | |||
| <t>A router only acts as a GDR Candidate if it is included in the GDR | <t>A router only acts as a GDR Candidate if it is included in the GDR | |||
| Candidate list of the DRLB-List Hello Option. See the next section for | Candidate list of the DRLB-List Hello Option. See the next section for | |||
| details. | details. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="DRLB-List Hello Option Processing"> | <section numbered="true" toc="default"> | |||
| <t> | <name>DRLB-List Hello Option Processing</name> | |||
| <t> | ||||
| This section discusses processing of the DRLB-List Hello Option, | This section discusses processing of the DRLB-List Hello Option, | |||
| including the case where it was received in the previous hello, | including the case where it was received in the previous hello | |||
| but not in the current hello. | but not in the current hello. | |||
| All routers MUST ignore the DRLB-List Hello Option if it is | All routers <bcp14>MUST</bcp14> ignore the DRLB-List Hello Option if it | |||
| received from a PIM router which is not the DR. The option MUST | is | |||
| only be processed by routers that are announcing the DRLB-Cap Option, | received from a PIM router that is not the DR. The option <bcp14>MUST</ | |||
| and only if the Hash Algorithm announced by the DR is the same as | bcp14> | |||
| only be processed by routers that are announcing the DRLB-Cap Option | ||||
| and only if the hash algorithm announced by the DR is the same as | ||||
| the local announcement. | the local announcement. | |||
| All GDR Candidates MUST use the Hash Masks advertised in the Option, | All GDR Candidates <bcp14>MUST</bcp14> use the hash masks advertised | |||
| in the Option, | ||||
| even if they differ from those the candidate was configured with. | even if they differ from those the candidate was configured with. | |||
| The DR MUST also process its own DRLB-List Hello Option. | The DR <bcp14>MUST</bcp14> also process its own DRLB-List Hello Option. | |||
| </t> | </t> | |||
| <t>A router stores the latest option contents that was announced, | <t>A router stores the latest option contents that were announced, | |||
| if any, and deletes the previous contents. The router MUST also | if any, and deletes the previous contents. The router <bcp14>MUST</bcp14> | |||
| compare the new contents with any previous contents, and if there | also | |||
| compare the new contents with any previous contents and, if there | ||||
| are any changes, continue processing as below. Note that if the | are any changes, continue processing as below. Note that if the | |||
| option does not pass the above checks, the below processing MUST be | option does not pass the above checks, the below processing <bcp14>MUST</ bcp14> be | |||
| done as if the option was not announced. | done as if the option was not announced. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| If the contents of the DRLB-List Option, the masks or the candidate | If the contents of the DRLB-List Option, the masks, or the candidate | |||
| list, differs from the previously saved copy, it is received for the | list differ from the previously saved copy, it is received for the | |||
| first time, or it is no longer being received or accepted, the | first time, or it is no longer being received or accepted, the | |||
| option MUST be processed as below. | option <bcp14>MUST</bcp14> be processed as below. | |||
| <list style="numbers"> | </t> | |||
| <t>If the local router is included in the GDR Candidate Address(es) | <ol spacing="normal" type="1"> | |||
| field (it will look for its own address, or its Router ID if it | <li> | |||
| announces a non-zero Router ID), for each of the groups, or source | <t>If the local router is included in the "GDR Candidate | |||
| and group pairs if the group is in SSM mode, with local receiver | Address(es)" field, it will look for its own address, or if it | |||
| interest, the router MUST run the Hash Algorithm to determine | announces a non-zero Router ID, its own Router ID. For each of the | |||
| which of them it is the GDR for. | groups or source and group pairs, if the group is in SSM mode | |||
| <list style="symbol"> | with local receiver interest, the router <bcp14>MUST</bcp14> run | |||
| <t>If there is no change in the GDR status, then no further | the hash algorithm to determine which of them is for the GDR. | |||
| </t> | ||||
| <ul spacing="normal"> | ||||
| <li>If there is no change in the GDR status, then no further | ||||
| action is required. | action is required. | |||
| </t> | </li> | |||
| <t>If the router becomes the new GDR, then a multicast | <li>If the router becomes the new GDR, then a multicast | |||
| forwarding tree MUST be built <xref target="RFC7761"/>. | forwarding tree <bcp14>MUST</bcp14> be built <xref target="RFC7761" | |||
| </t> | format="default"/>. | |||
| <t> | </li> | |||
| <li> | ||||
| If the router is no longer the GDR, then it uses an Assert as | If the router is no longer the GDR, then it uses an Assert as | |||
| explained in [<xref target="assert"/>]. | explained in <xref target="assert" format="default"/>. | |||
| </t> | </li> | |||
| </list> | </ul> | |||
| </t> | </li> | |||
| <t>If the local router is not included in the GDR Candidate | ||||
| Address(es) field, or if the DRLB-List Hello Option is no longer | <li> | |||
| included in the DR's Hello, or if the DR's Neighbor Liveness Timer | <t>If one of the following occurs:</t> | |||
| expires <xref target="RFC7761"/>, for each of the groups, or | <ul> | |||
| source and group pairs if the group is in SSM mode, with local | <li>the local router is not included in the "GDR Candidate | |||
| receiver interest, for which the router is the GDR, it | Address(es)" field,</li> | |||
| uses an Assert as explained in [<xref target="assert"/>]. | <li>the DRLB-List Hello Option is no longer included in the DR's | |||
| </t> | Hello, or</li> | |||
| </list> | <li>the DR's Neighbor Liveness Timer expires <xref target="RFC7761" format="default"/>,</li> | |||
| </t> | </ul> | |||
| <t> | ||||
| then for each group (or each source and group pair if the group | ||||
| is in SSM mode) with local receiver interest, for which the | ||||
| router is the GDR, the router uses an Assert as explained in | ||||
| <xref target="assert"/>. | ||||
| </t> | ||||
| </li> | ||||
| </ol> | ||||
| </section> | </section> | |||
| <section title="PIM Assert Modification" anchor="assert"> | <section anchor="assert" numbered="true" toc="default"> | |||
| <t>GDR changes may occur due to configuration change, due to | <name>PIM Assert Modification</name> | |||
| GDR candidates going down, and also new routers coming up and | <t>GDR changes may occur due to configuration change, | |||
| becoming GDR candidates. This may occur while flows are being | GDR Candidates going down, and also new routers coming up and | |||
| becoming GDR Candidates. This may occur while flows are being | ||||
| forwarded. If the GDR for an active flow changes, there is likely | forwarded. If the GDR for an active flow changes, there is likely | |||
| to be some disruption, such as packet loss or duplicates. | to be some disruption, such as packet loss or duplicates. | |||
| By using asserts, packet loss is minimized, while allowing a small | By using asserts, packet loss is minimized while allowing a small | |||
| amount of duplicates. | amount of duplicates. | |||
| </t> | </t> | |||
| <t>When a router stops acting as the GDR for a group, or source and | <t>When a router stops acting as the GDR for a group, or source and | |||
| group pair if SSM, it MUST set the Assert metric preference to maximum | group pair if SSM, it <bcp14>MUST</bcp14> set the Assert metric preferenc | |||
| e to maximum | ||||
| (0x7fffffff) and the Assert metric to one less than maximum | (0x7fffffff) and the Assert metric to one less than maximum | |||
| (0xfffffffe). That is, whenever it sends or receives an Assert for the | (0xfffffffe). That is, whenever it sends or receives an Assert for the | |||
| group, it must use these values as the metric preference and metric | group, it must use these values as the metric preference and metric | |||
| rather than the values provided by the unicast routing protocol. | rather than the values provided by the unicast routing protocol. | |||
| </t> | </t> | |||
| <t>The rest of this section is just for illustration purposes and | <t>The rest of this section is just for illustration purposes and | |||
| not part of the protocol definition. | not part of the protocol definition. | |||
| </t> | </t> | |||
| <t>To illustrate the behavior when there is a GDR change, consider | <t>To illustrate the behavior when there is a GDR change, consider | |||
| the following scenario where there are two flows | the following scenario where there are two flows: | |||
| G1 and G2. R1 is the GDR for G1, and R2 is the GDR for G2. | G1 and G2. R1 is the GDR for G1, and R2 is the GDR for G2. | |||
| When R3 comes up, it is possible that R3 becomes GDR for both | When R3 comes up, it is possible that R3 becomes GDR for both | |||
| G1 and G2, hence R3 starts to build the forwarding tree for G1 and | G1 and G2; hence, R3 starts to build the forwarding tree for G1 and | |||
| G2. If R1 and R2 stop forwarding before R3 completes the process, | G2. If R1 and R2 stop forwarding before R3 completes the process, | |||
| packet loss might occur. On the other hand, if R1 and R2 continue | packet loss might occur. On the other hand, if R1 and R2 continue | |||
| forwarding while R3 is building the forwarding trees, duplicates | forwarding while R3 is building the forwarding trees, duplicates | |||
| might occur. | might occur. | |||
| </t> | </t> | |||
| <t>When the role of GDR changes as above, instead of immediately | <t>When the role of GDR changes as above, instead of immediately | |||
| stopping forwarding, R1 and R2 continue forwarding to G1 and G2 | stopping forwarding, R1 and R2 continue forwarding to G1 and G2 | |||
| respectively, while, at the same time, R3 builds forwarding trees for | respectively, while, at the same time, R3 builds forwarding trees for | |||
| G1 and G2. This will lead to PIM Asserts. | G1 and G2. This will lead to PIM Asserts. | |||
| </t> | </t> | |||
| <t>For G1, using the functionality described in this document, R1 | <t>For G1, using the functionality described in this document, R1 | |||
| and R3 determine the new GDR, which is R3. With the modified Assert | and R3 determine the new GDR, which is R3. With the modified Assert | |||
| behavior, R1 sets its Assert metric to the near maximum value discussed | behavior, R1 sets its Assert metric to the near maximum value, as discuss | |||
| above. That will make R3, which has normal metric in its Assert as | ed | |||
| above. That will make R3, which has normal metric in its Assert, | ||||
| the Assert winner. | the Assert winner. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Backward Compatibility"> | <name>Backward Compatibility</name> | |||
| <t>In the case of a hybrid Ethernet shared LAN (where some PIM routers | <t>In the case of a hybrid Ethernet shared LAN (where some PIM routers | |||
| support the functionality defined in this document, and some do not); | support the functionality defined in this document and some do not): | |||
| <list style="symbols"> | ||||
| <t>If the DR does not support the new functionality, then there | ||||
| will be no load-balancing. | ||||
| </t> | ||||
| <t>If non-DR routers do not support the new functionality, they | ||||
| will not be considered as Candidate GDRs and it will not take part | ||||
| in load-balancing. Load-balancing may still happen on the link. | ||||
| </t> | ||||
| </list> | ||||
| </t> | </t> | |||
| <ul spacing="normal"> | ||||
| <li>If the DR does not support the new functionality, then there | ||||
| will be no load balancing. | ||||
| </li> | ||||
| <li>If non-DR routers do not support the new functionality, they | ||||
| will not be considered as GDR Candidates and will not take part | ||||
| in load balancing. Load balancing may still happen on the link. | ||||
| </li> | ||||
| </ul> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| <section title="Operational Considerations"> | <section numbered="true" toc="default"> | |||
| <name>Operational Considerations</name> | ||||
| <t> | <t> | |||
| An administrator needs to consider what the total bandwidth | An administrator needs to consider what the total bandwidth | |||
| requirements are and find a set of routers that together has | requirements are and find a set of routers that together have | |||
| enough available capacity, while making sure that each of the routers | enough available capacity while making sure that each of the routers | |||
| can handle its part, assuming that the traffic is distributed | can handle its part, assuming that the traffic is distributed | |||
| roughly equally among the routers. Ideally, one should also have | roughly equally among the routers. Ideally, one should also have | |||
| enough bandwidth to handle the case where at least one router fails. | enough bandwidth to handle the case where at least one router fails. | |||
| All routers should have reachability to the sources, and | All routers should have reachability to the sources and | |||
| RPs if applicable, that is not via the LAN. | RPs, if applicable, that are not via the LAN. | |||
| </t> | </t> | |||
| <t>Care must be taken when choosing what hash masks to configure. One | <t>Care must be taken when choosing what hash masks to configure. One | |||
| would typically configure the same masks on all the routers, so that | would typically configure the same masks on all the routers so that | |||
| they are the same, regardless of which router is elected as DR. The | they are the same, regardless of which router is elected as DR. The | |||
| default masks are likely suitable for most deployments. The RP Hash | default masks are likely suitable for most deployments. The RP Hash | |||
| Mask must be configured (the default is no bits set) if one wishes to | Mask must be configured (the default is no bits set) if one wishes to | |||
| hash based on the RP address rather than the group address for ASM. | hash based on the RP address rather than the group address for ASM. | |||
| The default masks will use the entire group addresses, and source | The default masks will use the entire group addresses, and source | |||
| addresses if SSM, as part of the hash. An administrator may set other | addresses if SSM, as part of the hash. An administrator may set other | |||
| masks that masks out part of the addresses to ensure that certain | masks that mask out part of the addresses to ensure that certain | |||
| flows always get hashed to the same router. How this is achieved depends | flows always get hashed to the same router. How this is achieved depends | |||
| on how the group addresses are allocated. | on how the group addresses are allocated. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| Only the routers announcing the same Hash Algorithm as the DR | Only the routers announcing the same hash algorithm as the DR | |||
| would be considered as GDR candidates. Network administrators | would be considered as GDR Candidates. Network administrators | |||
| need to make sure that the desired set of routers announce the | need to make sure that the desired set of routers announce the | |||
| same algorithm. Migration between different algorithms is | same algorithm. Migration between different algorithms is | |||
| not considered in this document. | not considered in this document. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="IANA Considerations"> | <section numbered="true" toc="default"> | |||
| <t>IANA has temporarily assigned type 34 for the PIM DR Load Balancing | <name>IANA Considerations</name> | |||
| Capability (DRLB-Cap) Hello Option, and type 35 for the | <t>IANA has made these assignments in the "PIM-Hello Options" registry: | |||
| PIM DR Load Balancing List (DRLB-List) Hello Option in the | value 34 for the PIM DR Load-Balancing Capability (DRLB-Cap) Hello | |||
| PIM-Hello Options registry. IANA is requested | Option (with Length of 4), and value 35 for the PIM DR Load-Balancing | |||
| to make these assignments permanent when this document is published | List (DRLB-List) Hello Option (with variable Length). | |||
| as an RFC. Note that the option names have changed slightly since | </t> | |||
| the temporary assignments were made. Also, the length of option 34 | <t> | |||
| is always 4, the registry currently says it is variable. | Per this document, IANA has created a registry called | |||
| </t><t> | "PIM Designated Router Load-Balancing Hash Algorithms" in the | |||
| This document requests IANA to create a registry called | ||||
| "Designated Router Load Balancing Hash Algorithms" in the | ||||
| "Protocol Independent Multicast (PIM)" branch of the registry tree. | "Protocol Independent Multicast (PIM)" branch of the registry tree. | |||
| The registry lists Hash Algorithms for use by PIM Designated Router | The registry lists hash algorithms for use by PIM Designated Router | |||
| Load Balancing. | Load Balancing. | |||
| </t> | </t> | |||
| <section title="Initial registry"> | <section numbered="true" toc="default"> | |||
| <name>Initial Registry</name> | ||||
| <t> | <t> | |||
| The initial content of the registry should be as follows. | The initial content of the registry is as follows. | |||
| <figure> | ||||
| <artwork> | ||||
| <![CDATA[ | ||||
| Type Name Reference | ||||
| ------ ---------------------------------------- -------------------- | ||||
| 0 Modulo This document | ||||
| 1-255 Unassigned | ||||
| ]]> | ||||
| </artwork> | ||||
| </figure> | ||||
| </t> | </t> | |||
| <table anchor="initial-reg" align="center"> | ||||
| <thead> | ||||
| <tr> | ||||
| <th>Type</th> | ||||
| <th>Name</th> | ||||
| <th>Reference</th> | ||||
| </tr> | ||||
| </thead> | ||||
| <tbody> | ||||
| <tr> | ||||
| <td>0</td> | ||||
| <td>Modulo</td> | ||||
| <td>RFC 8775</td> | ||||
| </tr> | ||||
| <tr> | ||||
| <td>1-255</td> | ||||
| <td>Unassigned</td> | ||||
| <td></td> | ||||
| </tr> | ||||
| </tbody> | ||||
| </table> | ||||
| </section> | </section> | |||
| <section title="Assignment of new Hash Algorithms"> | <section numbered="true" toc="default"> | |||
| <t>Assignment of new Hash Algorithms is done according to the "IETF | <name>Assignment of New Hash Algorithms</name> | |||
| Review" model, see <xref target="RFC8126"/>. | <t>Assignment of new hash algorithms is done according to the "IETF | |||
| Review" procedure; see <xref target="RFC8126" format="default"/>. | ||||
| </t> | </t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title="Security Considerations"> | <name>Security Considerations</name> | |||
| <t>Security of the new DR Load Balancing PIM Hello Options is only | <t>Security of the new DR Load-Balancing PIM Hello Options is only | |||
| guaranteed by the security of PIM Hello messages, so the security | guaranteed by the security of PIM Hello messages, so the security | |||
| considerations for PIM Hello messages as described in PIM-SM | considerations for PIM Hello messages, as described in PIM-SM | |||
| <xref target="RFC7761"/> apply here. | <xref target="RFC7761" format="default"/>, apply here. | |||
| </t> | </t> | |||
| <t>If the DR is subverted it could omit or add certain GDRs or | <t>If the DR is subverted, it could omit or add certain GDRs or | |||
| announce an unsupported algorithm. If another router is subverted, it | announce an unsupported algorithm. If another router is subverted, it | |||
| could be made DR and cause similar issues. While these issues are | could be made DR and cause similar issues. While these issues are | |||
| specific to this specification, they are not that different from existing | specific to this specification, they are not that different from existing | |||
| attacks such as subverting a DR and lowering the DR priority, causing a | attacks, such as subverting a DR and lowering the DR priority, causing a | |||
| different router to become the DR. | different router to become the DR. | |||
| </t> | </t> | |||
| <t>If for any reason, the DR includes a GDR in the announced list which | <t>If, for any reason, the DR includes a GDR in the announced list that | |||
| announces a different algorithm from what the DR announces, the GDR | announces a different algorithm from what the DR announces, the GDR | |||
| is required to ignore the announcement, and there will be no router | is required to ignore the announcement, and there will be no router | |||
| acting as the DR for the flows that hash to that GDR. | acting as the DR for the flows that hash to that GDR. | |||
| </t> | </t> | |||
| <t>If a GDR is subverted, it could potentially be made to stop forwarding | <t>If a GDR is subverted, it could potentially be made to stop forwarding | |||
| all the traffic it is expected to forward. This is similar to what can | all the traffic it is expected to forward. This is similar to what can | |||
| happen today if a DR is subverted. | happen today if a DR is subverted. | |||
| </t> | </t> | |||
| <t>An administrator may be able to achieve the desired load-balancing | <t>An administrator may be able to achieve the desired load balancing | |||
| of known flows, but an attacker may send a single high-rate flow which | of known flows, but an attacker may send a single high-rate flow that | |||
| is served by a single GDR, or send multiple flows that are expected to | is served by a single GDR or send multiple flows that are expected to | |||
| be hashed to the same GDR.</t> | be hashed to the same GDR.</t> | |||
| </section> | </section> | |||
| <section title="Acknowledgement"> | ||||
| <t> | ||||
| The authors would like to thank Steve Simlo and Taki Millonis for | ||||
| helping with the original idea; Alia Atlas, Bill Atwood, Joe Clarke, | ||||
| Alissa Cooper, Jake Holland, Bharat Joshi, Anish Kachinthaya, | ||||
| Anvitha Kachinthaya, Benjamin Kaduk, Mirja Kuhlewind, Barry Leiba, | ||||
| Ben Niven-Jenkins, Alvaro Retana, Adam Roach, | ||||
| Michael Scharf, Eric Vyncke and Carl Wallace | ||||
| for reviews and comments; and Toerless Eckert and Rishabh | ||||
| Parekh for helpful conversation on the document. | ||||
| </t> | ||||
| </section> | ||||
| </middle> | </middle> | |||
| <!-- *****BACK MATTER ***** --> | <!-- *****BACK MATTER ***** --> | |||
| <back> | <back> | |||
| <references title='Normative References'> | <references> | |||
| <?rfc include='reference.RFC.2119' ?> | <name>References</name> | |||
| <?rfc include='reference.RFC.6395' ?> | <references> | |||
| <?rfc include='reference.RFC.7761' ?> | <name>Normative References</name> | |||
| <?rfc include='reference.RFC.8126' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
| <?rfc include='reference.RFC.8174' ?> | ence.RFC.2119.xml"/> | |||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.6395.xml"/> | ||||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.7761.xml"/> | ||||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.8126.xml"/> | ||||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.8174.xml"/> | ||||
| </references> | </references> | |||
| <references title="Informative References"> | <references> | |||
| <?rfc include='reference.RFC.3376' ?> | <name>Informative References</name> | |||
| <?rfc include='reference.RFC.3810' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
| <?rfc include='reference.RFC.4541' ?> | ence.RFC.3376.xml"/> | |||
| <?rfc include='reference.RFC.4607' ?> | <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | |||
| ence.RFC.3810.xml"/> | ||||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.4541.xml"/> | ||||
| <xi:include href="https://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.4607.xml"/> | ||||
| </references> | </references> | |||
| </references> | ||||
| <section numbered="false" toc="default"> | ||||
| <name>Acknowledgements</name> | ||||
| <t> | ||||
| The authors would like to thank <contact fullname="Steve Simlo"/> and | ||||
| <contact fullname="Taki Millonis"/> for | ||||
| helping with the original idea; <contact fullname="Alia Atlas"/>, | ||||
| <contact fullname="Bill Atwood"/>, <contact fullname="Joe Clarke"/>, | ||||
| <contact fullname="Alissa Cooper"/>, <contact fullname="Jake | ||||
| Holland"/>, <contact fullname="Bharat Joshi"/>, <contact | ||||
| fullname="Anish Kachinthaya"/>, | ||||
| <contact fullname="Anvitha Kachinthaya"/>, <contact fullname="Benjamin | ||||
| Kaduk"/>, <contact fullname="Mirja Kühlewind"/>, <contact | ||||
| fullname="Barry Leiba"/>, | ||||
| <contact fullname="Ben Niven-Jenkins"/>, <contact fullname="Alvaro | ||||
| Retana"/>, <contact fullname="Adam Roach"/>, | ||||
| <contact fullname="Michael Scharf"/>, <contact fullname="Éric | ||||
| Vyncke"/>, and <contact fullname="Carl Wallace"/> | ||||
| for reviews and comments; and <contact fullname="Toerless Eckert"/> | ||||
| and <contact fullname="Rishabh Parekh"/> for helpful conversation on | ||||
| the document. | ||||
| </t> | ||||
| </section> | ||||
| </back> | </back> | |||
| </rfc> | </rfc> | |||
| End of changes. 171 change blocks. | ||||
| 565 lines changed or deleted | 659 lines changed or added | |||
This html diff was produced by rfcdiff 1.45. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ | ||||