rfc9574xml2.original.xml | rfc9574.xml | |||
---|---|---|---|---|
<?xml version="1.0" encoding="US-ASCII"?> | <?xml version='1.0' encoding='utf-8'?> | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ | <!DOCTYPE rfc [ | |||
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | <!ENTITY nbsp " "> | |||
C.2119.xml"> | <!ENTITY zwsp "​"> | |||
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | <!ENTITY nbhy "‑"> | |||
C.8174.xml"> | <!ENTITY wj "⁠"> | |||
<!ENTITY RFC6514 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6514.xml"> | ||||
<!ENTITY RFC7432 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7432.xml"> | ||||
<!ENTITY I-D.ietf-bess-evpn-bum-procedure-updates SYSTEM "https://xml2rfc.ietf.o | ||||
rg/public/rfc/bibxml3/reference.I-D.ietf-bess-evpn-bum-procedure-updates.xml"> | ||||
<!ENTITY RFC8365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.8365.xml"> | ||||
<!ENTITY RFC7902 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7902.xml"> | ||||
<!ENTITY RFC6513 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.6513.xml"> | ||||
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7348.xml"> | ||||
<!ENTITY RFC4023 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.4023.xml"> | ||||
<!ENTITY RFC7637 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF | ||||
C.7637.xml"> | ||||
<!ENTITY I-D.ietf-bess-evpn-proxy-arp-nd SYSTEM "https://xml2rfc.ietf.org/public | ||||
/rfc/bibxml3/reference.I-D.ietf-bess-evpn-proxy-arp-nd.xml"> | ||||
]> | ]> | |||
<?rfc toc="yes"?> | ||||
<?rfc tocompact="yes"?> | ||||
<?rfc tocdepth="3"?> | ||||
<?rfc tocindent="yes"?> | ||||
<?rfc symrefs="yes"?> | ||||
<?rfc sortrefs="yes"?> | ||||
<?rfc comments="yes"?> | ||||
<?rfc inline="yes"?> | ||||
<?rfc compact="yes"?> | ||||
<?rfc subcompact="no"?> | ||||
<rfc category="std" docName="draft-ietf-bess-evpn-optimized-ir-12" | ||||
ipr="trust200902" submissionType="IETF"> | ||||
<!-- Generated by id2xml 1.5.0 on 2019-12-29T20:19:06Z --> | ||||
<?rfc strict="yes"?> | ||||
<?rfc compact="yes"?> | ||||
<?rfc subcompact="no"?> | ||||
<?rfc symrefs="yes"?> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="std" docName="draft-ie | |||
tf-bess-evpn-optimized-ir-12" number="9574" ipr="trust200902" submissionType="IE | ||||
<?rfc sortrefs="no"?> | TF" obsoletes="" updates="" xml:lang="en" tocInclude="true" consensus="true" to | |||
cDepth="3" symRefs="true" sortRefs="true" version="3"> | ||||
<?rfc text-list-symbols="-o+*"?> | ||||
<?rfc toc="yes"?> | ||||
<front> | <front> | |||
<title abbrev="EVPN Optimized IR">Optimized Ingress Replication Solution | <title abbrev="EVPN Optimized IR">Optimized Ingress Replication Solution | |||
for Ethernet VPN (EVPN)</title> | for Ethernet VPNs (EVPNs)</title> | |||
<author fullname="J. Rabadan" initials="J." role="editor" | <seriesInfo name="RFC" value="9574"/> | |||
surname="Rabadan"> | <author fullname="Jorge Rabadan" initials="J." role="editor" surname="Rabada | |||
n"> | ||||
<organization>Nokia</organization> | <organization>Nokia</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>777 Middlefield Road</street> | <street>777 Middlefield Road</street> | |||
<city>Mountain View</city> | <city>Mountain View</city> | |||
<region>CA</region> | <region>CA</region> | |||
<code>94043</code> | <code>94043</code> | |||
<country>United States of America</country> | ||||
<country>USA</country> | ||||
</postal> | </postal> | |||
<email>jorge.rabadan@nokia.com</email> | <email>jorge.rabadan@nokia.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Senthil Sathappan" initials="S." surname="Sathappan"> | ||||
<author fullname="S. Sathappan" initials="S." surname="Sathappan"> | ||||
<organization>Nokia</organization> | <organization>Nokia</organization> | |||
<address> | <address> | |||
<email>senthil.sathappan@nokia.com</email> | <email>senthil.sathappan@nokia.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Wen Lin" initials="W." surname="Lin"> | ||||
<author fullname="W. Lin" initials="W." surname="Lin"> | ||||
<organization>Juniper Networks</organization> | <organization>Juniper Networks</organization> | |||
<address> | <address> | |||
<email>wlin@juniper.net</email> | <email>wlin@juniper.net</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Mukul Katiyar" initials="M." surname="Katiyar"> | ||||
<author fullname="M. Katiyar" initials="M." surname="Katiyar"> | ||||
<organization>Versa Networks</organization> | <organization>Versa Networks</organization> | |||
<address> | <address> | |||
<email>mukul@versa-networks.com</email> | <email>mukul@versa-networks.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Ali Sajassi" initials="A." surname="Sajassi"> | ||||
<author fullname="A. Sajassi" initials="A." surname="Sajassi"> | ||||
<organization>Cisco Systems</organization> | <organization>Cisco Systems</organization> | |||
<address> | <address> | |||
<email>sajassi@cisco.com</email> | <email>sajassi@cisco.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<date month="May" year="2024"/> | ||||
<area>rtg</area> | ||||
<workgroup>BESS</workgroup> | ||||
<date day="25" month="January" year="2022"/> | <keyword>Assisted Replication</keyword> | |||
<keyword>AR</keyword> | ||||
<workgroup>BESS Workgroup</workgroup> | <keyword>AR-Replicator</keyword> | |||
<keyword>RNVE</keyword> | ||||
<keyword>Pruned Flood List</keyword> | ||||
<keyword>PFL</keyword> | ||||
<keyword>Pruned Flooding List</keyword> | ||||
<abstract> | <abstract> | |||
<t>Network Virtualization Overlay networks using Ethernet VPN (EVPN) as | <t>Network Virtualization Overlay (NVO) networks using Ethernet VPNs | |||
their control plane may use Ingress Replication or PIM (Protocol | (EVPNs) as their control plane may use trees based on ingress replication | |||
Independent Multicast)-based trees to convey the overlay Broadcast, | or Protocol Independent Multicast (PIM) to convey the overlay Broadcast, | |||
Unknown unicast and Multicast (BUM) traffic. PIM provides an efficient | Unknown Unicast, or Multicast (BUM) traffic. PIM provides an efficient | |||
solution to avoid sending multiple copies of the same packet over the | solution that prevents sending multiple copies of the same packet over the | |||
same physical link, however it may not always be deployed in the Network | same physical link; however, it may not always be deployed in the | |||
Virtualization Overlay core network. Ingress Replication avoids the | NVO network core. Ingress replication avoids the | |||
dependency on PIM in the Network Virtualization Overlay network core. | dependency on PIM in the NVO network core. | |||
While Ingress Replication provides a simple multicast transport, some | While ingress replication provides a simple multicast transport, some | |||
Network Virtualization Overlay networks with demanding multicast | NVO networks with demanding multicast | |||
applications require a more efficient solution without PIM in the core. | applications require a more efficient solution without PIM in the core. | |||
This document describes a solution to optimize the efficiency of Ingress | This document describes a solution to optimize the efficiency of ingress | |||
Replication trees.</t> | replication trees.</t> | |||
</abstract> | </abstract> | |||
</front> | </front> | |||
<middle> | <middle> | |||
<section anchor="sect-1" title="Introduction"> | <section anchor="sect-1" numbered="true" toc="default"> | |||
<t>Ethernet Virtual Private Networks (EVPN) may be used as the control | <name>Introduction</name> | |||
plane for a Network Virtualization Overlay network <xref | <t>Ethernet Virtual Private Networks (EVPNs) may be used as the control | |||
target="RFC8365"/>. Network Virtualization Edge (NVE) and Provider Edge | plane for a Network Virtualization Overlay (NVO) network <xref target="RFC | |||
8365" format="default"/>. Network Virtualization Edge (NVE) and Provider Edge | ||||
(PE) devices that are part of the same EVPN Broadcast Domain (BD) use | (PE) devices that are part of the same EVPN Broadcast Domain (BD) use | |||
Ingress Replication or PIM-based trees to transport the tenant's | Ingress Replication (IR) or PIM-based trees to transport the tenant's | |||
Broadcast, Unknown unicast and Multicast (BUM) traffic.</t> | Broadcast, Unknown Unicast, or Multicast (BUM) traffic.</t> | |||
<t> In the ingress replication approach, the ingress NVE receiving a BUM | ||||
<t>In the Ingress Replication approach, the ingress NVE receiving a BUM | frame from the Tenant System (TS) will create as many copies of the | |||
frame from the Tenant System will create as many copies of the frame as | frame as the number of remote NVEs/PEs that are attached to the BD. Each o | |||
remote NVEs/PEs are attached to the BD. Each of those copies will be | f | |||
those copies will be | ||||
encapsulated into an IP packet where the outer IP Destination Address | encapsulated into an IP packet where the outer IP Destination Address | |||
(IP DA) identifies the loopback of the egress NVE/PE. The IP fabric core | (IP DA) identifies the loopback of the egress NVE/PE. The IP fabric core | |||
nodes (also known as Spines) will simply route the IP encapsulated BUM | nodes (also known as spines) will simply route the IP-encapsulated BUM | |||
frames based on the outer IP DA. If PIM-based trees are used instead of | frames based on the outer IP DA. If PIM-based trees are used instead of | |||
Ingress Replication, the NVEs/PEs attached to the same BD will join a | ingress replication, the NVEs/PEs attached to the same BD will join a | |||
PIM-based tree. The ingress NVE receiving a BUM frame will send a single | PIM-based tree. The ingress NVE receiving a BUM frame will send a single | |||
copy of the frame, encapsulated into an IP packet where the outer IP DA | copy of the frame, encapsulated into an IP packet where the outer IP DA | |||
is the multicast address that represents the PIM-based tree. The IP | is the multicast address that represents the PIM-based tree. The IP | |||
fabric core nodes are part of the PIM tree and keep multicast state for | fabric core nodes are part of the PIM tree and keep multicast state for | |||
the multicast group, so that IP encapsulated BUM frames can be routed to | the multicast group, so that IP-encapsulated BUM frames can be routed to | |||
all the NVEs/PEs that joined the tree.</t> | all the NVEs/PEs that joined the tree.</t> | |||
<t>The two approaches are illustrated in <xref target="IR-PIM" format="def | ||||
<t>The two approaches are illustrated in <xref target="IR-PIM"/>. On the | ault"/>. On the | |||
left-hand side, NVE1 uses Ingress Replication to send a BUM frame | left-hand side of the diagram, NVE1 uses ingress replication to send a BUM | |||
frame | ||||
(originated from Tenant System TS1) to the remote nodes attached to the | (originated from Tenant System TS1) to the remote nodes attached to the | |||
BD, i.e., NVE2, NVE3, PE1. On the right-hand side of the diagram, the | BD, i.e., NVE2, NVE3, and PE1. On the right-hand side, the | |||
same example is depicted but using a PIM-based tree, i.e., (S1,G1), | same example is depicted but using a PIM-based tree, i.e., (S1,G1), | |||
instead of Ingress Replication. While a single copy of the tunneled BUM | instead of ingress replication. While a single copy of the tunneled BUM | |||
frame is generated in the latter approach, all the routers in the fabric | frame is generated in the latter approach, all the routers in the fabric | |||
need to keep multicast state, e.g., the Spine keeps a PIM multicast | need to keep multicast state, e.g., the spine keeps a PIM | |||
routing entry for (S1,G1) with an Incoming Interface (IIF) and three | routing entry for (S1,G1) with an Incoming Interface (IIF) and three | |||
Outgoing Interfaces (OIFs).</t> | Outgoing Interfaces (OIFs).</t> | |||
<figure anchor="IR-PIM"> | ||||
<t><figure anchor="IR-PIM" | <name>Ingress Replication vs. PIM-Based Trees in NVO Networks</name> | |||
title="Ingress Replication vs PIM-based trees in NVO networks"> | <artwork name="" type="" align="left" alt=""><![CDATA[ To WAN | |||
<artwork><![CDATA[ To-WAN To-WAN | To WAN | |||
^ ^ | ^ ^ | |||
| | | | | | |||
+-----+ +-----+ | +-----+ +-----+ | |||
+----------| PE1 |-----------+ +----------| PE1 |-----------+ | +----------| PE1 |-----------+ +----------| PE1 |-----------+ | |||
| +--^--+ | | +--^--+ | | | +--^--+ | | +--^--+ | | |||
| | IP Fabric | | | IP Fabric | | | | IP Fabric | | | IP Fabric | | |||
| PE | | (S1,G1) |OIF to-G | | | PE | | (S1,G1) |OIF to G1 | | |||
| +----PE->+-----+ No State | | IIF +-----+ OIF to-G | | | +----PE->+-----+ No State | | IIF +-----+ OIF to G1 | | |||
| | +---2->|Spine|------+ | | +------>Spine|------+ | | | | +---2->|Spine|------+ | | +------>Spine|------+ | | |||
| | | +-3->+-----+ | | | | +-----+ | | | | | | +-3->+-----+ | | | | +-----+ | | | |||
| | | | 2 3 | | |PIM |OIF to-G | | | | | | | 2 3 | | |PIM |OIF to G1| | | |||
| | | |IR | | | | |tree | | | | | | | |IR | | | | |tree | | | | |||
|+-----+ +--v--+ +--v--+ | |+-----+ +--v--+ +--v--+ | | |+-----+ +--v--+ +--v--+ | |+-----+ +--v--+ +--v--+ | | |||
+| NVE1|---| NVE2|---| NVE3|-+ +| NVE1|---| NVE2|---| NVE3|-+ | +| NVE1|---| NVE2|---| NVE3|-+ +| NVE1|---| NVE2|---| NVE3|-+ | |||
+--^--+ +-----+ +-----+ +--^--+ +-----+ +-----+ | +--^--+ +-----+ +-----+ +--^--+ +-----+ +-----+ | |||
| | | | | | | | | | | | | | |||
| v v | v v | | v v | v v | |||
TS1 TS2 TS3 TS1 TS2 TS3]]></artwork> | TS1 TS2 TS3 TS1 TS2 TS3 | |||
</figure></t> | ]]></artwork> | |||
</figure> | ||||
<t>In Network Virtualization Overlay networks where PIM-based trees | <t>In NVO networks where PIM-based trees | |||
cannot be used, Ingress Replication is the only option. Examples of | cannot be used, ingress replication is the only option. Examples of | |||
these situations are Network Virtualization Overlay networks where the | these situations are NVO networks where the | |||
core nodes do not support PIM or the network operator does not want to | core nodes do not support PIM or the network operator does not want to | |||
run PIM in the core.</t> | run PIM in the core.</t> | |||
<t>In some use cases, the amount of replication for BUM traffic is kept | ||||
<t>In some use-cases, the amount of replication for BUM traffic is kept | ||||
under control on the NVEs due to the following fairly common | under control on the NVEs due to the following fairly common | |||
assumptions:</t> | assumptions:</t> | |||
<t><list hangIndent="3" style="letters"> | <ol spacing="normal" type="a"><li>Broadcast traffic is greatly reduced due | |||
<t hangText="">Broadcast is greatly reduced due to the proxy ARP | to the proxy | |||
(Address Resolution Protocol) and proxy ND (Neighbor Discovery) | Address Resolution Protocol (ARP) and proxy Neighbor Discovery (ND) | |||
capabilities supported by EVPN on the NVEs <xref | capabilities supported by EVPNs <xref target="RFC9161" format="default | |||
target="I-D.ietf-bess-evpn-proxy-arp-nd"/>. Some NVEs can even | "/> on the NVEs. Some NVEs can even | |||
provide Dynamic Host Configuration Protocol (DHCP) server functions | provide Dynamic Host Configuration Protocol (DHCP) server functions | |||
for the attached Tenant Systems, reducing the broadcast even | for the attached TSs, reducing the broadcast traffic even | |||
further.</t> | further.</li> | |||
<li>Unknown | ||||
<t | unicast traffic is greatly reduced in NVO | |||
hangText="b) Unknown unicast traffic is greatly reduced in virtualized | networks where all the Media Access Control (MAC) and IP addresses fro | |||
NVO">Unknown | m the TSs | |||
unicast traffic is greatly reduced in Network Virtualization Overlay | are learned in the control plane.</li> | |||
networks where all the MAC and IP addresses from the Tenant Systems | <li>Multicast applications are not used.</li> | |||
are learned in the control plane.</t> | </ol> | |||
<t>If the above assumptions are true for a given NVO network, then ingress | ||||
<t>Multicast applications are not used.</t> | replication provides a simple solution for | |||
</list></t> | multi-destination traffic. However, statement c. above is not always | |||
true, and multicast applications are required in many use cases.</t> | ||||
<t>If the above assumptions are true for a given Network Virtualization | ||||
Overlay network, then Ingress Replication provides a simple solution for | ||||
multi-destination traffic. However, the statement c) above is not always | ||||
true and multicast applications are required in many use-cases.</t> | ||||
<t>When the multicast sources are attached to NVEs residing in | <t>When the multicast sources are attached to NVEs residing in | |||
hypervisors or low-performance-replication TORs (Top Of Rack switches), | hypervisors or low-performance-replication Top-of-Rack (ToR) switches, | |||
the ingress replication of a large amount of multicast traffic to a | the ingress replication of a large amount of multicast traffic to a | |||
significant number of remote NVEs/PEs can seriously degrade the | significant number of remote NVEs/PEs can seriously degrade the | |||
performance of the NVE and impact the application.</t> | performance of the NVE and impact the application.</t> | |||
<t>This document describes a solution that makes use of two ingress | ||||
<t>This document describes a solution that makes use of two Ingress | replication optimizations:</t> | |||
Replication optimizations:</t> | <ol spacing="normal" type="1"><li>Assisted Replication (AR)</li> | |||
<li>Pruned Flooding Lists (PFLs)</li> | ||||
<t><list style="numbers"> | </ol> | |||
<t>Assisted-Replication (AR)</t> | <t>Assisted Replication consists of a set of procedures that allows the | |||
ingress NVE/PE to send a single copy of a broadcast or multicast frame | ||||
<t>Pruned-Flood-Lists (PFL)</t> | received from a TS to the BD without the need | |||
</list></t> | ||||
<t>Assisted-Replication consists of a set of procedures that allows the | ||||
ingress NVE/PE to send a single copy of a Broadcast or Multicast frame | ||||
received from a Tenant System to the Broadcast Domain, without the need | ||||
for PIM in the underlay. Assisted Replication defines the roles of | for PIM in the underlay. Assisted Replication defines the roles of | |||
AR-REPLICATOR and AR-LEAF routers. The AR-LEAF is the ingress NVE/PE | AR-REPLICATOR and AR-LEAF routers. The AR-LEAF is the ingress NVE/PE | |||
attached to the Tenant System. The AR-LEAF sends a single copy of a | attached to the TS. The AR-LEAF sends a single copy of a | |||
Broadcast or Multicast packet to a selected AR-REPLICATOR that | broadcast or multicast packet to a selected AR-REPLICATOR that | |||
replicates the packet multiple times to remote AR-LEAF or AR-REPLICATOR | replicates the packet multiple times to remote AR-LEAF or AR-REPLICATOR | |||
routers, and therefore "assisting" the ingress AR-LEAF in delivering the | routers and is therefore "assisting" the ingress AR-LEAF in delivering the | |||
Broadcast or Multicast traffic to the remote NVEs/PEs attached to the | broadcast or multicast traffic to the remote NVEs/PEs attached to the | |||
same Broadcast Domain. Assisted-Replication can use a single | same BD. Assisted Replication can use a single | |||
AR-REPLICATOR or two AR-REPLICATOR routers in the path between the | AR-REPLICATOR or two AR-REPLICATOR routers in the path between the | |||
ingress AR-LEAF and the remote destination NVE/PEs. The procedures that | ingress AR-LEAF and the remote destination NVEs/PEs. The procedures that | |||
use a single AR-REPLICATOR (Non-Selective Assisted-Replication Solution) | use a single AR-REPLICATOR (the non-selective Assisted Replication solutio | |||
are specified in <xref target="sect-5"/>, whereas <xref | n) | |||
target="sect-6"/> describes how multi-staged replication, i.e., two | are specified in <xref target="sect-5" format="default"/>, whereas <xref t | |||
arget="sect-6" format="default"/> describes how multi-stage replication, i.e., t | ||||
wo | ||||
AR-REPLICATOR routers in the path between the ingress AR-LEAF and | AR-REPLICATOR routers in the path between the ingress AR-LEAF and | |||
destination NVEs/PEs, is accomplished (Selective Assisted-Replication | destination NVEs/PEs, is accomplished (the selective Assisted Replication | |||
Solution). The Assisted-Replication procedures do not impact unknown | solution). The procedures for Assisted Replication do not impact unknown | |||
unicast traffic, which follows the same forwarding procedures as known | unicast traffic, which follows the same forwarding procedures as known | |||
unicast traffic so that packet re-ordering does not occur.</t> | unicast traffic so that packet reordering does not occur.</t> | |||
<t>PFLs provide a method for the ingress NVE/PE to prune or | ||||
<t>Pruned-Flood-Lists is a method for the ingress NVE/PE to prune or | remove certain destination NVEs/PEs from a flooding list, depending on the | |||
remove certain destination NVEs/PEs from a flood-list, depending on the | interest of those NVEs/PEs in receiving BUM traffic. As specified in <xref | |||
interest of those NVEs/PEs in receiving Broadcast, Multicast or Unknown | target="RFC8365" format="default"/>, an NVE/PE builds a | |||
unicast. As specified in <xref target="RFC8365"/>, an NVE/PE builds a | flooding list for BUM traffic based on the next hops of the received EVPN | |||
flood-list for BUM traffic based on the Next-Hops of the received EVPN | Inclusive Multicast Ethernet Tag routes for the BD. While | |||
Inclusive Multicast Ethernet Tag routes for the Broadcast Domain. While | <xref target="RFC8365" format="default"/> states that the flooding list is | |||
<xref target="RFC8365"/> states that the flood-list is used for all BUM | used for all BUM | |||
traffic, this document allows pruning certain Next-Hops from the list. | traffic, this document allows pruning certain next hops from the list. | |||
As an example, suppose an ingress NVE creates a flood-list with | As an example, suppose an ingress NVE creates a flooding list with | |||
Next-Hops PE1, PE2 and PE3. If PE2 and PE3 signaled no-interest in | next hops PE1, PE2, and PE3. If PE2 and PE3 did not signal any interest in | |||
receiving Unknown Unicast in their Inclusive Multicast Ethernet Tag | receiving unknown unicast traffic in their Inclusive Multicast Ethernet Ta | |||
routes, when the ingress NVE receives an Unknown Unicast frame from a | g | |||
Tenant System it will replicate it only to PE1. That is, PE2 and PE3 are | routes, when the ingress NVE receives an unknown unicast frame from a | |||
"pruned" from the NVE's flood-list for Unknown Unicast traffic. | TS, it will replicate it only to PE1. That is, PE2 and PE3 are | |||
Pruned-Flood-Lists can be used with Ingress Replication or | "pruned" from the NVE's flooding list for unknown unicast traffic. | |||
Assisted-Replication, and it is described in <xref | PFLs can be used with ingress replication or | |||
target="sect-7"/>.</t> | Assisted Replication and are described in <xref target="sect-7" format="de | |||
fault"/>.</t> | ||||
<t>Both optimizations, Assisted-Replication and Pruned-Flood-Lists, may | <t>Both optimizations -- Assisted Replication and PFLs -- may | |||
be used together or independently so that the performance and efficiency | be used together or independently so that the performance and efficiency | |||
of the network to transport multicast can be improved. Both solutions | of the network to transport multicast can be improved. Both solutions | |||
require some extensions to the BGP attributes used in <xref | require some extensions to the BGP attributes used in <xref target="RFC743 | |||
target="RFC7432"/>, and they are described in <xref | 2" format="default"/>; see <xref target="sect-4" format="default"/> for details. | |||
target="sect-4"/>.</t> | </t> | |||
<t>The Assisted Replication solution described in this document is | ||||
<t>The Assisted-Replication solution described in this document is | focused on NVO networks (hence its use of IP | |||
focused on Network Virtualization Overlay networks (hence it uses IP | tunnels). MPLS transport networks are out of scope for this document. The | |||
tunnels) and MPLS transport networks are out of scope. The | PFLs solution <bcp14>MAY</bcp14> be used in NVO | |||
Pruned-Flood-Lists solution MAY be used in Network Virtualization | and MPLS transport networks.</t> | |||
Overlay and MPLS transport networks.</t> | <t><xref target="sect-3" format="default"/> lists the requirements of the | |||
combined | ||||
<t><xref target="sect-3"/> lists the requirements of the combined | optimized ingress replication solution, whereas Sections <xref target | |||
optimized Ingress Replication solution, whereas <xref target="sect-5"/> | ="sect-5" format="counter"/> | |||
and <xref target="sect-6"/> describe the Assisted-Replication solution | and <xref target="sect-6" format="counter"/> describe the Assisted Replica | |||
(for Non-Selective and Selective procedures, respectively), and <xref | tion solution | |||
target="sect-7"/> the Pruned-Flood-Lists solution.</t> | for non-selective and selective procedures, respectively. <xref target="se | |||
ct-7" format="default"/> provides the PFLs solution.</t> | ||||
</section> | </section> | |||
<section anchor="sect-2" title="Terminology and Conventions"> | <section anchor="sect-2" numbered="true" toc="default"> | |||
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", | <name>Terminology and Conventions</name> | |||
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and | <t>The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
"OPTIONAL" in this document are to be interpreted as described in BCP 14 | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL | |||
<xref target="RFC2119"/> <xref target="RFC8174"/> when, and only when, | NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", | |||
they appear in all capitals, as shown here.</t> | "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
"<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are | ||||
<t>The following terminology is used throughout the document:</t> | to be interpreted as described in BCP 14 <xref target="RFC2119" | |||
format="default"/> <xref target="RFC8174" format="default"/> when, and | ||||
<t><list style="symbols"> | only when, they appear in all capitals, as shown here.</t> | |||
<t>Assisted Replication forwarding mode: for an AR-LEAF, it means | ||||
sending an Attachment Circuit BM packet to a single AR-REPLICATOR | ||||
with tunnel destination IP AR-IP. For an AR-REPLICATOR, it means | ||||
sending a BM packet to a selected number or all the overlay tunnels | ||||
when the packet was previously received from an overlay tunnel.</t> | ||||
<t>AR-LEAF: Assisted Replication - LEAF, refers to an NVE/PE that | ||||
sends all the Broadcast and Multicast traffic to an AR-REPLICATOR | ||||
that can replicate the traffic further on its behalf. An AR-LEAF is | ||||
typically an NVE/PE with poor replication performance | ||||
capabilities.</t> | ||||
<t>AR-REPLICATOR: Assisted Replication - REPLICATOR, refers to an | <t>The following terminology is used throughout this document:</t> | |||
NVE/PE that can replicate Broadcast or Multicast traffic received on | ||||
overlay tunnels to other overlay tunnels and local Attachment | ||||
Circuits. This document defines the control and data plane | ||||
procedures that an AR-REPLICATOR needs to follow.</t> | ||||
<t>AR-IP: IP address owned by the AR-REPLICATOR and used to | <dl> | |||
<dt>AR-IP:</dt><dd>Assisted Replication - IP. Refers to an IP address ow | ||||
ned by the AR-REPLICATOR and used to | ||||
differentiate the incoming traffic that must follow the AR | differentiate the incoming traffic that must follow the AR | |||
procedures. The AR-IP is also used in the Tunnel Identifier and | procedures. The AR-IP is also used in the Tunnel Identifier and | |||
Next-Hop fields of the Replicator-AR route.</t> | Next Hop fields of the Replicator-AR route.</dd> | |||
<dt>AR-LEAF:</dt><dd>Assisted Replication - LEAF. Refers to an NVE/PE th | ||||
<t>AR-VNI: VNI advertised by the AR-REPLICATOR along with the | at | |||
sends all the BM traffic to an AR-REPLICATOR | ||||
that can replicate the traffic further on its behalf. An AR-LEAF is | ||||
typically an NVE/PE with poor replication performance | ||||
capabilities.</dd> | ||||
<dt>AR-REPLICATOR:</dt><dd>Assisted Replication - REPLICATOR. Refers to | ||||
an | ||||
NVE/PE that can replicate broadcast or multicast traffic received on | ||||
overlay tunnels to other overlay tunnels and local Attachment Circuits | ||||
(ACs). | ||||
This document defines the control and data plane | ||||
procedures that an AR-REPLICATOR needs to follow.</dd> | ||||
<dt>AR-VNI:</dt><dd>Assisted Replication - VNI. Refers to a Virtual eXte | ||||
nsible Local Area Network (VXLAN) Network Identifier (VNI) advertised by the AR- | ||||
REPLICATOR along with the | ||||
Replicator-AR route. It is used to identify the incoming packets | Replicator-AR route. It is used to identify the incoming packets | |||
that must follow AR procedures ONLY in the Single-IP AR-REPLICATOR | that must follow the AR procedures ONLY in the single-IP AR-REPLICATOR | |||
case <xref target="sect-8"/>.</t> | case (see <xref target="sect-8" format="default"/>).</dd> | |||
<dt>Assisted Replication forwarding mode:</dt><dd>In the case of an AR-L | ||||
<t>BM traffic: Refers to Broadcast and Multicast frames (excluding | EAF, | |||
unknown unicast frames).</t> | sending an AC Broadcast and Multicast (BM) packet to a single AR-REPLI | |||
CATOR | ||||
<t>BD: Broadcast Domain, as defined in <xref target="RFC7432"/>.</t> | with a tunnel destination address AR-IP. In the case of an AR-REPLICAT | |||
OR, this means | ||||
<t>BD label: defined as the MPLS label that identifies the Broadcast | sending a BM packet to a selected number of, or all of, the overlay tu | |||
Domain and is advertised in Regular-IR or Replicator-AR routes, when | nnels | |||
the encapsulation is MPLSoGRE or MPLSoUDP. </t> | when the packet was previously received from an overlay tunnel.</dd> | |||
<dt>BD:</dt><dd> Broadcast Domain, as defined in <xref target="RFC7432" | ||||
<t>DF and NDF: Designated Forwarder and Non-Designated Forwarder, | format="default"/>.</dd> | |||
are roles defined in NVE/PEs attached to Multi-Homed Tenant Systems, | <dt>BD label:</dt><dd>Defined as the MPLS label that identifies the BD | |||
as per <xref target="RFC7432"/> and <xref target="RFC8365"/>.</t> | and is advertised in Regular-IR or Replicator-AR routes, when | |||
the encapsulation is MPLS over GRE (MPLSoGRE) or MPLS over UDP (MPLSoU | ||||
<t>ES and ESI: Ethernet Segment and Ethernet Segment Identifier, as | DP). </dd> | |||
EVPN Multi-Homing concepts specified in <xref | <dt>BM traffic:</dt><dd>Refers to broadcast and multicast frames (exclud | |||
target="RFC7432"/>.</t> | ing | |||
unknown unicast frames).</dd> | ||||
<t>EVI: EVPN Instance. A group of Provider Edge (PE) devices | <dt>DF and NDF:</dt><dd>Designated Forwarder and Non-Designated Forwarde | |||
participating in the same EVPN service, as specified in <xref | r. | |||
target="RFC7432"/>.</t> | These are roles defined in NVEs/PEs attached to multihomed TSs, | |||
as per <xref target="RFC7432" format="default"/> and <xref target="RFC | ||||
<t>GRE: Generic Routing Encapsulation <xref target="RFC4023"/>.</t> | 8365" format="default"/>.</dd> | |||
<dt>ES and ESI:</dt><dd>Ethernet Segment and Ethernet Segment Identifier | ||||
<t>Ingress Replication forwarding mode: it refers to the Ingress | . | |||
Replication behavior explained in <xref target="RFC7432"/>. It means | EVPN multihoming concepts as specified in <xref target="RFC7432" forma | |||
sending an Attachment Circuit BM packet copy to each remote PE/NVE | t="default"/>.</dd> | |||
in the BD and sending an overlay BM packet only to the Attachment | <dt>EVI:</dt><dd> EVPN Instance. A group of Provider Edge (PE) devices | |||
Circuits and not other overlay tunnels.</t> | participating in the same EVPN service, as specified in <xref target=" | |||
RFC7432" format="default"/>.</dd> | ||||
<t>IR-IP: local IP address of an NVE/PE that is used for the Ingress | <dt>GRE:</dt><dd>Generic Routing Encapsulation <xref target="RFC4023" fo | |||
Replication signaling and procedures in <xref target="RFC7432"/>. | rmat="default"/>.</dd> | |||
Encapsulated incoming traffic with outer destination IP matching the | <dt>Ingress Replication forwarding mode:</dt><dd> Refers to the ingress | |||
IR-IP will follow the Ingress Replication procedures and not the | replication behavior explained in <xref target="RFC7432" format="defau | |||
Assisted-Replication procedures. The IR-IP is also used in the | lt"/>. In | |||
Tunnel Identifier and Next-hop fields of the Regular-IR route.</t> | this mode, an AC BM packet copy is sent to each remote PE/NVE | |||
in the BD, and an overlay BM packet is sent only to the ACs | ||||
<t>IR-VNI: VNI advertised along with the Inclusive Multicast | and not to other overlay tunnels.</dd> | |||
Ethernet Tag route for Ingress Replication Tunnel Type.</t> | <dt>IR-IP:</dt><dd>Ingress Replication - IP. Refers to the local IP addr | |||
ess of an NVE/PE that is used for the ingress | ||||
<t>MPLS: Multi-Protocol Label Switching.</t> | replication signaling and procedures provided in <xref target="RFC7432 | |||
" format="default"/>. | ||||
<t>NVE: Network Virtualization Edge router, used in this document as | Encapsulated incoming traffic with an outer destination IP address mat | |||
in <xref target="RFC8365"/>.</t> | ching the | |||
IR-IP will follow the procedures for ingress replication and not the | ||||
<t>NVGRE: Network Virtualization using Generic Routing | procedures for Assisted Replication. The IR-IP is also used in the | |||
Encapsulation, as in <xref target="RFC7637"/>.</t> | Tunnel Identifier and Next Hop fields of the Regular-IR route.</dd> | |||
<dt>IR-VNI:</dt><dd>Ingress Replication - VNI. Refers to a VNI advertise | ||||
<t>PE: Provider Edge router.</t> | d along with the Inclusive Multicast | |||
Ethernet Tag route for the ingress replication tunnel type.</dd> | ||||
<t>PMSI: P-Multicast Service Interface - a conceptual interface for | <dt>MPLS:</dt><dd>Multi-Protocol Label Switching.</dd> | |||
<dt>NVE:</dt><dd>Network Virtualization Edge <xref target="RFC8365" form | ||||
at="default"/>.</dd> | ||||
<dt>NVGRE:</dt><dd>Network virtualization using Generic Routing | ||||
Encapsulation <xref target="RFC7637" format="default"/>.</dd> | ||||
<dt>PE:</dt><dd>Provider Edge.</dd> | ||||
<dt>PMSI:</dt><dd>P-Multicast Service Interface. A conceptual interface | ||||
for | ||||
a PE to send customer multicast traffic to all or some PEs in the | a PE to send customer multicast traffic to all or some PEs in the | |||
same VPN <xref target="RFC6513"/>.</t> | same VPN <xref target="RFC6513" format="default"/>.</dd> | |||
<dt>RD:</dt><dd>Route Distinguisher.</dd> | ||||
<t>RD: Route Distinguisher.</t> | <dt>Regular-IR route:</dt><dd>An EVPN Inclusive Multicast Ethernet Tag r | |||
oute | ||||
<t>Regular-IR route: an EVPN Inclusive Multicast Ethernet Tag route | <xref target="RFC7432" format="default"/> that uses the ingress replic | |||
<xref target="RFC7432"/> that uses Ingress Replication Tunnel | ation tunnel | |||
Type.</t> | type.</dd> | |||
<dt>Replicator-AR route:</dt><dd>An EVPN Inclusive Multicast Ethernet Ta | ||||
<t>RNVE: Regular NVE, refers to an NVE that supports the procedures | g | |||
of <xref target="RFC8365"/> and does not support the procedures in | ||||
this document. However, this document defines procedures to | ||||
interoperate with RNVEs.</t> | ||||
<t>Replicator-AR route: an EVPN Inclusive Multicast Ethernet Tag | ||||
route that is advertised by an AR-REPLICATOR to signal its | route that is advertised by an AR-REPLICATOR to signal its | |||
capabilities, as described in <xref target="sect-4"/>.</t> | capabilities, as described in <xref target="sect-4" format="default"/> | |||
.</dd> | ||||
<t>TOR: Top Of Rack switch.</t> | <dt>RNVE:</dt><dd>Regular NVE. Refers to an NVE that supports the proced | |||
ures | ||||
<t>TS and VM: Tenant System and Virtual Machine. In this document | provided in <xref target="RFC8365" format="default"/> and does not sup | |||
Tenant Systems and Virtual Machiness are the devices connected to | port the procedures provided in | |||
the Attachment Circuits of the PEs and NVEs.</t> | this document. However, this document defines procedures to | |||
interoperate with RNVEs.</dd> | ||||
<t>VNI: VXLAN Network Identifier, used in VXLAN tunnels.</t> | <dt>ToR switch:</dt><dd>Top-of-Rack switch.</dd> | |||
<dt>TS and VM:</dt><dd>Tenant System and Virtual Machine. In this docume | ||||
<t>VSID: Virtual Segment Identifier, used in NVGRE tunnels.</t> | nt, | |||
TSs and VMs are the devices connected to | ||||
<t>VXLAN: Virtual Extensible LAN <xref target="RFC7348"/>.</t> | the ACs of the PEs and NVEs.</dd> | |||
</list></t> | <dt>VNI:</dt><dd>VXLAN Network Identifier. Used in VXLAN tunnels.</dd> | |||
<dt>VSID:</dt><dd>Virtual Segment Identifier. Used in NVGRE tunnels.</dd | ||||
> | ||||
<dt>VXLAN:</dt><dd>Virtual eXtensible Local Area Network <xref target="R | ||||
FC7348" format="default"/>.</dd> | ||||
</dl> | ||||
</section> | </section> | |||
<section anchor="sect-3" title="Solution Requirements"> | <section anchor="sect-3" numbered="true" toc="default"> | |||
<t>The Ingress Replication optimization solution specified in this | <name>Solution Requirements</name> | |||
<t>The ingress replication optimization solution specified in this | ||||
document meets the following requirements:</t> | document meets the following requirements:</t> | |||
<ol spacing="normal" type="a"><li>The solution provides an ingress replica | ||||
<t><list style="letters"> | tion optimization for BM | |||
<t>It provides an Ingress Replication optimization for Broadcast and | traffic without the need for PIM while preserving the | |||
Multicast traffic without the need for PIM, while preserving the | ||||
packet order for unicast applications, i.e., unknown unicast traffic | packet order for unicast applications, i.e., unknown unicast traffic | |||
should follow the same path as known unicast traffic. This | should follow the same path as known unicast traffic. This | |||
optimization is required in low-performance NVEs.</t> | optimization is required in low-performance NVEs.</li> | |||
<li>The solution reduces the flooded traffic in NVO | ||||
<t>It reduces the flooded traffic in Network Virtualization Overlay | ||||
networks where some NVEs do not need broadcast/multicast and/or | networks where some NVEs do not need broadcast/multicast and/or | |||
unknown unicast traffic.</t> | unknown unicast traffic.</li> | |||
<li> | ||||
<t>The solution is compatible with <xref target="RFC7432"/> and | <t>The solution is compatible with <xref target="RFC7432" format="defa | |||
<xref target="RFC8365"/> and has no impact on the CE procedures for | ult"/> and | |||
<xref target="RFC8365" format="default"/> and has no impact on the Cus | ||||
tomer Edge (CE) procedures for | ||||
BM traffic. In particular, the solution supports the following EVPN | BM traffic. In particular, the solution supports the following EVPN | |||
functions: <list style="symbols"> | functions:</t> | |||
<t>All-active multi-homing, including the split-horizon and | <ul spacing="normal"> | |||
Designated Forwarder (DF) functions.</t> | <li>All-active multihoming, including the split-horizon and | |||
DF functions.</li> | ||||
<t>Single-active multi-homing, including the DF function.</t> | <li>Single-active multihoming, including the DF function.</li> | |||
<li>Handling of multi-destination traffic and processing of | ||||
<t>Handling of multi-destination traffic and processing of | BM traffic as per <xref target="RFC7432" format="default"/>.</li> | |||
broadcast and multicast as per <xref target="RFC7432"/>.</t> | </ul> | |||
</list></t> | </li> | |||
<li>The solution is backward compatible with existing NVEs using a | ||||
<t>The solution is backwards compatible with existing NVEs using a | non-optimized version of ingress replication. A given BD can have | |||
non-optimized version of Ingress Replication. A given BD can have | NVEs/PEs supporting regular ingress replication and optimized | |||
NVEs/PEs supporting regular Ingress Replication and optimized | ingress replication.</li> | |||
Ingress Replication.</t> | <li>The solution is independent of the NVO-specific data plane encapsula | |||
tion and the virtual identifiers being | ||||
<t>The solution is independent of the Network Virtualization Overlay | used, e.g., VXLAN VNIs, NVGRE VSIDs, or MPLS labels, as long as the | |||
specific data plane encapsulation and the virtual identifiers being | tunnel is IP based.</li> | |||
used, e.g.: VXLAN VNIs, NVGRE VSIDs or MPLS labels, as long as the | </ol> | |||
tunnel is IP-based.</t> | ||||
</list></t> | ||||
</section> | </section> | |||
<section anchor="sect-4" numbered="true" toc="default"> | ||||
<section anchor="sect-4" | <name>EVPN BGP Attributes for Optimized Ingress Replication</name> | |||
title="EVPN BGP Attributes for Optimized Ingress Replication"> | <t>The ingress replication optimization solution specified in this documen | |||
<t>This solution extends the <xref target="RFC7432"/> Inclusive | t | |||
Multicast Ethernet Tag routes and attributes so that an NVE/PE can | extends the Inclusive | |||
signal its optimized Ingress Replication capabilities.</t> | Multicast Ethernet Tag routes and attributes described in <xref target="RF | |||
C7432" format="default"/> so that an NVE/PE can | ||||
<t>The NLRI of the Inclusive Multicast Ethernet Tag route as in <xref | signal its optimized ingress replication capabilities.</t> | |||
target="RFC7432"/> is shown in <xref target="imet-route"/> and it is | <t>The Network Layer Reachability Information (NLRI) of the Inclusive Mult | |||
icast Ethernet Tag route <xref target="RFC7432" format="default"/> is shown in < | ||||
xref target="imet-route" format="default"/> and is | ||||
used in this document without any modifications to its format. The PMSI | used in this document without any modifications to its format. The PMSI | |||
Tunnel Attribute's general format as in <xref target="RFC7432"/> (which | Tunnel Attribute's general format as provided in <xref target="RFC7432" fo | |||
takes it from <xref target="RFC6514"/>) is used in this document, only a | rmat="default"/> (which | |||
new Tunnel Type and new flags are specified, as shown in <xref | takes it from <xref target="RFC6514" format="default"/>) is used in this d | |||
target="pta"/>:</t> | ocument; only a | |||
new tunnel type and new flags are specified, as shown in <xref target="pta | ||||
" format="default"/>.</t> | ||||
<figure anchor="imet-route"> | ||||
<name>EVPN Inclusive Multicast Ethernet Tag Route's NLRI</name> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ + | ||||
------------------------------------+ | ||||
| RD (8 octets) | | ||||
+------------------------------------+ | ||||
| Ethernet Tag ID (4 octets) | | ||||
+------------------------------------+ | ||||
| IP Address Length (1 octet) | | ||||
+------------------------------------+ | ||||
| Originating Router's IP Address | | ||||
| (4 or 16 octets) | | ||||
+------------------------------------+ | ||||
]]></artwork> | ||||
</figure> | ||||
<t><figure anchor="imet-route" | <figure anchor="pta"> | |||
title="EVPN Inclusive Multicast Tag route's NLRI"> | <name>PMSI Tunnel Attribute</name> | |||
<artwork><![CDATA[ +---------------------------------+ | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| RD (8 octets) | | 0 1 2 3 4 5 6 7 | |||
+---------------------------------+ | ||||
| Ethernet Tag ID (4 octets) | | ||||
+---------------------------------+ | ||||
| IP Address Length (1 octet) | | ||||
+---------------------------------+ | ||||
| Originating Router's IP Addr | | ||||
| (4 or 16 octets) | | ||||
+---------------------------------+]]></artwork> | ||||
</figure><figure anchor="pta" title="PMSI Tunnel Attribute"> | ||||
<artwork><![CDATA[ 0 1 2 3 | ||||
4 5 6 7 | ||||
+---------------------------------+ +--+--+--+--+--+--+--+--+ | +---------------------------------+ +--+--+--+--+--+--+--+--+ | |||
| Flags (1 octet) | -> |x |E |x | T |BM|U |L | | | Flags (1 octet) | -> |x |E |x | T |BM|U |L | | |||
+---------------------------------+ +--+--+--+--+--+--+--+--+ | +---------------------------------+ +--+--+--+--+--+--+--+--+ | |||
| Tunnel Type (1 octets) | T = Assisted-Replication Type | | Tunnel Type (1 octet) | T = Assisted Replication Type | |||
+---------------------------------+ BM = Broadcast and Multicast | +---------------------------------+ BM = Broadcast and Multicast | |||
| MPLS Label (3 octets) | U = Unknown unicast | | MPLS Label (3 octets) | U = Unknown (unknown unicast) | |||
+---------------------------------+ x = unassigned | +---------------------------------+ x = unassigned | |||
| Tunnel Identifier (variable) | | | Tunnel Identifier (variable) | | |||
+---------------------------------+]]></artwork> | +---------------------------------+ | |||
</figure>The Flags field in <xref target="pta"/> is 8 bits long as per | ]]></artwork> | |||
<xref target="RFC7902"/>, where the Extension flag (E) and the Leaf | </figure> | |||
Information Required (L) Flag are already allocated. This document | <t>The Flags field in <xref target="pta" format="default"/> is 8 bits long | |||
defines the use of 4 bits of this Flags field, and suggests the | as per | |||
following allocation to IANA:</t> | <xref target="RFC7902" format="default"/>. The Extension (E) flag was allo | |||
cated by <xref target="RFC7902" format="default"/>, and the Leaf | ||||
<t><list style="symbols"> | Information Required (L) flag was allocated by <xref target="RFC6514" form | |||
<t>bits 3 and 4, forming together the Assisted-Replication Type (T) | at="default"/>. This document defines the use of 4 bits of this Flags field: | |||
field</t> | </t> | |||
<t>bit 5, called the Broadcast and Multicast (BM) flag</t> | ||||
<t>bit 6, called the Unknown (U) flag</t> | ||||
</list>Bits 5 and 6 are collectively referred to as the Pruned-Flood | ||||
Lists (PFL) flags.</t> | ||||
<t>The T field and Pruned-Flood-Lists flags are defined as follows:</t> | ||||
<t><list style="symbols"> | ||||
<t>T is the Assisted-Replication Type field (2 bits) that defines | ||||
the AR role of the advertising router:<list style="symbols"> | ||||
<t>00 (decimal 0) = RNVE (non-AR support)</t> | ||||
<t>01 (decimal 1) = AR-REPLICATOR</t> | ||||
<t>10 (decimal 2) = AR-LEAF</t> | ||||
<t>11 (decimal 3) = RESERVED</t> | ||||
</list></t> | ||||
<t>The Pruned-Flood-Lists flags define the desired behavior of the | ||||
advertising router for the different types of traffic:<list | ||||
style="symbols"> | ||||
<t>Broadcast and Multicast (BM) flag. BM=1 means "prune-me" from | ||||
the BM flooding list. BM=0 means regular behavior.</t> | ||||
<t>Unknown (U) flag. U=1 means "prune-me" from the Unknown | ||||
flooding list. U=0 means regular behavior.</t> | ||||
</list></t> | ||||
<t>Flag L is an existing flag defined in <xref target="RFC6514"/> | <ul spacing="normal"> | |||
(L=Leaf Information Required, bit 7) and it will be used only in the | <li>Bits 3 and 4, which together form the Assisted Replication Type (T) | |||
Selective AR Solution.</t> | field</li> | |||
</list></t> | <li>Bit 5, called the Broadcast and Multicast (BM) flag</li> | |||
<li>Bit 6, called the Unknown (U) flag</li> | ||||
</ul> | ||||
<t>Bits 5 and 6 are collectively referred to as the Pruned Flooding Lists | ||||
(PFLs) flags.</t> | ||||
<t>The T field and PFLs flags are defined as follows:</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t>T is the Assisted Replication Type field (2 bits), which defines | ||||
the AR role of the advertising router:</t> | ||||
<ul spacing="normal"> | ||||
<li>00 (decimal 0) = RNVE (non-AR support)</li> | ||||
<li>01 (decimal 1) = AR-REPLICATOR</li> | ||||
<li>10 (decimal 2) = AR-LEAF</li> | ||||
<li>11 (decimal 3) = RESERVED</li> | ||||
</ul> | ||||
</li> | ||||
<li> | ||||
<t>The PFLs flags define the desired behavior of the | ||||
advertising router for the different types of traffic:</t> | ||||
<ul spacing="normal"> | ||||
<li>Broadcast and Multicast (BM) flag. BM = 1 means "prune me from | ||||
the BM flooding list". BM = 0 indicates regular behavior.</li> | ||||
<li>Unknown (U) flag. U = 1 means "prune me from the Unknown | ||||
flooding list". U = 0 indicates regular behavior.</li> | ||||
</ul> | ||||
</li> | ||||
<t>Please refer to <xref target="sect-11"/> for the IANA considerations | <li>The L flag (bit 7) is defined in <xref target="RFC6514" format="defa | |||
ult"/> | ||||
and will be used only in the | ||||
selective AR solution.</li> | ||||
</ul> | ||||
<t>Please refer to <xref target="sect-11" format="default"/> for the IANA | ||||
considerations | ||||
related to the PMSI Tunnel Attribute flags.</t> | related to the PMSI Tunnel Attribute flags.</t> | |||
<t>In this document, the above Inclusive Multicast Ethernet Tag route | <t>In this document, the above Inclusive Multicast Ethernet Tag route | |||
<xref target="imet-route"/> and PMSI Tunnel Attribute <xref | (<xref target="imet-route" format="default"/>) and PMSI Tunnel Attribute ( | |||
target="pta"/> can be used in two different modes for the same BD:</t> | <xref target="pta" format="default"/>) can be used in two different modes for th | |||
e same BD:</t> | ||||
<t><list style="symbols"> | <dl> | |||
<t>Regular-IR route: in this route, Originating Router's IP Address, | <dt>Regular-IR route:</dt><dd>In this route, Originating Router's IP Add | |||
Tunnel Type (0x06), MPLS Label and Tunnel Identifier MUST be used as | ress, | |||
described in <xref target="RFC7432"/> when Ingress Replication is in | Tunnel Type (0x06), MPLS Label, and Tunnel Identifier <bcp14>MUST</bcp | |||
use. The NVE/PE that advertises the route will set the Next-Hop to | 14> be used as | |||
described in <xref target="RFC7432" format="default"/> when ingress re | ||||
plication is in | ||||
use. The NVE/PE that advertises the route will set the Next Hop to | ||||
an IP address that we denominate IR-IP in this document. When | an IP address that we denominate IR-IP in this document. When | |||
advertised by an AR-LEAF node, the Regular-IR route MUST be | advertised by an AR-LEAF node, the Regular-IR route <bcp14>MUST</bcp14 | |||
advertised with type T set to 10 (AR-LEAF).</t> | > be | |||
advertised with the T field set to 10 (AR-LEAF).</dd> | ||||
<dt> | ||||
Replicator-AR route:</dt><dd><t>This route is used by the AR-REPLICATO | ||||
R to | ||||
advertise its AR capabilities, with the fields set as follows:</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t>Replicator-AR route: this route is used by the AR-REPLICATOR to | <t>Originating Router's IP Address <bcp14>MUST</bcp14> be set to a | |||
advertise its AR capabilities, with the fields set as follows:<list | n IP address | |||
style="symbols"> | ||||
<t>Originating Router's IP Address MUST be set to an IP address | ||||
of the advertising router that is common to all the EVIs on the | of the advertising router that is common to all the EVIs on the | |||
PE (usually this is a loopback address of the PE). <list | PE (usually this is a loopback address of the PE). </t> | |||
style="symbols"> | <ul spacing="normal"> | |||
<t>The Tunnel Identifier and Next-Hop SHOULD be set to the | <li>The Tunnel Identifier and Next Hop fields <bcp14>SHOULD</bcp | |||
same IP address as the Originating Router's IP address when | 14> be set to the | |||
the NVE/PE originates the route, that is, when the NVE/PE is | same IP address as the Originating Router's IP Address field w | |||
not an ASBR as in section 10.2 of <xref target="RFC8365"/>. | hen | |||
the NVE/PE originates the route -- that is, when the NVE/PE is | ||||
not an ASBR; see <xref target="RFC8365" section="10.2" section | ||||
Format="of"/>. | ||||
Irrespective of the values in the Tunnel Identifier and | Irrespective of the values in the Tunnel Identifier and | |||
Originating Router's IP Address fields, the ingress NVE/PE | Originating Router's IP Address fields, the ingress NVE/PE | |||
will process the received Replicator-AR route and will use | will process the received Replicator-AR route and will use | |||
the IP Address in the Next-Hop field to create IP tunnels to | the IP address setting in the Next Hop field to create IP tunn | |||
the AR-REPLICATOR.</t> | els to | |||
the AR-REPLICATOR.</li> | ||||
<t>The Next-Hop address is referred to as the AR-IP and MUST | <li>The Next Hop address is referred to as the AR-IP and <bcp14> | |||
MUST</bcp14> | ||||
be different from the IR-IP for a given PE/NVE, unless the | be different from the IR-IP for a given PE/NVE, unless the | |||
procedures in <xref target="sect-8"/> are followed.</t> | procedures provided in <xref target="sect-8" format="default"/ | |||
</list></t> | > are followed.</li> | |||
</ul> | ||||
<t>Tunnel Type MUST be set to Assisted-Replication Tunnel. <xref | </li> | |||
target="sect-11"/> provides the allocated type value.</t> | <li>Tunnel Type <bcp14>MUST</bcp14> be set to Assisted Replication T | |||
unnel. <xref target="sect-11" format="default"/> provides the allocated type val | ||||
<t>T (AR role type) MUST be set to 01 (AR-REPLICATOR).</t> | ue.</li> | |||
<li>T (Assisted Replication type) <bcp14>MUST</bcp14> be set to 01 ( | ||||
<t>L (Leaf Information Required) MUST be set to 0 (for | AR-REPLICATOR).</li> | |||
non-selective AR), and MUST be set to 1 (for selective AR).</t> | <li>L (Leaf Information Required) <bcp14>MUST</bcp14> be set to 0 fo | |||
</list></t> | r | |||
</list></t> | non-selective AR and <bcp14>MUST</bcp14> be set to 1 for selective | |||
AR.</li> | ||||
</ul> | ||||
</dd> | ||||
</dl> | ||||
<t>An NVE/PE configured as AR-REPLICATOR for a BD MUST advertise a | <t>An NVE/PE configured as an AR-REPLICATOR for a BD <bcp14>MUST</bcp14> a | |||
Replicator-AR route for the BD and MAY advertise a Regular-IR route. The | dvertise a | |||
advertisement of the Replicator-AR route will indicate the AR-LEAFs what | Replicator-AR route for the BD and <bcp14>MAY</bcp14> advertise a Regular- | |||
outer IP DA, i.e., the AR-IP, they need to use for IP encapsulated BM | IR route. The | |||
advertisement of the Replicator-AR route will indicate to the AR-LEAFs whi | ||||
ch | ||||
outer IP DA, i.e., which AR-IP, they need to use for IP-encapsulated BM | ||||
frames that use Assisted Replication forwarding mode. The AR-REPLICATOR | frames that use Assisted Replication forwarding mode. The AR-REPLICATOR | |||
will forward an IP encapsulated BM frame in Assisted Replication | will forward an IP-encapsulated BM frame in Assisted Replication | |||
forwarding mode if the outer IP DA matches its AR-IP, but will forward | forwarding mode if the outer IP DA matches its AR-IP but will forward | |||
in Ingress Replication forwarding mode if the outer IP DA matches its | in Ingress Replication forwarding mode if the outer IP DA matches its | |||
IR-IP.</t> | IR-IP.</t> | |||
<t>In addition, this document also uses the Leaf Auto-Discovery (Leaf | <t>In addition, this document also uses the Leaf Auto-Discovery (Leaf | |||
A-D) route defined in <xref | A-D) route defined in <xref target="RFC9572" format="default"/> in cases w | |||
target="I-D.ietf-bess-evpn-bum-procedure-updates"/> in case the | here the | |||
selective AR mode is used. An AR-LEAF MAY send a Leaf A-D route in | selective AR mode is used. An AR-LEAF <bcp14>MAY</bcp14> send a Leaf A-D r | |||
oute in | ||||
response to reception of a Replicator-AR route whose L flag is set. The | response to reception of a Replicator-AR route whose L flag is set. The | |||
Leaf Auto-Discovery route is only used for selective AR and the fields | Leaf A-D route is only used for selective AR, and the fields | |||
of such route are set as follows:</t> | of such a route are set as follows:</t> | |||
<ul spacing="normal"> | ||||
<t><list hangIndent="2" style="empty"> | <li>Originating Router's IP Address is set to the advertising | |||
<t><list style="symbols"> | router's IP address (the same IP address used by the AR-LEAF in Re | |||
<t>Originating Router's IP Address is set to the advertising | gular-IR | |||
router's IP address (same IP used by the AR-LEAF in regular-IR | routes). The Next Hop address is set to the IR-IP, which <bcp14>SH | |||
routes). The Next-Hop address is set to the IR-IP, which SHOULD | OULD</bcp14> | |||
be the same IP address as the advertising router's IP address, | be the same IP address as the advertising router's IP address, | |||
when the NVE/PE originates the route, i.e., when the NVE/PE is | when the NVE/PE originates the route, i.e., when the NVE/PE is | |||
not an ASBR as in section 10.2 of <xref target="RFC8365"/>.</t> | not an ASBR; see <xref target="RFC8365" sectionFormat="of" section | |||
="10.2" format="default"/>.</li> | ||||
<t>Route Key is the "Route Type Specific" NLRI of the | <li>Route Key <xref target="RFC9572" format="default"/> is the "Rout | |||
Replicator-AR route for which this Leaf Auto-Discovery route is | e Type Specific" NLRI of the | |||
generated.</t> | Replicator-AR route for which this Leaf A-D route is | |||
generated.</li> | ||||
<t>The AR-LEAF constructs an IP-address-specific route-target, | <li>The AR-LEAF constructs an IP-address-specific Route Target, | |||
analogously to <xref | analogously to <xref target="RFC9572" format="default"/>, by placi | |||
target="I-D.ietf-bess-evpn-bum-procedure-updates"/>, by placing | ng | |||
the IP address carried in the Next-Hop field of the received | the IP address carried in the Next Hop field of the received | |||
Replicator-AR route in the Global Administrator field of the | Replicator-AR route in the Global Administrator field of the | |||
Community, with the Local Administrator field of this Community | extended community, with the Local Administrator field of this extended community | |||
set to 0, and setting the Extended Communities attribute of the | set to 0, and setting the Extended Communities attribute of the | |||
Leaf Auto-Discovery route to that Community. The same | Leaf A-D route to that extended community. The same | |||
IP-address-specific import route-target is auto-configured by | IP-address-specific import Route Target is auto-configured by | |||
the AR-REPLICATOR that sent the Replicator-AR route, in order to | the AR-REPLICATOR that sent the Replicator-AR route, in order to | |||
control the acceptance of the Leaf Auto-Discovery routes.</t> | control the acceptance of the Leaf A-D routes.</li> | |||
<li>The Leaf A-D route <bcp14>MUST</bcp14> include the PMSI Tunnel | ||||
<t>The Leaf Auto-Discovery route MUST include the PMSI Tunnel | Attribute with Tunnel Type set to Assisted Replication Tunnel (<xr | |||
attribute with the Tunnel Type set to AR (<xref | ef target="sect-11" format="default"/>), T (Assisted Replication type) set to AR | |||
target="sect-11"/>), T (AR role type) set to AR-LEAF and the | -LEAF, and | |||
Tunnel Identifier set to the IP address of the advertising | Tunnel Identifier set to the IP address of the advertising | |||
AR-LEAF. The PMSI Tunnel attribute MUST carry a | AR-LEAF. The PMSI Tunnel Attribute <bcp14>MUST</bcp14> carry a | |||
downstream-assigned MPLS label or VNI that is used by the | downstream-assigned MPLS label or VNI that is used by the | |||
AR-REPLICATOR to send traffic to the AR-LEAF.</t> | AR-REPLICATOR to send traffic to the AR-LEAF.</li> | |||
</list></t> | </ul> | |||
</list></t> | ||||
<t>Each AR-enabled node understands and process the T | <t>Each AR-enabled node understands and processes the T | |||
(Assisted-Replication type) field in the PMSI Tunnel Attribute (Flags | (Assisted Replication type) field in the PMSI Tunnel Attribute (Flags | |||
field) of the routes, and MUST signal the corresponding type | field) of the routes and <bcp14>MUST</bcp14> signal the corresponding type | |||
(AR-REPLICATOR or AR-LEAF type) according to its administrative choice. | (AR-REPLICATOR or AR-LEAF type) according to its administrative choice. | |||
An NVE/PE following this specification is not expected to set the | An NVE/PE following this specification is not expected to set the | |||
Assisted-Replication Type field to decimal 3 (which is a RESERVED | Assisted Replication Type field to decimal 3 (which is a RESERVED | |||
value). If a route with the AR type field set to decimal 3 is received | value). If a route with the Assisted Replication Type field set to decimal | |||
3 is received | ||||
by an AR-REPLICATOR or AR-LEAF, the router will process the route as a | by an AR-REPLICATOR or AR-LEAF, the router will process the route as a | |||
Regular-IR route advertised by an RNVE.</t> | Regular-IR route advertised by an RNVE.</t> | |||
<t>Each node attached to the BD may understand and process the BM/U | <t>Each node attached to the BD may understand and process the BM/U | |||
flags (Pruned-Flood-Lists flags). Note that these BM/U flags may be used | flags (PFLs flags). Note that these BM/U flags may be used | |||
to optimize the delivery of multi-destination traffic and their use | to optimize the delivery of multi-destination traffic; their use | |||
SHOULD be an administrative choice, and independent of the AR role. When | <bcp14>SHOULD</bcp14> be an administrative choice and independent of the A | |||
the Pruned-Flood-List capability is enabled, the BM/U flags can be used | R role. When | |||
with the Regular-IR, Replicator-AR and Leaf Auto-Discovery routes.</t> | the PFL capability is enabled, the BM/U flags can be used | |||
with the Regular-IR, Replicator-AR, and Leaf A-D routes.</t> | ||||
<t>Non-optimized Ingress Replication NVEs/PEs will be unaware of the new | <t>Non-optimized ingress replication NVEs/PEs will be unaware of the new | |||
PMSI Tunnel Attribute flag definition as well as the new Tunnel Type | PMSI Tunnel Attribute flag definition as well as the new tunnel type | |||
(AR), i.e., non-upgraded NVEs/PEs will ignore the information contained | (AR), i.e., non-upgraded NVEs/PEs will ignore the information contained | |||
in the flags field or an unknown Tunnel Type (type AR in this case) for | in the Flags field or an unknown tunnel type (type AR in this case) for | |||
any Inclusive Multicast Ethernet Tag route.</t> | any Inclusive Multicast Ethernet Tag route.</t> | |||
</section> | </section> | |||
<section anchor="sect-5" numbered="true" toc="default"> | ||||
<section anchor="sect-5" | <name>Non-selective Assisted Replication (AR) Solution Description</name> | |||
title="Non-Selective Assisted-Replication (AR) Solution Description | <t><xref target="ure-optimized-ir-scenario" format="default"/> illustrates | |||
"> | an example | |||
<t><xref target="ure-optimized-ir-scenario"/> illustrates an example | NVO network where the non-selective AR | |||
Network Virtualization Overlay network where the non-selective AR | ||||
function is enabled. Three different roles are defined for a given BD: | function is enabled. Three different roles are defined for a given BD: | |||
AR-REPLICATOR, AR-LEAF and RNVE (Regular NVE). The solution is called | AR-REPLICATOR, AR-LEAF, and RNVE. The solution is called | |||
"non-selective" because the chosen AR-REPLICATOR for a given flow MUST | "non-selective" because the chosen AR-REPLICATOR for a given flow <bcp14>M | |||
replicate the BM traffic to all the NVE/PEs in the BD except for the | UST</bcp14> | |||
source NVE/PE. Network Virtualization Overlay tunnels, i.e., IP tunnels, | replicate the BM traffic to all the NVEs/PEs in the BD except for the | |||
source NVE/PE. NVO tunnels, i.e., IP tunnels, | ||||
exist among all the PEs and NVEs in the diagram. The PEs and NVEs in the | exist among all the PEs and NVEs in the diagram. The PEs and NVEs in the | |||
diagram have Tenant Systems or Virtual Machines connected to their | diagram have TSs or VMs connected to their | |||
Attachment Circuits.</t> | ACs.</t> | |||
<figure anchor="ure-optimized-ir-scenario"> | ||||
<figure anchor="ure-optimized-ir-scenario" | <name>Non-selective AR Scenario</name> | |||
title="Non-Selective AR scenario"> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
<artwork><![CDATA[ | ||||
( ) | ( ) | |||
(_ WAN _) | (_ WAN _) | |||
+---(_ _)----+ | +---(_ _)----+ | |||
| (_ _) | | | (_ _) | | |||
PE1 | PE2 | | PE1 | PE2 | | |||
+------+----+ +----+------+ | +------+----+ +----+------+ | |||
TS1--+ (BD-1) | | (BD-1) +--TS2 | TS1--+ (BD-1) | | (BD-1) +--TS2 | |||
|REPLICATOR | |REPLICATOR | | |REPLICATOR | |REPLICATOR | | |||
+--------+--+ +--+--------+ | +--------+--+ +--+--------+ | |||
| | | | | | |||
+--+----------------+--+ | +--+----------------+--+ | |||
| | | | | | |||
| | | | | | |||
+----+ VXLAN/nvGRE/MPLSoGRE +----+ | +----+ VXLAN/NVGRE/MPLSoGRE +----+ | |||
| | IP Fabric | | | | | IP Fabric | | | |||
| | | | | | | | | | |||
NVE1 | +-----------+----------+ | NVE3 | NVE1 | +-----------+----------+ | NVE3 | |||
Hypervisor| TOR | NVE2 |Hypervisor | Hypervisor| ToR | NVE2 |Hypervisor | |||
+---------+-+ +-----+-----+ +-+---------+ | +---------+-+ +-----+-----+ +-+---------+ | |||
| (BD-1) | | (BD-1) | | (BD-1) | | | (BD-1) | | (BD-1) | | (BD-1) | | |||
| LEAF | | RNVE | | LEAF | | | LEAF | | RNVE | | LEAF | | |||
+--+-----+--+ +--+-----+--+ +--+-----+--+ | +--+-----+--+ +--+-----+--+ +--+-----+--+ | |||
| | | | | | | | | | | | | | |||
VM11 VM12 TS3 TS4 VM31 VM32 | VM11 VM12 TS3 TS4 VM31 VM32 | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t>In AR BDs, such as BD-1 in <xref target="ure-optimized-ir-scenario"/>, | ||||
<t>In AR BDs such as BD-1 in the example, BM (Broadcast and Multicast) | BM | |||
traffic between two NVEs may follow a different path than unicast | traffic between two NVEs may follow a different path than unicast | |||
traffic. This solution recommends the replication of BM through the | traffic. This solution recommends the replication of BM traffic through th | |||
AR-REPLICATOR node, whereas unknown/known unicast will be delivered | e | |||
AR-REPLICATOR node, whereas unknown/known unicast traffic will be delivere | ||||
d | ||||
directly from the source node to the destination node without being | directly from the source node to the destination node without being | |||
replicated by any intermediate node.</t> | replicated by any intermediate node.</t> | |||
<t>Note that known unicast forwarding is not impacted by this solution, | <t>Note that known unicast forwarding is not impacted by this solution, | |||
i.e., unknown unicast SHALL follow the same path as known unicast | i.e., unknown unicast traffic <bcp14>SHALL</bcp14> follow the same path as known unicast | |||
traffic.</t> | traffic.</t> | |||
<section anchor="sect-5.1" numbered="true" toc="default"> | ||||
<section anchor="sect-5.1" | <name>Non-selective AR-REPLICATOR Procedures</name> | |||
title="Non-selective AR-REPLICATOR Procedures"> | ||||
<t>An AR-REPLICATOR is defined as an NVE/PE capable of replicating | <t>An AR-REPLICATOR is defined as an NVE/PE capable of replicating | |||
incoming BM traffic received on an overlay tunnel to other overlay | incoming BM traffic received on an overlay tunnel to other overlay | |||
tunnels and local Attachment Circuits. The AR-REPLICATOR signals its | tunnels and local ACs. The AR-REPLICATOR signals its | |||
role in the control plane and understands where the other roles | role in the control plane and understands where the other roles | |||
(AR-LEAF nodes, RNVEs and other AR-REPLICATORs) are located. A given | (AR-LEAF nodes, RNVEs, and other AR-REPLICATORs) are located. A given | |||
AR-enabled BD service may have zero, one or more AR-REPLICATORs. In | AR-enabled BD service may have zero, one, or more AR-REPLICATORs. In | |||
our example in <xref target="ure-optimized-ir-scenario"/>, PE1 and PE2 | our example in <xref target="ure-optimized-ir-scenario" format="default" | |||
/>, PE1 and PE2 | ||||
are defined as AR-REPLICATORs. The following considerations apply to | are defined as AR-REPLICATORs. The following considerations apply to | |||
the AR-REPLICATOR role:</t> | the AR-REPLICATOR role:</t> | |||
<ol spacing="normal" type="a"><li>The AR-REPLICATOR role <bcp14>SHOULD</ | ||||
<t><list hangIndent="3" style="letters"> | bcp14> be an administrative | |||
<t hangText="">The AR-REPLICATOR role SHOULD be an administrative | ||||
choice in any NVE/PE that is part of an AR-enabled BD. This | choice in any NVE/PE that is part of an AR-enabled BD. This | |||
administrative option to enable AR-REPLICATOR capabilities MAY be | administrative option to enable AR-REPLICATOR capabilities <bcp14>MA | |||
implemented as a system level option as opposed to as a per-BD | Y</bcp14> be | |||
option.</t> | implemented as a system-level option as opposed to a per-BD | |||
option.</li> | ||||
<t hangText="">An AR-REPLICATOR MUST advertise a Replicator-AR | <li>An AR-REPLICATOR <bcp14>MUST</bcp14> advertise a Replicator-AR | |||
route and MAY advertise a Regular-IR route. The AR-REPLICATOR MUST | route and <bcp14>MAY</bcp14> advertise a Regular-IR route. The AR-RE | |||
NOT generate a Regular-IR route if it does not have local | PLICATOR <bcp14>MUST | |||
attachment circuits (AC). If the Regular-IR route is advertised, | NOT</bcp14> generate a Regular-IR route if it does not have local | |||
the Assisted-Replication Type field of the Regular-IR route MUST | ACs. If the Regular-IR route is advertised, | |||
be set to zero.</t> | the Assisted Replication Type field of the Regular-IR route <bcp14>M | |||
UST</bcp14> | ||||
<t hangText="">The Replicator-AR and Regular-IR routes are | be set to 0.</li> | |||
generated according to <xref target="sect-4"/>. The AR-IP and | <li>The Replicator-AR and Regular-IR routes are | |||
IR-IP are different IP addresses owned by the AR-REPLICATOR.</t> | generated according to <xref target="sect-4" format="default"/>. The | |||
AR-IP and | ||||
<t hangText="">When a node defined as AR-REPLICATOR receives a BM | IR-IP are different IP addresses owned by the AR-REPLICATOR.</li> | |||
<li> | ||||
<t>When a node defined as an AR-REPLICATOR receives a BM | ||||
packet on an overlay tunnel, it will do a tunnel destination IP | packet on an overlay tunnel, it will do a tunnel destination IP | |||
address lookup and apply the following procedures: <list | address lookup and apply the following procedures: </t> | |||
style="symbols"> | <ul spacing="normal"> | |||
<t>If the destination IP address is the AR-REPLICATOR IR-IP | <li>If the destination IP address is the AR-REPLICATOR IR-IP | |||
Address the node will process the packet normally as in <xref | address, the node will process the packet normally as discussed | |||
target="RFC7432"/>.</t> | in <xref target="RFC7432" format="default"/>.</li> | |||
<li>If the destination IP address is the AR-REPLICATOR AR-IP | ||||
<t>If the destination IP address is the AR-REPLICATOR AR-IP | address, the node <bcp14>MUST</bcp14> replicate the packet to lo | |||
Address the node MUST replicate the packet to local Attachment | cal ACs | |||
Circuits and overlay tunnels (excluding the overlay tunnel to | and overlay tunnels (excluding the overlay tunnel to | |||
the source of the packet). When replicating to remote | the source of the packet). When replicating to remote | |||
AR-REPLICATORs the tunnel destination IP address will be an | AR-REPLICATORs, the tunnel destination IP address will be an | |||
IR-IP. That will be an indication for the remote AR-REPLICATOR | IR-IP. This will indicate to the remote AR-REPLICATOR | |||
that it MUST NOT replicate to overlay tunnels. The tunnel | that it <bcp14>MUST NOT</bcp14> replicate to overlay tunnels. Th | |||
source IP address used by the AR-REPLICATOR MUST be its IR-IP | e tunnel | |||
when replicating to AR-REPLICATOR or AR-LEAF nodes.</t> | source IP address used by the AR-REPLICATOR <bcp14>MUST</bcp14> | |||
</list></t> | be its IR-IP | |||
</list>An AR-REPLICATOR MUST follow a data path implementation | when replicating to AR-REPLICATOR or AR-LEAF nodes.</li> | |||
compatible with the following rules:</t> | </ul> | |||
</li> | ||||
<t><list style="symbols"> | </ol> | |||
<t>The AR-REPLICATORs will build a flooding list composed of | ||||
Attachment Circuits and overlay tunnels to remote nodes in the BD. | ||||
Some of those overlay tunnels MAY be flagged as non-BM receivers | ||||
based on the BM flag received from the remote nodes in the BD.</t> | ||||
<t>When an AR-REPLICATOR receives a BM packet on an Attachment | ||||
Circuit, it will forward the BM packet to its flooding list | ||||
(including local Attachment Circuits and remote NVE/PEs), skipping | ||||
the non-BM overlay tunnels.</t> | ||||
<t>An AR-REPLICATOR <bcp14>MUST</bcp14> follow a data path implementatio | ||||
n | ||||
compatible with the following rules:</t> | ||||
<ul spacing="normal"> | ||||
<li>The AR-REPLICATORs will build a flooding list composed of | ||||
ACs and overlay tunnels to remote nodes in the BD. | ||||
Some of those overlay tunnels <bcp14>MAY</bcp14> be flagged as non-B | ||||
M receivers | ||||
based on the BM flag received from the remote nodes in the BD.</li> | ||||
<li>When an AR-REPLICATOR receives a BM packet on an AC, | ||||
it will forward the BM packet to its flooding list | ||||
(including local ACs and remote NVEs/PEs), skipping | ||||
the non-BM overlay tunnels.</li> | ||||
<li> | ||||
<t>When an AR-REPLICATOR receives a BM packet on an overlay | <t>When an AR-REPLICATOR receives a BM packet on an overlay | |||
tunnel, it will check the destination IP address of the underlay | tunnel, it will check the destination IP address of the underlay | |||
IP header and: <list style="symbols"> | IP header and:</t> | |||
<t>If the destination IP address matches its IR-IP, the | <ul spacing="normal"> | |||
<li>If the destination IP address matches its IR-IP, the | ||||
AR-REPLICATOR will skip all the overlay tunnels from the | AR-REPLICATOR will skip all the overlay tunnels from the | |||
flooding list, i.e. it will only replicate to local Attachment | flooding list, i.e., it will only replicate to local ACs. | |||
Circuits. This is the regular Ingress Replication behavior | This is the regular ingress replication behavior | |||
described in <xref target="RFC7432"/>.</t> | described in <xref target="RFC7432" format="default"/>.</li> | |||
<li>If the destination IP address matches its AR-IP, the | ||||
<t>If the destination IP address matches its AR-IP, the | AR-REPLICATOR <bcp14>MUST</bcp14> forward the BM packet to its f | |||
AR-REPLICATOR MUST forward the BM packet to its flooding list | looding list | |||
(ACs and overlay tunnels) excluding the non-BM overlay | (ACs and overlay tunnels), excluding the non-BM overlay | |||
tunnels. The AR-REPLICATOR will ensure the traffic is not sent | tunnels. The AR-REPLICATOR will ensure that the traffic is not s | |||
back to the originating AR-LEAF.</t> | ent | |||
back to the originating AR-LEAF.</li> | ||||
<t>If the encapsulation is MPLSoGRE or MPLSoUDP and the | <li>If the encapsulation is MPLSoGRE or MPLSoUDP and the | |||
received BD label that the AR-REPLICATOR advertised in the | received BD label that the AR-REPLICATOR advertised in the | |||
Replicator-AR route is not the bottom of the stack, the | Replicator-AR route is not at the bottom of the stack, the | |||
AR-REPLICATOR MUST copy the all the labels below the BD label | AR-REPLICATOR <bcp14>MUST</bcp14> copy all the labels below the | |||
BD label | ||||
and propagate them when forwarding the packet to the egress | and propagate them when forwarding the packet to the egress | |||
overlay tunnels.</t> | overlay tunnels.</li> | |||
</list></t> | </ul> | |||
</li> | ||||
<t>The AR-REPLICATOR/LEAF nodes will build an Unknown unicast | <li> | |||
flood-list composed of Attachment Circuits and overlay tunnels to | <t>The AR-REPLICATOR/LEAF nodes will build an unknown unicast | |||
the IR-IP Addresses of the remote nodes in the BD. Some of those | flooding list composed of ACs and overlay tunnels to | |||
overlay tunnels MAY be flagged as non-U (Unknown unicast) | the IR-IP addresses of the remote nodes in the BD. Some of those | |||
overlay tunnels <bcp14>MAY</bcp14> be flagged as non-U (unknown unic | ||||
ast) | ||||
receivers based on the U flag received from the remote nodes in | receivers based on the U flag received from the remote nodes in | |||
the BD.<list style="symbols"> | the BD.</t> | |||
<t>When an AR-REPLICATOR/LEAF receives an unknown unicast | <ul spacing="normal"> | |||
packet on an Attachment Circuit, it will forward the unknown | <li>When an AR-REPLICATOR/LEAF receives an unknown unicast | |||
unicast packet to its flood-list, skipping the non-U overlay | packet on an AC, it will forward the unknown | |||
tunnels.</t> | unicast packet to its flooding list, skipping the non-U overlay | |||
tunnels.</li> | ||||
<t>When an AR-REPLICATOR/LEAF receives an unknown unicast | <li>When an AR-REPLICATOR/LEAF receives an unknown unicast | |||
packet on an overlay tunnel, it will forward the unknown | packet on an overlay tunnel, it will forward the unknown | |||
unicast packet to its local Attachment Circuits and never to | unicast packet to its local ACs and never to | |||
an overlay tunnel. This is the regular Ingress Replication | an overlay tunnel. This is the regular ingress replication | |||
behavior described in <xref target="RFC7432"/>.</t> | behavior described in <xref target="RFC7432" format="default"/>. | |||
</list></t> | </li> | |||
</list></t> | </ul> | |||
</li> | ||||
</ul> | ||||
</section> | </section> | |||
<section anchor="sect-5.2" numbered="true" toc="default"> | ||||
<section anchor="sect-5.2" title="Non-Selective AR-LEAF Procedures"> | <name>Non-selective AR-LEAF Procedures</name> | |||
<t>AR-LEAF is defined as an NVE/PE that - given its poor replication | <t>An AR-LEAF is defined as an NVE/PE that, given its poor replication | |||
performance - sends all the BM traffic to an AR-REPLICATOR that can | performance, sends all the BM traffic to an AR-REPLICATOR that can | |||
replicate the traffic further on its behalf. It MAY signal its AR-LEAF | replicate the traffic further on its behalf. It <bcp14>MAY</bcp14> signa | |||
l its AR-LEAF | ||||
capability in the control plane and understands where the other roles | capability in the control plane and understands where the other roles | |||
are located (AR-REPLICATOR and RNVEs). A given service can have zero, | are located (AR-REPLICATORs and RNVEs). A given service can have zero, | |||
one or more AR-LEAF nodes. <xref target="ure-optimized-ir-scenario"/> | one, or more AR-LEAF nodes. In <xref target="ure-optimized-ir-scenario" | |||
shows NVE1 and NVE3 (both residing in hypervisors) acting as AR-LEAF. | format="default"/>, | |||
NVE1 and NVE3 (both residing in hypervisors) act as AR-LEAF nodes. | ||||
The following considerations apply to the AR-LEAF role:</t> | The following considerations apply to the AR-LEAF role:</t> | |||
<ol spacing="normal" type="a"><li>The AR-LEAF role <bcp14>SHOULD</bcp14> | ||||
<t><list hangIndent="3" style="letters"> | be an administrative choice | |||
<t hangText="">The AR-LEAF role SHOULD be an administrative choice | ||||
in any NVE/PE that is part of an AR-enabled BD. This | in any NVE/PE that is part of an AR-enabled BD. This | |||
administrative option to enable AR-LEAF capabilities MAY be | administrative option to enable AR-LEAF capabilities <bcp14>MAY</bcp | |||
implemented as a system level option as opposed to as per-BD | 14> be | |||
option.</t> | implemented as a system-level option as opposed to a per-BD | |||
option.</li> | ||||
<t hangText="">In this non-selective AR solution, the AR-LEAF MUST | <li>In this non-selective AR solution, the AR-LEAF <bcp14>MUST</bcp14> | |||
advertise a single Regular-IR inclusive multicast route as in | advertise a single Regular-IR Inclusive Multicast Ethernet Tag route | |||
<xref target="RFC7432"/>. The AR-LEAF SHOULD set the | as described in | |||
Assisted-Replication Type field to AR-LEAF. Note that although | <xref target="RFC7432" format="default"/>. The AR-LEAF <bcp14>SHOULD | |||
this field does not make any difference for the remote nodes when | </bcp14> set the | |||
creating an EVPN destination to the AR-LEAF, this field is useful | Assisted Replication Type field to AR-LEAF. Note that although this | |||
for an easy operation and troubleshooting of the BD.</t> | field does not affect the remote nodes when creating an EVPN destina | |||
tion | ||||
<t hangText="">In a BD where there are no AR-REPLICATORs due to | to the AR-LEAF, this field is useful from the standpoint of ease of | |||
the AR-REPLICATORs being down or reconfigured, the AR-LEAF MUST | operation and troubleshooting of the BD.</li> | |||
use regular Ingress Replication, based on the remote Regular-IR | <li> | |||
Inclusive Multicast Routes as described in <xref | <t>In a BD where there are no AR-REPLICATORs due to | |||
target="RFC7432"/>. This may happen in the following cases: <list | the AR-REPLICATORs being down or reconfigured, the AR-LEAF <bcp14>MU | |||
style="symbols"> | ST</bcp14> | |||
<t>The AR-LEAF has a list of AR-REPLICATORs for the BD, but it | use regular ingress replication based on the remote Regular-IR | |||
Inclusive Multicast Ethernet Tag routes as described in <xref target | ||||
="RFC7432" format="default"/>. This may happen in the following cases: </t> | ||||
<ul spacing="normal"> | ||||
<li>The AR-LEAF has a list of AR-REPLICATORs for the BD, but it | ||||
detects that all the AR-REPLICATORs for the BD are down (via | detects that all the AR-REPLICATORs for the BD are down (via | |||
next-hop tracking in the IGP or any other detection | next-hop tracking in the IGP or some other detection | |||
mechanism).</t> | mechanism).</li> | |||
<li>The AR-LEAF receives updates from all the former | ||||
<t>The AR-LEAF receives updates from all the former | ||||
AR-REPLICATORs containing a non-REPLICATOR AR type in the | AR-REPLICATORs containing a non-REPLICATOR AR type in the | |||
Inclusive Multicast Etherner Tag routes.</t> | Inclusive Multicast Ethernet Tag routes.</li> | |||
<li>The AR-LEAF never discovered an AR-REPLICATOR for the | ||||
<t>The AR-LEAF never discovered an AR-REPLICATOR for the | BD.</li> | |||
BD.</t> | </ul> | |||
</list></t> | </li> | |||
<li> | ||||
<t hangText="">In a service where there is one or more | <t>In a service where there are one or more | |||
AR-REPLICATORs (based on the received Replicator-AR routes for the | AR-REPLICATORs (based on the received Replicator-AR routes for the | |||
BD), the AR-LEAF can locally select which AR-REPLICATOR it sends | BD), the AR-LEAF can locally select which AR-REPLICATOR it sends | |||
the BM traffic to: <list style="symbols"> | the BM traffic to:</t> | |||
<t>A single AR-REPLICATOR MAY be selected for all the BM | <ul spacing="normal"> | |||
packets received on the AR-LEAF attachment circuits (ACs) for | <li>A single AR-REPLICATOR <bcp14>MAY</bcp14> be selected for all | |||
a given BD. This selection is a local decision and it does not | the BM | |||
packets received on the AR-LEAF ACs for | ||||
a given BD. This selection is a local decision and does not | ||||
have to match other AR-LEAFs' selections within the same | have to match other AR-LEAFs' selections within the same | |||
BD.</t> | BD.</li> | |||
<li>An AR-LEAF <bcp14>MAY</bcp14> select more than one AR-REPLICAT | ||||
<t>An AR-LEAF MAY select more than one AR-REPLICATOR and do | OR and do | |||
either per-flow or per-BD load balancing.</t> | either per-flow or per-BD load balancing.</li> | |||
<li>In the case of failure of the selected AR-REPLICATOR, another | ||||
<t>In case of a failure of the selected AR-REPLICATOR, another | AR-REPLICATOR <bcp14>SHOULD</bcp14> be selected by the AR-LEAF.< | |||
AR-REPLICATOR SHOULD be selected by the AR-LEAF.</t> | /li> | |||
<li>When an AR-REPLICATOR is selected for a given flow or BD, | ||||
<t>When an AR-REPLICATOR is selected for a given flow or BD, | the AR-LEAF <bcp14>MUST</bcp14> send all the BM packets targeted | |||
the AR-LEAF MUST send all the BM packets targeted to that | to that | |||
AR-REPLICATOR using the forwarding information given by the | AR-REPLICATOR using the forwarding information given by the | |||
Replicator-AR route for the chosen AR-REPLICATOR, with tunnel | Replicator-AR route for the chosen AR-REPLICATOR, with Tunnel | |||
type = 0x0A (AR tunnel). The underlay destination IP address | Type = 0x0A (AR tunnel). The underlay destination IP address | |||
MUST be the AR-IP advertised by the AR-REPLICATOR in the | <bcp14>MUST</bcp14> be the AR-IP advertised by the AR-REPLICATOR | |||
Replicator-AR route.</t> | in the | |||
Replicator-AR route.</li> | ||||
<t>An AR-LEAF MAY change the AR-REPLICATOR(s) selection | <li>An AR-LEAF <bcp14>MAY</bcp14> change the selection of AR-REPLI | |||
dynamically, due to an administrative or policy configuration | CATOR(s) | |||
change.</t> | dynamically due to an administrative or policy configuration | |||
change.</li> | ||||
<t>AR-LEAF nodes SHALL send service-level BM control plane | <li>AR-LEAF nodes <bcp14>SHALL</bcp14> send service-level BM contr | |||
packets following regular Ingress Replication procedures. An | ol plane | |||
example would be IGMP, MLD or PIM multicast packets, and in | packets, following the procedures for regular ingress replicatio | |||
general any packets using link-local scope multicast IPv4 or | n. An | |||
IPv6 packets. The AR-REPLICATORs MUST NOT replicate these | example would be IGMP, Multicast Listener Discovery (MLD), or PI | |||
control plane packets to other overlay tunnels since they will | M | |||
use the regular IR-IP Address.</t> | packets, and, in | |||
</list></t> | general, any packets using link-local scope multicast IPv4 or | |||
IPv6 packets. The AR-REPLICATORs <bcp14>MUST NOT</bcp14> replica | ||||
<t hangText="">The use of an AR-REPLICATOR-activation-timer (in | te these | |||
seconds, default value is 3) on the AR-LEAF nodes is RECOMMENDED. | control plane packets to other overlay tunnels, since they will | |||
use the IR-IP address.</li> | ||||
</ul> | ||||
</li> | ||||
<li>The use of an AR-REPLICATOR-activation-timer (in | ||||
seconds, with a default value of 3) on the AR-LEAF nodes is <bcp14>R | ||||
ECOMMENDED</bcp14>. | ||||
Upon receiving a new Replicator-AR route where the AR-REPLICATOR | Upon receiving a new Replicator-AR route where the AR-REPLICATOR | |||
is selected, the AR-LEAF will run a timer before programming the | is selected, the AR-LEAF will run a timer before programming the | |||
new AR-REPLICATOR. In case of a new added AR-REPLICATOR, or in | new AR-REPLICATOR. In the case of a newly added AR-REPLICATOR or if | |||
case the AR-REPLICATOR reboots, this timer will give the | an AR-REPLICATOR reboots, this timer will give the | |||
AR-REPLICATOR some time to program the AR-LEAF nodes before the | AR-REPLICATOR some time to program the AR-LEAF nodes before the | |||
AR-LEAF sends BM traffic. The AR-REPLICATOR-activation-timer | AR-LEAF sends BM traffic. The AR-REPLICATOR-activation-timer | |||
SHOULD be configurable in seconds, and its value account for the | <bcp14>SHOULD</bcp14> be configurable in seconds, and its value need | |||
time it takes for the AR-LEAF Regular-IR inclusive multicast route | s to account for the | |||
time it takes for the AR-LEAF Regular-IR Inclusive Multicast Etherne | ||||
t Tag route | ||||
to get to the AR-REPLICATOR and be programmed. While the | to get to the AR-REPLICATOR and be programmed. While the | |||
AR-REPLICATOR-activation-time is running, the AR-LEAF node will | AR-REPLICATOR-activation-timer is running, the AR-LEAF node will | |||
use regular ingress replication.</t> | use regular ingress replication.</li> | |||
<li>If the AR-LEAF has selected an AR-REPLICATOR, whether | ||||
<t>If the AR-LEAF has selected an AR-REPLICATOR, it is a matter of | or not to change to a new preferred AR-REPLICATOR for the existing B | |||
local policy to change to a new preferred AR-REPLICATOR for the | M traffic flows is a matter of local policy.</li> | |||
existing BM traffic flows.</t> | </ol> | |||
</list>An AR-LEAF MUST follow a data path implementation compatible | <t>An AR-LEAF <bcp14>MUST</bcp14> follow a data path implementation comp | |||
atible | ||||
with the following rules:</t> | with the following rules:</t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
<t>The AR-LEAF nodes will build two flooding lists:</t> | ||||
<dl> | ||||
<t><list style="symbols"> | <dt>Flooding list #1: | |||
<t>The AR-LEAF nodes will build two flood-lists:<list | </dt> | |||
style="numbers"> | <dd>Composed of ACs and an AR-REPLICATOR-set of | |||
<t>Flood-list #1 - composed of Attachment Circuits and an | overlay tunnels. The AR-REPLICATOR-set is defined as one or more | |||
AR-REPLICATOR-set of overlay tunnels. The AR-REPLICATOR-set is | overlay tunnels to the AR-IP addresses of the remote | |||
defined as one or more overlay tunnels to the AR-IP Addresses | AR-REPLICATOR(s) in the BD. The selection of more than one | |||
of the remote AR-REPLICATOR(s) in the BD. The selection of | AR-REPLICATOR is described in item d. above and is a local | |||
more than one AR-REPLICATOR is described in point d) above and | AR-LEAF decision. | |||
it is a local AR-LEAF decision.</t> | </dd> | |||
<t>Flood-list #2 - composed of Attachment Circuits and overlay | ||||
tunnels to the remote IR-IP Addresses.</t> | ||||
</list></t> | ||||
<t>When an AR-LEAF receives a BM packet on an Attachment Circuit, | ||||
it will check the AR-REPLICATOR-set:<list style="symbols"> | ||||
<t>If the AR-REPLICATOR-set is empty, the AR-LEAF MUST send | ||||
the packet to flood-list #2.</t> | ||||
<t>If the AR-REPLICATOR-set is NOT empty, the AR-LEAF MUST | <dt>Flooding list #2: | |||
send the packet to flood-list #1, where only one of the | </dt> | |||
overlay tunnels of the AR-REPLICATOR-set is used.</t> | <dd>Composed of ACs and overlay tunnels to the | |||
</list></t> | remote IR-IP addresses. | |||
</dd> | ||||
<t>When an AR-LEAF receives a BM packet on an overlay tunnel, it | </dl> | |||
will forward the BM packet to its local Attachment Circuits and | ||||
never to an overlay tunnel. This is the regular Ingress | ||||
Replication behavior described in <xref target="RFC7432"/>.</t> | ||||
<t>AR-LEAF nodes process Unknown unicast traffic in the same way | </li> | |||
AR-REPLICATORS do, as described in <xref target="sect-5.1"/>.</t> | <li> | |||
</list></t> | <t>When an AR-LEAF receives a BM packet on an AC, | |||
it will check the AR-REPLICATOR-set:</t> | ||||
<ul spacing="normal"> | ||||
<li>If the AR-REPLICATOR-set is empty, the AR-LEAF <bcp14>MUST</bc | ||||
p14> send | ||||
the packet to flooding list #2.</li> | ||||
<li>If the AR-REPLICATOR-set is NOT empty, the AR-LEAF <bcp14>MUST | ||||
</bcp14> | ||||
send the packet to flooding list #1, where only one of the | ||||
overlay tunnels of the AR-REPLICATOR-set is used.</li> | ||||
</ul> | ||||
</li> | ||||
<li>When an AR-LEAF receives a BM packet on an overlay tunnel, it | ||||
will forward the BM packet to its local ACs and | ||||
never to an overlay tunnel. This is the regular ingress | ||||
replication behavior described in <xref target="RFC7432" format="def | ||||
ault"/>.</li> | ||||
<li>AR-LEAF nodes process unknown unicast traffic in the same way | ||||
              AR-REPLICATORs do, as described in <xref target="sect-5.1" format="d | ||||
efault"/>.</li> | ||||
</ul> | ||||
</section> | </section> | |||
<section anchor="sect-5.3" title="RNVE Procedures"> | <section anchor="sect-5.3" numbered="true" toc="default"> | |||
<t>RNVE (Regular Network Virtualization Edge node) is defined as an | <name>RNVE Procedures</name> | |||
NVE/PE without AR-REPLICATOR or AR-LEAF capabilities that does Ingress | <t>An RNVE is defined as an | |||
Replication as described in <xref target="RFC7432"/>. The RNVE does | NVE/PE without AR-REPLICATOR or AR-LEAF capabilities that does ingress | |||
replication as described in <xref target="RFC7432" format="default"/>. T | ||||
he RNVE does | ||||
not signal any AR role and is unaware of the AR-REPLICATOR/LEAF roles | not signal any AR role and is unaware of the AR-REPLICATOR/LEAF roles | |||
in the BD. The RNVE will ignore the Flags in the Regular-IR routes and | in the BD. The RNVE will ignore the flags in the Regular-IR routes and | |||
will ignore the Replicator-AR routes (due to an unknown tunnel type in | will ignore the Replicator-AR routes (due to an unknown tunnel type in | |||
the PMSI Tunnel Attribute) and the Leaf Auto-Discovery routes (due to | the PMSI Tunnel Attribute) and the Leaf A-D routes (due to | |||
the IP-address-specific route-target).</t> | the IP-address-specific Route Target).</t> | |||
<t>This role provides EVPNs with the backward compatibility required | ||||
<t>This role provides EVPN with the backwards compatibility required | in optimized ingress replication BDs. In <xref target="ure-optimized-ir- | |||
in optimized Ingress Replication BDs. <xref | scenario" format="default"/>, NVE2 acts as an RNVE.</t> | |||
target="ure-optimized-ir-scenario"/> shows NVE2 as RNVE.</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sect-6" numbered="true" toc="default"> | ||||
<section anchor="sect-6" | <name>Selective Assisted Replication (AR) Solution Description</name> | |||
title="Selective Assisted-Replication (AR) Solution Description"> | <t><xref target="selective-ar" format="default"/> is used to describe the | |||
<t><xref target="selective-ar"/> is used to describe the selective AR | selective AR | |||
solution.</t> | solution.</t> | |||
<figure anchor="selective-ar"> | ||||
<figure anchor="selective-ar" title="Selective AR scenario"> | <name>Selective AR Scenario</name> | |||
<artwork><![CDATA[ | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
( ) | ( ) | |||
(_ WAN _) | (_ WAN _) | |||
+---(_ _)----+ | +---(_ _)----+ | |||
| (_ _) | | | (_ _) | | |||
PE1 | PE2 | | PE1 | PE2 | | |||
+------+----+ +----+------+ | +------+----+ +----+------+ | |||
TS1--+ (BD-1) | | (BD-1) +--TS2 | TS1--+ (BD-1) | | (BD-1) +--TS2 | |||
|REPLICATOR | |REPLICATOR | | |REPLICATOR | |REPLICATOR | | |||
+--------+--+ +--+--------+ | +--------+--+ +--+--------+ | |||
| | | | | | |||
+--+----------------+--+ | +--+----------------+--+ | |||
| | | | | | |||
| | | | | | |||
+----+ VXLAN/nvGRE/MPLSoGRE +----+ | +----+ VXLAN/NVGRE/MPLSoGRE +----+ | |||
| | IP Fabric | | | | | IP Fabric | | | |||
| | | | | | | | | | |||
NVE1 | +-----------+----------+ | NVE3 | NVE1 | +-----------+----------+ | NVE3 | |||
Hypervisor| TOR | NVE2 |Hypervisor | Hypervisor| ToR | NVE2 |Hypervisor | |||
+---------+-+ +-----+-----+ +-+---------+ | +---------+-+ +-----+-----+ +-+---------+ | |||
| (BD-1) | | (BD-1) | | (BD-1) | | | (BD-1) | | (BD-1) | | (BD-1) | | |||
| LEAF-set1 | |LEAF-set-1 | |LEAF-set-2 | | |LEAF-set-1 | |LEAF-set-1 | |LEAF-set-2 | | |||
+--+-----+--+ +--+-----+--+ +--+-----+--+ | +--+-----+--+ +--+-----+--+ +--+-----+--+ | |||
| | | | | | | | | | | | | | |||
VM11 VM12 TS3 TS4 VM31 VM32 | VM11 VM12 TS3 TS4 VM31 VM32 | |||
]]></artwork> | ]]></artwork> | |||
</figure> | </figure> | |||
<t>The solution is called "selective" because a given AR-REPLICATOR MUST | <t>The solution is called "selective" because a given AR-REPLICATOR <bcp14>MUST</bcp14> | |||
replicate the BM traffic to only the AR-LEAFs that requested the | replicate the BM traffic to only the AR-LEAFs that requested the | |||
replication (as opposed to all the AR-LEAF nodes) and MUST replicate the | replication (as opposed to all the AR-LEAF nodes) and <bcp14>MUST</bcp14> | |||
BM traffic to the RNVEs (if there are any). The same AR roles defined in | replicate the | |||
<xref target="sect-4"/> are used here, however the procedures are | BM traffic to the RNVEs (if there are any). The same AR roles as those def | |||
ined in | ||||
Sections <xref target="sect-4" format="counter"/> and <xref target="s | ||||
ect-5" format="counter"/> are used here; however, the procedures are | ||||
different.</t> | different.</t> | |||
<t>The selective AR procedures create multiple AR-LEAF-sets in the EVPN | ||||
<t>The Selective AR procedures create multiple AR-LEAF-sets in the EVPN | BD and build single-hop trees among AR-LEAFs of the same set | |||
BD, and build single-hop trees among AR-LEAFs of the same set | (AR-LEAF->AR-REPLICATOR->AR-LEAF) and two-hop trees among | |||
(AR-LEAF->AR-REPLICATOR->AR-LEAF), and two-hop trees among | ||||
AR-LEAFs of different sets | AR-LEAFs of different sets | |||
(AR-LEAF->AR-REPLICATOR->AR-REPLICATOR->AR-LEAF). Compared to | (AR-LEAF->AR-REPLICATOR->AR-REPLICATOR->AR-LEAF). Compared to | |||
the Selective solution, the Non-Selective AR method assumes that all the | the selective solution, the non-selective AR method assumes that all the | |||
AR-LEAFs of the BD are in the same set and always creates two-hop trees | AR-LEAFs of the BD are in the same set and always creates single-hop trees | |||
among AR-LEAFs. While the Selective solution is more efficient than the | among AR-LEAFs. While the selective solution is more efficient than the | |||
Non-Selective solution in multi-stage IP fabrics, the trade-off is | non-selective solution in multi-stage IP fabrics, the trade-off is | |||
additional signaling and an additional outer source IP address | additional signaling and an additional outer source IP address | |||
lookup.</t> | lookup.</t> | |||
<t>The following subsections describe the differences in the procedures | ||||
<t>The following sub-sections describe the differences in the procedures | for AR-REPLICATORs/LEAFs compared to the non-selective AR solution. There | |||
of AR-REPLICATOR/LEAFs compared to the non-selective AR solution. There | are no changes applicable to RNVEs.</t> | |||
is no change on the RNVEs.</t> | <section anchor="sect-6.1" numbered="true" toc="default"> | |||
<name>Selective AR-REPLICATOR Procedures</name> | ||||
<section anchor="sect-6.1" title="Selective AR-REPLICATOR Procedures"> | <t>In our example in <xref target="selective-ar" format="default"/>, PE1 | |||
<t>In our example in <xref target="selective-ar"/>, PE1 and PE2 are | and PE2 are | |||
defined as Selective AR-REPLICATORs. The following considerations | defined as selective AR-REPLICATORs. The following considerations | |||
apply to the Selective AR-REPLICATOR role:</t> | apply to the selective AR-REPLICATOR role:</t> | |||
<ol spacing="normal" type="a"><li>The selective AR-REPLICATOR role <bcp1 | ||||
<t><list style="letters"> | 4>SHOULD</bcp14> be an | |||
<t>The Selective AR-REPLICATOR capability SHOULD be an | ||||
administrative choice in any NVE/PE that is part of an | administrative choice in any NVE/PE that is part of an | |||
Assisted-Replication-enabled BD, as the AR role itself. This | AR-enabled BD. This | |||
administrative option MAY be implemented as a system level option | administrative option <bcp14>MAY</bcp14> be implemented as a system- | |||
as opposed to as a per-BD option.</t> | level option | |||
as opposed to a per-BD option.</li> | ||||
<t>Each AR-REPLICATOR will build a list of AR-REPLICATOR, AR-LEAF | <li>Each AR-REPLICATOR will build a list of AR-REPLICATOR, AR-LEAF, | |||
and RNVE nodes. In spite of the 'Selective' administrative option, | and RNVE nodes. In spite of the "selective" administrative option, | |||
an AR-REPLICATOR MUST NOT behave as a Selective AR-REPLICATOR if | an AR-REPLICATOR <bcp14>MUST NOT</bcp14> behave as a selective AR-RE | |||
PLICATOR if | ||||
at least one of the AR-REPLICATORs has the L flag NOT set. If at | at least one of the AR-REPLICATORs has the L flag NOT set. If at | |||
least one AR-REPLICATOR sends a Replicator-AR route with L=0 (in | least one AR-REPLICATOR sends a Replicator-AR route with L = 0 (in | |||
the BD context), the rest of the AR-REPLICATORs will fall back to | the BD context), the rest of the AR-REPLICATORs will fall back to | |||
non-selective AR mode.</t> | non-selective AR mode.</li> | |||
<li> | ||||
<t>The Selective AR-REPLICATOR MUST follow the procedures | <t>The selective AR-REPLICATOR <bcp14>MUST</bcp14> follow the proced | |||
described in <xref target="sect-5.1"/>, except for the following | ures | |||
differences:<list style="symbols"> | described in <xref target="sect-5.1" format="default"/>, except for | |||
<t>The Replicator-AR route MUST include L=1 (Leaf Information | the following | |||
Required) in the Replicator-AR route. This flag is used by the | differences:</t> | |||
AR-REPLICATORs to advertise their 'selective' AR-REPLICATOR | <ul spacing="normal"> | |||
<li>The AR-REPLICATOR <bcp14>MUST</bcp14> have the L flag set to 1 | ||||
when advertising the Replicator-AR route. This flag is used by t | ||||
he | ||||
AR-REPLICATORs to advertise their "selective" AR-REPLICATOR | ||||
capabilities. In addition, the AR-REPLICATOR auto-configures | capabilities. In addition, the AR-REPLICATOR auto-configures | |||
its IP-address-specific import route-target as described in | its IP-address-specific import Route Target as described in | |||
the third bullet of the procedures for Leaf Auto-Discovery | the third bullet of the procedures for Leaf A-D | |||
route in <xref target="sect-4"/>.</t> | routes in <xref target="sect-4" format="default"/>.</li> | |||
<li>The AR-REPLICATOR will build a "selective" AR-LEAF-set with | ||||
<t>The AR-REPLICATOR will build a 'selective' AR-LEAF-set with | ||||
the list of nodes that requested replication to its own AR-IP. | the list of nodes that requested replication to its own AR-IP. | |||
For instance, assuming NVE1 and NVE2 advertise a Leaf | For instance, assuming that NVE1 and NVE2 advertise a Leaf | |||
Auto-Discovery route with PE1's IP-address-specific | A-D route with PE1's IP-address-specific | |||
route-target and NVE3 advertises a Leaf Auto-Discovery route | Route Target and NVE3 advertises a Leaf A-D route | |||
with PE2's IP-address-specific route-target, PE1 will only add | with PE2's IP-address-specific Route Target, PE1 will only add | |||
NVE1/NVE2 to its selective AR-LEAF-set for BD-1, and exclude | NVE1/NVE2 to its selective AR-LEAF-set for BD-1 and exclude | |||
NVE3. Likewise, PE2 will only add NVE3 to its selective | NVE3. Likewise, PE2 will only add NVE3 to its selective | |||
AR-LEAF-set for BD-1, and exclude NVE1/NVE2.</t> | AR-LEAF-set for BD-1 and exclude NVE1/NVE2.</li> | |||
<li> | ||||
<t>When a node defined and operating as a Selective | <t>When a node defined and operating as a selective | |||
AR-REPLICATOR receives a packet on an overlay tunnel, it will | AR-REPLICATOR receives a packet on an overlay tunnel, it will | |||
do a tunnel destination IP lookup and if the destination IP | do a tunnel destination IP lookup, and if the destination IP | |||
address is the AR-REPLICATOR AR-IP Address, the node MUST | address is the AR-REPLICATOR AR-IP address, the node <bcp14>MUST | |||
replicate the packet to:<list style="symbols"> | </bcp14> | |||
<t>local Attachment Circuits</t> | replicate the packet to:</t> | |||
<ul spacing="normal"> | ||||
<t>overlay tunnels in the Selective AR-LEAF-set, excluding | <li>Local ACs.</li> | |||
the overlay tunnel to the source AR-LEAF.</t> | <li>Overlay tunnels in the selective AR-LEAF-set, excluding | |||
the overlay tunnel to the source AR-LEAF.</li> | ||||
<t>overlay tunnels to the RNVEs if the tunnel source IP | <li>Overlay tunnels to the RNVEs if the tunnel source IP | |||
address is the IR-IP of an AR-LEAF. In any other case, the | address is the IR-IP of an AR-LEAF. In any other case, the | |||
AR-REPLICATOR MUST NOT replicate the BM traffic to remote | AR-REPLICATOR <bcp14>MUST NOT</bcp14> replicate the BM traffic to remote | |||
RNVEs. In other words, only the first-hop selective | RNVEs. In other words, only the first-hop selective | |||
AR-REPLICATOR will replicate to all the RNVEs.</t> | AR-REPLICATOR will replicate to all the RNVEs.</li> | |||
<li>Overlay tunnels to the remote selective AR-REPLICATORs | ||||
<t>overlay tunnels to the remote Selective AR-REPLICATORs | ||||
if the tunnel source IP address (of the encapsulated | if the tunnel source IP address (of the encapsulated | |||
packet that arrived on the overlay tunnel) is an IR-IP of | packet that arrived on the overlay tunnel) is an IR-IP of | |||
its own AR-LEAF-set. In any other case, the AR-REPLICATOR | its own AR-LEAF-set. In any other case, the AR-REPLICATOR | |||
MUST NOT replicate the BM traffic to remote | <bcp14>MUST NOT</bcp14> replicate the BM traffic to remote | |||
AR-REPLICATORs. When doing this replication, the tunnel | AR-REPLICATORs. When doing this replication, the tunnel | |||
destination IP address is the AR-IP of the remote | destination IP address is the AR-IP of the remote | |||
Selective AR-REPLICATOR. The tunnel destination IP AR-IP | selective AR-REPLICATOR. The tunnel destination address AR-I | |||
will be an indication for the remote Selective | P | |||
will indicate to the remote selective | ||||
AR-REPLICATOR that the packet needs further replication to | AR-REPLICATOR that the packet needs further replication to | |||
its AR-LEAFs.</t> | its AR-LEAFs.</li> | |||
</list></t> | </ul> | |||
</list></t> | </li> | |||
</list>A Selective AR-REPLICATOR data path implementation MUST be | </ul> | |||
</li> | ||||
</ol> | ||||
<t>A selective AR-REPLICATOR data path implementation <bcp14>MUST</bcp14 | ||||
> be | ||||
compatible with the following rules:</t> | compatible with the following rules:</t> | |||
<ul spacing="normal"> | ||||
<li> | ||||
<t>The selective AR-REPLICATORs will build two flooding lists:</t> | ||||
<t><list style="symbols"> | <dl> | |||
<t>The Selective AR-REPLICATORs will build two flood-lists:<list | <dt>Flooding list #1: | |||
style="numbers"> | </dt> | |||
<t>Flood-list #1 - composed of Attachment Circuits and overlay | <dd><t>Composed of ACs and overlay tunnels to the | |||
tunnels to the remote nodes in the BD, always using the IR-IPs | remote nodes in the BD, always using the IR-IPs in the tunnel | |||
in the tunnel destination IP addresses.</t> | destination IP addresses.</t> | |||
</dd> | ||||
<t>Flood-list #2 - composed of Attachment Circuits, a | ||||
Selective AR-LEAF-set and a Selective AR-REPLICATOR-set, | ||||
where:<list style="symbols"> | ||||
<t>The Selective AR-LEAF-set is composed of the overlay | ||||
tunnels to the AR-LEAFs that advertise a Leaf | ||||
Auto-Discovery route for the local AR-REPLICATOR. This set | ||||
is updated with every Leaf Auto-Discovery route | ||||
received/withdrawn from a new AR-LEAF.</t> | ||||
<t>The Selective AR-REPLICATOR-set is composed of the | <dt>Flooding list #2: | |||
overlay tunnels to all the AR-REPLICATORs that send a | </dt> | |||
Replicator-AR route with L=1. The AR-IP addresses are used | <dd><t>Composed of ACs, a selective AR-LEAF-set, and | |||
as tunnel destination IP.</t> | a selective AR-REPLICATOR-set, where:</t> | |||
</list></t> | ||||
</list></t> | ||||
<t>Some of the overlay tunnels in the flood-lists MAY be flagged | <ul spacing="normal"> | |||
as non-BM receivers based on the BM flag received from the remote | <li>The selective AR-LEAF-set is composed of the overlay | |||
nodes in the routes.</t> | tunnels to the AR-LEAFs that advertise a Leaf A-D | |||
route for the local AR-REPLICATOR. This set is updated with | ||||
every Leaf A-D route received/withdrawn from a | ||||
new AR-LEAF.</li> | ||||
<li>The selective AR-REPLICATOR-set is composed of the | ||||
overlay tunnels to all the AR-REPLICATORs that send a | ||||
Replicator-AR route with L = 1. The AR-IP addresses are used | ||||
as tunnel destination IP addresses.</li> | ||||
</ul> | ||||
<t>When a Selective AR-REPLICATOR receives a BM packet on an | </dd> | |||
Attachment Circuit, it MUST forward the BM packet to its | </dl> | |||
flood-list #1, skipping the non-BM overlay tunnels.</t> | ||||
<t>When a Selective AR-REPLICATOR receives a BM packet on an | </li> | |||
<li>Some of the overlay tunnels in the flooding lists <bcp14>MAY</bcp1 | ||||
4> be flagged | ||||
as non-BM receivers based on the BM flag received from the remote | ||||
nodes in the routes.</li> | ||||
<li>When a selective AR-REPLICATOR receives a BM packet on an | ||||
AC, it <bcp14>MUST</bcp14> forward the BM packet to its | ||||
flooding list #1, skipping the non-BM overlay tunnels.</li> | ||||
<li> | ||||
<t>When a selective AR-REPLICATOR receives a BM packet on an | ||||
overlay tunnel, it will check the destination and source IPs of | overlay tunnel, it will check the destination and source IPs of | |||
the underlay IP header and:<list style="symbols"> | the underlay IP header and:</t> | |||
<t>If the destination IP address matches its AR-IP and the | <ul spacing="normal"> | |||
source IP address matches an IP of its own Selective | <li>If the destination IP address matches its AR-IP and the | |||
AR-LEAF-set, the AR-REPLICATOR MUST forward the BM packet to | source IP address matches an IP of its own selective | |||
its flood-list #2, unless some AR-REPLICATOR within the BD has | AR-LEAF-set, the AR-REPLICATOR <bcp14>MUST</bcp14> forward the B | |||
advertised L=0. In the latter case, the node reverts back to | M packet to | |||
non-selective mode and flood-list #1 MUST be used. Non-BM | its flooding list #2, unless some AR-REPLICATOR within the BD ha | |||
overlay tunnels are skipped when sending BM packets.</t> | s | |||
advertised L = 0. In the latter case, the node reverts to | ||||
<t>If the destination IP address matches its AR-IP and the | non-selective mode, and flooding list #1 <bcp14>MUST</bcp14> be | |||
used. Non-BM | ||||
overlay tunnels are skipped when sending BM packets.</li> | ||||
<li>If the destination IP address matches its AR-IP and the | ||||
source IP address does not match any IP address of its | source IP address does not match any IP address of its | |||
Selective AR-LEAF-set, the AR-REPLICATOR MUST forward the BM | selective AR-LEAF-set, the AR-REPLICATOR <bcp14>MUST</bcp14> for | |||
packet to flood-list #2 but skipping the AR-REPLICATOR-set. | ward the BM | |||
packet to flooding list #2, skipping the AR-REPLICATOR-set. | ||||
Non-BM overlay tunnels are skipped when sending BM | Non-BM overlay tunnels are skipped when sending BM | |||
packets.</t> | packets.</li> | |||
<li>If the destination IP address matches its IR-IP, the | ||||
<t>If the destination IP address matches its IR-IP, the | AR-REPLICATOR <bcp14>MUST</bcp14> use flooding list #1 but <bcp1 | |||
AR-REPLICATOR MUST use flood-list #1 but MUST skip all the | 4>MUST</bcp14> skip all the | |||
overlay tunnels from the flooding list, i.e. it will only | overlay tunnels from the flooding list, i.e., it will only | |||
replicate to local Attachment Circuits. This is the regular-IR | replicate to local ACs. This is the regular ingress replication | |||
behavior described in <xref target="RFC7432"/>. Non-BM overlay | behavior described in <xref target="RFC7432" format="default"/>. | |||
tunnels are skipped when sending BM packets.</t> | Non-BM overlay | |||
</list></t> | tunnels are skipped when sending BM packets.</li> | |||
</ul> | ||||
<t>In any case, the AR-REPLICATOR ensures the traffic is not sent | </li> | |||
<li>In any case, the AR-REPLICATOR ensures that the traffic is not sen | ||||
t | ||||
back to the originating source. If the encapsulation is MPLSoGRE | back to the originating source. If the encapsulation is MPLSoGRE | |||
or MPLSoUDP and the received BD label (the label that the | or MPLSoUDP and the received BD label (the label that the | |||
AR-REPLICATOR advertised in the Replicator-AR route) is not the | AR-REPLICATOR advertised in the Replicator-AR route) is not at the | |||
bottom of the stack, the AR-REPLICATOR MUST copy the rest of the | bottom of the stack, the AR-REPLICATOR <bcp14>MUST</bcp14> copy the | |||
labels when forwarding them to the egress overlay tunnels.</t> | rest of the | |||
</list></t> | labels when forwarding them to the egress overlay tunnels.</li> | |||
</ul> | ||||
</section> | </section> | |||
<section anchor="sect-6.2" numbered="true" toc="default"> | ||||
<name>Selective AR-LEAF Procedures</name> | ||||
<section anchor="sect-6.2" title="Selective AR-LEAF Procedures"> | <t>A selective AR-LEAF chooses a single selective AR-REPLICATOR per BD | |||
<t>A Selective AR-LEAF chooses a single Selective AR-REPLICATOR per BD | ||||
and:</t> | and:</t> | |||
<ul spacing="normal"> | ||||
<t><list style="symbols"> | <li>Sends all the BD's BM traffic to that AR-REPLICATOR and</li> | |||
<?rfc subcompact="yes"?> | <li>Expects to receive all the BM traffic for a given BD from the | |||
<t>Sends all the BD's BM traffic to that AR-REPLICATOR and</t> | ||||
<t>Expects to receive all the BM traffic for a given BD from the | ||||
same AR-REPLICATOR (except for the BM traffic from the RNVEs, | same AR-REPLICATOR (except for the BM traffic from the RNVEs, | |||
which comes directly from the RNVEs)</t> | which comes directly from the RNVEs)</li> | |||
</ul> | ||||
<?rfc subcompact="no"?> | <t>In the example in <xref target="selective-ar" format="default"/>, we | |||
</list></t> | consider | |||
NVE1/NVE2/NVE3 as selective AR-LEAFs. NVE1 selects PE1 as its | ||||
<t>In the example of <xref target="selective-ar"/>, we consider | selective AR-REPLICATOR. If that is so, NVE1 will send all its BM | |||
NVE1/NVE2/NVE3 as Selective AR-LEAFs. NVE1 selects PE1 as its | traffic for BD-1 to PE1. If other AR-LEAFs/REPLICATORs send BM traffic, | |||
Selective AR-REPLICATOR. If that is so, NVE1 will send all its BM | NVE1 will receive that traffic from PE1. A selective AR-LEAF and a non-s | |||
traffic for BD-1 to PE1. If other AR-LEAF/REPLICATORs send BM traffic, | elective AR-LEAF behave differently, as follows:</t> | |||
NVE1 will receive that traffic from PE1. These are the differences in | <ol spacing="normal" type="a"><li>The selective AR-LEAF role <bcp14>SHOU | |||
the behavior of a Selective AR-LEAF compared to a non-selective | LD</bcp14> be an | |||
AR-LEAF:<list style="letters"> | ||||
<t>The AR-LEAF role selective capability SHOULD be an | ||||
administrative choice in any NVE/PE that is part of an | administrative choice in any NVE/PE that is part of an | |||
Assisted-Replication-enabled BD. This administrative option to | AR-enabled BD. This administrative option to | |||
enable AR-LEAF capabilities MAY be implemented as a system level | enable AR-LEAF capabilities <bcp14>MAY</bcp14> be implemented as a s | |||
option as opposed to as per-BD option.</t> | ystem-level option as opposed to a per-BD option.</li> | |||
<li>The AR-LEAF <bcp14>MAY</bcp14> advertise a Regular-IR route if the | ||||
<t>The AR-LEAF MAY advertise a Regular-IR route if there are RNVEs | re are RNVEs | |||
in the BD. The Selective AR-LEAF MUST advertise a Leaf | in the BD. The selective AR-LEAF <bcp14>MUST</bcp14> advertise a Lea | |||
Auto-Discovery route after receiving a Replicator-AR route with | f | |||
L=1. It is RECOMMENDED that the Selective AR-LEAF waits for an | A-D route after receiving a Replicator-AR route with | |||
AR-LEAF-join-wait-timer (in seconds, default value is 3) before | L = 1. It is <bcp14>RECOMMENDED</bcp14> that the selective AR-LEAF w | |||
sending the Leaf Auto-Discovery route, so that the AR-LEAF can | ait for a period specified by an | |||
AR-LEAF-join-wait-timer (in seconds, with a default value of 3) befo | ||||
re | ||||
sending the Leaf A-D route, so that the AR-LEAF can | ||||
collect all the Replicator-AR routes for the BD before advertising | collect all the Replicator-AR routes for the BD before advertising | |||
the Leaf Auto-Discovery route. If the Replicator-AR route with L=1 | the Leaf A-D route. If the Replicator-AR route with L = 1 | |||
is withdrawn, the corresponding Leaf Auto-Discovery route is | is withdrawn, the corresponding Leaf A-D route is | |||
withdrawn too.</t> | withdrawn too.</li> | |||
<li> | ||||
<t>In a service where there is more than one Selective | <t>In a service where there is more than one selective | |||
AR-REPLICATOR the Selective AR-LEAF MUST locally select a single | AR-REPLICATOR, the selective AR-LEAF <bcp14>MUST</bcp14> locally sel | |||
Selective AR-REPLICATOR for the BD. Once selected: <list | ect a single | |||
style="symbols"> | selective AR-REPLICATOR for the BD. Once selected: </t> | |||
<t>The Selective AR-LEAF MUST send a Leaf Auto-Discovery route | <ul spacing="normal"> | |||
including the Route-key and IP-address-specific route-target | <li>The selective AR-LEAF <bcp14>MUST</bcp14> send a Leaf A-D rout | |||
of the selected AR-REPLICATOR.</t> | e, | |||
including the route key and IP-address-specific Route Target | ||||
<t>The Selective AR-LEAF MUST send all the BM packets received | of the selected AR-REPLICATOR.</li> | |||
on the attachment circuits (ACs) for a given BD to that | <li>The selective AR-LEAF <bcp14>MUST</bcp14> send all the BM pack | |||
AR-REPLICATOR.</t> | ets received | |||
on the ACs for a given BD to that | ||||
<t>In case of a failure on the selected AR-REPLICATOR | AR-REPLICATOR.</li> | |||
<li>In the case of failure of the selected AR-REPLICATOR | ||||
(detected when the Replicator-AR route becomes infeasible as | (detected when the Replicator-AR route becomes infeasible as | |||
the result of any of the underlying BGP mechanisms), another | a result of any of the underlying BGP mechanisms), another | |||
AR-REPLICATOR will be selected and a new Leaf Auto-Discovery | AR-REPLICATOR will be selected and a new Leaf A-D | |||
update will be issued for the new AR-REPLICATOR. This new | update will be issued for the new AR-REPLICATOR. This new | |||
route will update the selective list in the new Selective | route will update the selective list in the new selective | |||
AR-REPLICATOR. In case of failure of the active Selective | AR-REPLICATOR. In the case of failure of the active selective | |||
AR-REPLICATOR, it is RECOMMENDED for the Selective AR-LEAF to | AR-REPLICATOR, it is <bcp14>RECOMMENDED</bcp14> that the selecti | |||
revert to Ingress Replication behavior for a timer | ve AR-LEAF | |||
AR-REPLICATOR-activation-timer (in seconds, default value is | revert to ingress replication behavior for an | |||
AR-REPLICATOR-activation-timer (in seconds, with a default value | ||||
of | ||||
3) to mitigate the traffic impact. When the timer expires, the | 3) to mitigate the traffic impact. When the timer expires, the | |||
Selective AR-LEAF will resume its AR mode with the new | selective AR-LEAF will resume its AR mode with the new | |||
Selective AR-REPLICATOR. The AR-REPLICATOR-activation-timer | selective AR-REPLICATOR. The AR-REPLICATOR-activation-timer | |||
MAY be the same configurable parameter as in <xref | <bcp14>MAY</bcp14> be the same configurable parameter as the par | |||
target="sect-5.2"/>.</t> | ameter discussed in <xref target="sect-5.2" format="default"/>.</li> | |||
<li>A selective AR-LEAF <bcp14>MAY</bcp14> change the selection of | ||||
<t>A Selective AR-LEAF MAY change the AR-REPLICATOR(s) | AR-REPLICATOR(s) | |||
selection dynamically, due to an administrative or policy | dynamically due to an administrative or policy | |||
configuration change.</t> | configuration change.</li> | |||
</list></t> | </ul> | |||
</list></t> | </li> | |||
</ol> | ||||
<t>All the AR-LEAFs in a BD are expected to be configured as either | <t>All the AR-LEAFs in a BD are expected to be configured as either | |||
selective or non-selective. A mix of selective and non-selective | selective or non-selective. A mix of selective and non-selective | |||
AR-LEAFs SHOULD NOT coexist in the same BD. In case there is a | AR-LEAFs <bcp14>SHOULD NOT</bcp14> coexist in the same BD. If a | |||
non-selective AR-LEAF, its BM traffic sent to a selective | non-selective AR-LEAF is present, its BM traffic sent to a selective | |||
AR-REPLICATOR will not be replicated to other AR-LEAFs that are not in | AR-REPLICATOR will not be replicated to other AR-LEAFs that are not in | |||
its Selective AR-LEAF-set.</t> | its selective AR-LEAF-set.</t> | |||
<t>A selective AR-LEAF <bcp14>MUST</bcp14> follow a data path implementa | ||||
tion | ||||
compatible with the following rules:</t> | ||||
<ul spacing="normal"> | ||||
<li> | ||||
<t>The selective AR-LEAF nodes will build two flooding lists:</t> | ||||
<t>A Selective AR-LEAF MUST follow a data path implementation | <dl> | |||
compatible with the following rules:<list style="symbols"> | ||||
<t>The Selective AR-LEAF nodes will build two flood-lists:<list | ||||
style="numbers"> | ||||
<t>Flood-list #1 - composed of Attachment Circuits and the | ||||
overlay tunnel to the selected AR-REPLICATOR (using the AR-IP | ||||
as the tunnel destination IP address).</t> | ||||
<t>Flood-list #2 - composed of Attachment Circuits and overlay | <dt>Flooding list #1: | |||
tunnels to the remote IR-IP addresses.</t> | </dt> | |||
</list></t> | <dd>Composed of ACs and the overlay tunnel to the selected | |||
AR-REPLICATOR (using the AR-IP as the tunnel destination IP address). | ||||
</dd> | ||||
<t>Some of the overlay tunnels in the flood-lists MAY be flagged | <dt>Flooding list #2: | |||
as non-BM receivers based on the BM flag received from the remote | </dt> | |||
nodes in the routes.</t> | <dd>Composed of ACs and overlay tunnels to the remote IR-IP | |||
addresses. | ||||
</dd> | ||||
<t>When an AR-LEAF receives a BM packet on an Attachment Circuit, | </dl> | |||
it will check if there is any selected AR-REPLICATOR. If there is, | ||||
flood-list #1 MUST be used. Otherwise, flood-list #2 MUST be used. | ||||
Non-BM overlay tunnels are skipped when sending BM packets.</t> | ||||
<t>When an AR-LEAF receives a BM packet on an overlay tunnel, it | </li> | |||
MUST forward the BM packet to its local Attachment Circuits and | <li>Some of the overlay tunnels in the flooding lists <bcp14>MAY</bcp1 | |||
never to an overlay tunnel. This is the regular Ingress | 4> be flagged | |||
Replication behavior described in <xref target="RFC7432"/>.</t> | as non-BM receivers based on the BM flag received from the remote | |||
</list></t> | nodes in the routes.</li> | |||
<li>When an AR-LEAF receives a BM packet on an AC, | ||||
it will check to see if an AR-REPLICATOR was selected; if one is fou | ||||
nd, | ||||
flooding list #1 <bcp14>MUST</bcp14> be used. Otherwise, flooding li | ||||
st #2 <bcp14>MUST</bcp14> be used. | ||||
Non-BM overlay tunnels are skipped when sending BM packets.</li> | ||||
<li>When an AR-LEAF receives a BM packet on an overlay tunnel, it | ||||
<bcp14>MUST</bcp14> forward the BM packet to its local ACs and | ||||
never to an overlay tunnel. This is the regular ingress | ||||
replication behavior described in <xref target="RFC7432" format="def | ||||
ault"/>.</li> | ||||
</ul> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sect-7" numbered="true" toc="default"> | ||||
<section anchor="sect-7" title="Pruned-Flood-Lists (PFL)"> | <name>Pruned Flooding Lists (PFLs)</name> | |||
<t>In addition to AR, the second optimization supported by this solution | <t>In addition to AR, the second optimization supported by the ingress | |||
is the ability for the all the BD nodes to signal Pruned-Flood-Lists | replication optimization solution specified in this document | |||
(PFL). As described in <xref target="sect-4"/>, an EVPN node can signal | is the ability of all the BD nodes to signal PFLs. As described in <xref t | |||
a given value for the BM and U Pruned-Food-Lists flags in the | arget="sect-4" format="default"/>, an EVPN node can signal | |||
Regular-IR, Replicator-AR or Leaf Auto-Discovery routes, where:</t> | a given value for the BM and U PFLs flags in the | |||
Regular-IR, Replicator-AR, or Leaf A-D routes, where:</t> | ||||
<t><list style="symbols"> | <ul spacing="normal"> | |||
<t>BM is the Broadcast and Multicast flag. BM=1 means "prune-me" | <li>BM is the Broadcast and Multicast flag. BM = 1 means "prune me | |||
from the BM flood-list. BM=0 means regular behavior.</t> | from the BM flooding list". BM = 0 indicates regular behavior.</li> | |||
<li>U is the Unknown flag. U = 1 means "prune me from the Unknown | ||||
<t>U is the Unknown flag. U=1 means "prune-me" from the Unknown | flooding list". U = 0 indicates regular behavior.</li> | |||
flood-list. U=0 means regular behavior.</t> | </ul> | |||
</list></t> | <t>The ability to signal and process these PFLs flags | |||
<bcp14>SHOULD</bcp14> be an administrative choice. If a node is configured | ||||
<t>The ability to signal and process these Pruned-Flood-Lists flags | to process | |||
SHOULD be an administrative choice. If a node is configured to process | the PFLs flags, upon receiving a non-zero | |||
the Pruned-Flood-Lists flags, upon receiving a non-zero | PFLs flag for a route, an NVE/PE will add the | |||
Pruned-Flood-Lists flag for a route, the NVE/PE will add the | corresponding flag to the created overlay tunnel in the flooding list. Whe | |||
corresponding flag to the created overlay tunnel in the flood-list. When | n | |||
replicating a BM packet in the context of a flood-list, the NVE/PE will | replicating a BM packet in the context of a flooding list, the NVE/PE will | |||
skip the overlay tunnels marked with the flag BM=1, since the NVE/PE at | skip the overlay tunnels marked with the flag BM = 1, since the NVEs/PEs a | |||
t | ||||
the end of those tunnels are not expecting BM packets. Similarly, when | the end of those tunnels are not expecting BM packets. Similarly, when | |||
replicating Unknown unicast packets, the NVE/PE will skip the overlay | replicating unknown unicast packets, the NVE/PE will skip the overlay | |||
tunnels marked with U=1.</t> | tunnels marked with U = 1.</t> | |||
<t>An NVE/PE not following this document or not configured for this | <t>An NVE/PE not following this document or not configured for this | |||
optimization will ignore any of the received Pruned-Flood-Lists flags. | optimization will ignore any of the received PFLs flags. | |||
An AR-LEAF or RNVE receiving BUM traffic on an overlay tunnel MUST | An AR-LEAF or RNVE receiving BUM traffic on an overlay tunnel <bcp14>MUST< | |||
replicate the traffic to its local Attachment Circuits, regardless of | /bcp14> | |||
replicate the traffic to its local ACs, regardless of | ||||
the BM/U flags on the overlay tunnels.</t> | the BM/U flags on the overlay tunnels.</t> | |||
<t>This optimization <bcp14>MAY</bcp14> be used along with the Assisted Re | ||||
<t>This optimization MAY be used along with the Assisted-Replication | plication | |||
solution.</t> | solution.</t> | |||
<section anchor="sect-7.1" numbered="true" toc="default"> | ||||
<section anchor="sect-7.1" title="A Pruned-Flood-List Example"> | <name>Example of a Pruned Flooding List</name> | |||
<t>In order to illustrate the use of the solution described in this | <t>In order to illustrate the use of the PFLs solution, we will assume t | |||
document, we will assume that BD-1 in <xref | hat BD-1 in <xref target="ure-optimized-ir-scenario" format="default"/> is optim | |||
target="ure-optimized-ir-scenario"/> is optimized Ingress Replication | ized ingress replication | |||
enabled and:</t> | enabled and:</t> | |||
<ul spacing="normal"> | ||||
<t><list style="symbols"> | <li>PE1 and PE2 are administratively configured as AR-REPLICATORs | |||
<t>PE1 and PE2 are administratively configured as AR-REPLICATORs, | ||||
due to their high-performance replication capabilities. PE1 and | due to their high-performance replication capabilities. PE1 and | |||
PE2 will send a Replicator-AR route with BM/U flags = 00.</t> | PE2 will send a Replicator-AR route with BM/U flags = 00.</li> | |||
<li> | ||||
<t>NVE1 and NVE3 are administratively configured as AR-LEAF nodes, | <t>NVE1 and NVE3 are administratively configured as AR-LEAF nodes | |||
due to their low-performance software-based replication | due to their low-performance software-based replication | |||
capabilities. They will advertise a Regular-IR route with type | capabilities. They will advertise a Regular-IR route with type | |||
AR-LEAF. Assuming both NVEs advertise all the attached Virtual | AR-LEAF. Assuming that both NVEs advertise all of the attached VMs' | |||
Machines MAC and IP addresses in EVPN as soon as they come up, and | MAC and IP addresses in EVPNs as soon as they come up and | |||
these NVEs do not have any Virtual Machines interested in | these NVEs do not have any VMs interested in | |||
multicast applications, they will be configured to signal BM/U | multicast applications, they will be configured to signal BM/U | |||
flags = 11 for BD-1. That is, neither NVE1 nor NVE3 are interested | flags = 11 for BD-1. That is, neither NVE1 nor NVE3 is interested | |||
in receiving BM or Unknown Unicast traffic since:<list | in receiving BM or unknown unicast traffic, since:</t> | |||
style="symbols"> | <ul spacing="normal"> | |||
<t>Their attached VMs (VM11, VM12, VM31, VM32) do not support | <li>Their attached VMs (VM11, VM12, VM31, VM32) do not support | |||
multicast applications.</t> | multicast applications.</li> | |||
<li>Their attached VMs will not receive ARP Requests. Proxy ARP | ||||
<t>Their attached VMs will not receive ARP Requests. Proxy-ARP | <xref target="RFC9161" format="default"/> on the remote | |||
<xref target="I-D.ietf-bess-evpn-proxy-arp-nd"/> on the remote | NVEs/PEs will reply to ARP Requests locally, and no other | |||
NVE/PEs will reply ARP Requests locally, and no other | broadcast traffic is expected.</li> | |||
Broadcast is expected.</t> | <li>Their attached VMs will not receive unknown unicast | |||
<t>Their attached VMs will not receive unknown unicast | ||||
traffic, since the VMs' MAC and IP addresses are always | traffic, since the VMs' MAC and IP addresses are always | |||
advertised by EVPN as long as the VMs are active.</t> | advertised by EVPNs as long as the VMs are active.</li> | |||
</list></t> | </ul> | |||
</li> | ||||
<t>NVE2 is optimized Ingress Replication unaware; therefore it | <li>NVE2 is optimized ingress replication unaware; therefore, it | |||
takes on the RNVE role in BD-1.</t> | takes on the RNVE role in BD-1.</li> | |||
</list></t> | </ul> | |||
<t>Based on the above assumptions, the following forwarding behavior | ||||
<t>Based on the above assumptions the following forwarding behavior | ||||
will take place:</t> | will take place:</t> | |||
<ol spacing="normal" type="1"><li>Any BM packets sent from VM11 will be | ||||
sent to VM12 | ||||
and PE1. PE1 will then forward the BM packets on to TS1, the WAN lin | ||||
k, | ||||
PE2, and NVE2 but not to NVE3. PE2 and NVE2 will replicate the BM | ||||
packets to their local ACs, but NVE3 will be prevented from | ||||
having to replicate those BM packets to VM31 and | ||||
VM32 unnecessarily.</li> | ||||
<li>Any BM packets received on PE2 from the WAN will be | ||||
sent to PE1 and NVE2 but not to NVE1 and NVE3, sparing the two | ||||
hypervisors from replicating unnecessarily to their local VMs. | ||||
PE1 and NVE2 will replicate to their local ACs | ||||
only.</li> | ||||
<li>Any unknown unicast packet sent from VM31 will be | ||||
forwarded by NVE3 to NVE2, PE1, and PE2 but not to NVE1. The solutio | ||||
n | ||||
prevents unnecessary replication to NVE1, since the destination | ||||
of the unknown traffic cannot be NVE1.</li> | ||||
<li>Any unknown unicast packet sent from TS1 will be | ||||
forwarded by PE1 to the WAN link, PE2, and NVE2 but not to NVE1 and | ||||
NVE3, since the target of the unknown traffic cannot be NVE1 or | ||||
NVE3.</li> | ||||
</ol> | ||||
<t><list hangIndent="4" style="numbers"> | ||||
<t hangText="">Any BM packets sent from VM11 will be sent to VM12 | ||||
and PE1. PE1 will forward further the BM packets to TS1, WAN link, | ||||
PE2 and NVE2, but not to NVE3. PE2 and NVE2 will replicate the BM | ||||
packets to their local Attachment Circuits but we will avoid NVE3 | ||||
having to replicate unnecessarily those BM packets to VM31 and | ||||
VM32.</t> | ||||
<t hangText="">Any BM packets received on PE2 from the WAN will be | ||||
sent to PE1 and NVE2, but not to NVE1 and NVE3, sparing the two | ||||
hypervisors from replicating unnecessarily to their local Virtual | ||||
Machines. PE1 and NVE2 will replicate to their local Attachment | ||||
Circuits only.</t> | ||||
<t hangText="">Any Unknown unicast packet sent from VM31 will be | ||||
forwarded by NVE3 to NVE2, PE1 and PE2 but not NVE1. The solution | ||||
avoids the unnecessary replication to NVE1, since the destination | ||||
of the unknown traffic cannot be at NVE1.</t> | ||||
<t hangText="">Any Unknown unicast packet sent from TS1 will be | ||||
forwarded by PE1 to the WAN link, PE2 and NVE2 but not to NVE1 and | ||||
NVE3, since the target of the unknown traffic cannot be at those | ||||
NVEs.</t> | ||||
</list></t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sect-8" numbered="true" toc="default"> | ||||
<section anchor="sect-8" | <name>AR Procedures for Single-IP AR-REPLICATORS</name> | |||
title="AR Procedures for Single-IP AR-REPLICATORS"> | <t>The procedures explained in Sections <xref target="sect-5" format= | |||
<t>The procedures explained in sections <xref target="sect-5"/> and | "counter"/> and | |||
<xref target="sect-6"/> assume that the AR-REPLICATOR can use two local | <xref target="sect-6" format="counter"/> assume that the AR-REPLICATOR can | |||
routable IP addresses to terminate and originate Network Virtualization | use two local | |||
Overlay tunnels, i.e. IR-IP and AR-IP addresses. This is usually the | routable IP addresses to terminate and originate NVO | |||
tunnels, i.e., IR-IP and AR-IP addresses. This is usually the | ||||
case for PE-based AR-REPLICATOR nodes.</t> | case for PE-based AR-REPLICATOR nodes.</t> | |||
<t>In some cases, the AR-REPLICATOR node does not support more than one | <t>In some cases, the AR-REPLICATOR node does not support more than one | |||
IP address to terminate and originate Network Virtualization Overlay | IP address to terminate and originate NVO | |||
tunnels, i.e. the IR-IP and AR-IP are the same IP addresses. This may be | tunnels, i.e., the IR-IP and AR-IP are the same IP addresses. This may be | |||
the case in some software-based or low-end AR-REPLICATOR nodes. If this | the case in some software-based or low-end AR-REPLICATOR nodes. If this | |||
is the case, the procedures in sections <xref target="sect-5"/> and | is the case, the procedures provided in Sections <xref target="sect-5 | |||
<xref target="sect-6"/> MUST be modified in the following way:</t> | " format="counter"/> and <xref target="sect-6" format="counter"/> <bcp14>MUST</b | |||
cp14> be modified in the following way:</t> | ||||
<t><list style="symbols"> | <ul spacing="normal"> | |||
<t>The Replicator-AR routes generated by the AR-REPLICATOR use an | <li>The Replicator-AR routes generated by the AR-REPLICATOR use an | |||
AR-IP that will match its IR-IP. In order to differentiate the data | AR-IP that will match its IR-IP. In order to differentiate the data | |||
plane packets that need to use Ingress Replication from the packets | plane packets that need to use ingress replication from the packets | |||
that must use Assisted Replication forwarding mode, the | that must use Assisted Replication forwarding mode, the | |||
Replicator-AR route MUST advertise a different VNI/VSID than the one | Replicator-AR route <bcp14>MUST</bcp14> advertise a different VNI/VSID than the one | |||
used by the Regular-IR route. For instance, the AR-REPLICATOR will | used by the Regular-IR route. For instance, the AR-REPLICATOR will | |||
advertise AR-VNI along with the Replicator-AR route and IR-VNI along | advertise an AR-VNI along with the Replicator-AR route and an IR-VNI along | |||
with the Regular-IR route. Since both routes have the same key, | with the Regular-IR route. Since both routes have the same key, | |||
different Route Distinguishers are needed in each route.</t> | different Route Distinguishers are needed in each route.</li> | |||
<li>An AR-REPLICATOR will perform Ingress Replication forwarding mode or | ||||
<t>An AR-REPLICATOR will perform Ingress Replication or Assisted | Assisted | |||
Replication forwarding mode for the incoming Overlay packets based | Replication forwarding mode for the incoming overlay packets based | |||
on an ingress VNI lookup, as opposed to the tunnel IP DA lookup. | on an ingress VNI lookup as opposed to the tunnel IP DA lookup. | |||
Note that, when replicating to remote AR-REPLICATOR nodes, the use | Note that when replicating to remote AR-REPLICATOR nodes, the use | |||
of the IR-VNI or AR-VNI advertised by the egress node will determine | of the IR-VNI or AR-VNI advertised by the egress node will determine | |||
the Ingress Replication or Assisted Replication forwarding mode at | whether Ingress Replication forwarding mode or Assisted Replication fo | |||
the subsequent AR-REPLICATOR.</t> | rwarding mode is used at | |||
</list></t> | the subsequent AR-REPLICATOR.</li> | |||
</ul> | ||||
<t>The rest of the procedures will follow what is described in sections | <t>The rest of the procedures will follow those described in | |||
<xref target="sect-5"/> and <xref target="sect-6"/>.</t> | Sections <xref target="sect-5" format="counter"/> and | |||
<xref target="sect-6" format="counter"/>.</t> | ||||
</section> | </section> | |||
<section anchor="sect-9" numbered="true" toc="default"> | ||||
<section anchor="sect-9" | <name>AR Procedures and EVPN All-Active Multihoming Split-Horizon</name> | |||
title="AR Procedures and EVPN All-Active Multi-homing Split-Horizon | ||||
"> | ||||
<t>This section extends the procedures for the cases where two or more | <t>This section extends the procedures for the cases where two or more | |||
AR-LEAF nodes are attached to the same Ethernet Segment, and two or more | AR-LEAF nodes are attached to the same ES and two or more | |||
AR-REPLICATOR nodes are attached to the same Ethernet Segment in the BD. | AR-REPLICATOR nodes are attached to the same ES in the BD. | |||
The mixed case, that is, an AR-LEAF node and an AR-REPLICATOR node are | The mixed case -- where an AR-LEAF node and an AR-REPLICATOR node are | |||
attached to the same Ethernet Segment, would require extended procedures | attached to the same ES -- would require extended procedures | |||
and it is out of scope.</t> | that are out of scope for this document.</t> | |||
<section anchor="sect-9.1" numbered="true" toc="default"> | ||||
<section anchor="sect-9.1" title="Ethernet Segments on AR-LEAF Nodes"> | <name>Ethernet Segments on AR-LEAF Nodes</name> | |||
<t>If VXLAN or NVGRE are used, and if the Split-horizon is based on | <t>If a VXLAN or NVGRE is used and if the split-horizon is based on | |||
the tunnel IP Source Address and "Local-Bias" as described in <xref | the tunnel source IP address and "local bias" as described in <xref targ | |||
target="RFC8365"/>, the Split-horizon check will not work if there is | et="RFC8365" format="default"/>, the split-horizon check will not work if | |||
an Ethernet-Segment shared between two AR-LEAF nodes, and the | an ES is shared between two AR-LEAF nodes, and the | |||
AR-REPLICATOR replaces the tunnel IP Source Address of the packets | AR-REPLICATOR replaces the tunnel source IP address of the packets | |||
with its own AR-IP.</t> | with its own AR-IP.</t> | |||
<t>In order to be compatible with the source IP address split-horizon | ||||
<t>In order to be compatible with the IP Source Address split-horizon | check, the AR-REPLICATOR <bcp14>MAY</bcp14> keep the original received t | |||
check, the AR-REPLICATOR MAY keep the original received tunnel IP | unnel source IP | |||
Source Address when replicating packets to a remote AR-LEAF or RNVE. | address when replicating packets to a remote AR-LEAF or RNVE. | |||
This will allow AR-LEAF nodes to apply Split-horizon check procedures | This will allow AR-LEAF nodes to apply split-horizon check procedures | |||
for BM packets, before sending them to the local Ethernet-Segment. | for BM packets before sending them to the local ES. | |||
Even if the AR-LEAF's IP Source Address is preserved when replicating | Even if the AR-LEAF's source IP address is preserved when replicating | |||
to AR-LEAFs or RNVEs, the AR-REPLICATOR MUST always use its IR-IP as | to AR-LEAFs or RNVEs, the AR-REPLICATOR <bcp14>MUST</bcp14> always use i | |||
the IP Source Address when replicating to other AR-REPLICATORs.</t> | ts IR-IP as | |||
the source IP address when replicating to other AR-REPLICATORs.</t> | ||||
<t>When EVPN is used for MPLS over GRE (or UDP), the ESI-label based | <t>When EVPNs are used for MPLSoGRE or MPLSoUDP, the ESI-label-based | |||
split-horizon procedure as in <xref target="RFC7432"/> will not work | split-horizon procedure provided in <xref target="RFC7432" format="defau | |||
for multi-homed Ethernet-Segments defined on AR-LEAF nodes. | lt"/> will not work | |||
"Local-Bias" is recommended in this case, as in the case of VXLAN or | for multihomed ESs defined on AR-LEAF nodes. | |||
NVGRE explained above. The "Local-Bias" and tunnel IP Source Address | Local bias is recommended in this case, as it is in the case of a VXLAN | |||
or | ||||
NVGRE as explained above. The local-bias and tunnel source IP address | ||||
preservation mechanisms provide the required split-horizon behavior in | preservation mechanisms provide the required split-horizon behavior in | |||
non-selective or selective AR.</t> | non-selective or selective AR.</t> | |||
<t>Note that if the AR-REPLICATOR implementation keeps the received | <t>Note that if the AR-REPLICATOR implementation keeps the received | |||
tunnel IP Source Address, the use of uRPF (unicast Reverse Path | tunnel source IP address, the use of unicast Reverse Path | |||
Forwarding) checks in the IP fabric based on the tunnel IP Source | Forwarding (uRPF) checks in the IP fabric based on the tunnel source IP | |||
Address MUST be disabled.</t> | address <bcp14>MUST</bcp14> be disabled.</t> | |||
</section> | </section> | |||
<section anchor="sect-9.2" | <section anchor="sect-9.2" numbered="true" toc="default"> | |||
title="Ethernet Segments on AR-REPLICATOR nodes"> | <name>Ethernet Segments on AR-REPLICATOR Nodes</name> | |||
<t>AR-REPLICATOR nodes attached to the same all-active Ethernet | <t>AR-REPLICATOR nodes attached to the same all-active ES | |||
Segment will follow "Local-Bias" procedures <xref target="RFC8365"/>, | will follow local-bias procedures <xref target="RFC8365" format="default | |||
"/> | ||||
as follows:</t> | as follows:</t> | |||
<ol spacing="normal" type="a"><li>For BUM traffic received on a local AR | ||||
<t><list style="letters"> | -REPLICATOR's AC, | |||
<t>For BUM traffic received on a local AR-REPLICATOR's Attachment | local-bias procedures as provided in <xref target="RFC8365" format=" | |||
Circuit, "Local-Bias" procedures as in <xref target="RFC8365"/> | default"/> | |||
MUST be followed.</t> | <bcp14>MUST</bcp14> be followed.</li> | |||
<li>For BUM traffic received on an AR-REPLICATOR overlay tunnel | ||||
<t>For BUM traffic received on an AR-REPLICATOR overlay tunnel | with AR-IP as the IP DA, local bias <bcp14>MUST</bcp14> also | |||
with AR-IP as the IP Destination Address, "Local-Bias" MUST also | be followed. That is, traffic received with AR-IP as the IP DA | |||
be followed. That is, traffic received with AR-IP as IP | will be treated as though it had been received | |||
Destination Address will be treated as though it had been received | on a local AC that is part of the ES | |||
on a local Attachment Circuit that is part of the Ethernet Segment | and will be forwarded to all local ESs, irrespective | |||
and will be forwarded to all local Ethernet Segments, irrespective | of their DF or NDF state.</li> | |||
of their DF or NDF state.</t> | <li>BUM traffic received on an AR-REPLICATOR overlay tunnel with | |||
IR-IP as the IP DA will follow regular local-bias rules <xref target | ||||
<t>BUM traffic received on an AR-REPLICATOR overlay tunnel with | ="RFC8365" format="default"/> and will not be forwarded to | |||
IR-IP as the IP Destination Address, will follow regular <xref | local ESs that are shared with the AR-LEAF or | |||
target="RFC8365"/> "Local-Bias" rules and will not be forwarded to | AR-REPLICATOR originating the traffic.</li> | |||
local Ethernet Segments that are shared with the AR-LEAF or | <li>In cases where the AR-REPLICATOR supports a single IP address, | |||
AR-REPLICATOR originating the traffic.</t> | ||||
<t>In cases where the AR-REPLICATOR supports a single IP address, | ||||
the IR-IP and the AR-IP are the same IP address, as discussed in | the IR-IP and the AR-IP are the same IP address, as discussed in | |||
<xref target="sect-8"/>. The received BUM traffic will be treated | <xref target="sect-8" format="default"/>. The received BUM traffic w | |||
as in 'b' above if the received VNI is the AR-VNI, and as in 'c' | ill be treated | |||
if the VNI is the IR-VNI.</t> | as specified in item b above if the received VNI is the AR-VNI and a | |||
</list></t> | s specified in item c if the VNI is the IR-VNI.</li> | |||
</ol> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sect-10" numbered="true" toc="default"> | ||||
<section anchor="sect-10" title="Security Considerations"> | <name>Security Considerations</name> | |||
<t>The Security Considerations in <xref target="RFC7432"/> and <xref | <t>The security considerations in <xref target="RFC7432" format="default"/ | |||
target="RFC8365"/> apply to this document. The Security Considerations | > and <xref target="RFC8365" format="default"/> apply to this document. The secu | |||
related to the Leaf Auto-Discovery route in <xref | rity considerations | |||
target="I-D.ietf-bess-evpn-bum-procedure-updates"/> apply too.</t> | related to the Leaf A-D route in <xref target="RFC9572" format="default"/> | |||
apply too.</t> | ||||
<t>In addition, the Assisted-Replication method introduced by this | <t>In addition, the Assisted Replication method introduced by this | |||
document may bring some new risks for the successful delivery of BM | document may introduce some new risks that could affect the successful del | |||
traffic. Unicast traffic is not affected by Assisted-Replication | ivery of BM | |||
(although Unknown unicast traffic is affected by the Pruned-Flood-Lists | traffic. Unicast traffic is not affected by Assisted Replication | |||
procedures). The forwarding of Broadcast and Multicast (BM) traffic is | (although unknown unicast traffic is affected by the procedures for PFLs). | |||
modified, and BM traffic from the AR-LEAF nodes will be attracted by the | The forwarding of BM traffic is | |||
existence of AR-REPLICATORs in the BD. An AR-LEAF will forward BM | modified, and BM traffic from the AR-LEAF nodes will be drawn toward | |||
traffic to its selected AR-REPLICATOR, therefore an attack on the | AR-REPLICATORs in the BD. An AR-LEAF will forward BM | |||
traffic to its selected AR-REPLICATOR; therefore, an attack on the | ||||
AR-REPLICATOR could impact the delivery of the BM traffic using that | AR-REPLICATOR could impact the delivery of the BM traffic using that | |||
node. Also, an attack on the AR-REPLICATOR and change of the advertised | node. Also, an attack on the AR-REPLICATOR and any change to the advertise | |||
AR type will modify the selection on the AR-LEAF nodes. If no other | d | |||
AR type will modify the selections made by the AR-LEAF nodes. If no other | ||||
AR-REPLICATOR is selected, the AR-LEAF nodes will be forced to use | AR-REPLICATOR is selected, the AR-LEAF nodes will be forced to use | |||
Ingress Replication forwarding mode, which will impact on their | Ingress Replication forwarding mode, which will impact their | |||
performance, since the AR-LEAF nodes are usually NVEs/PEs with poor | performance, since the AR-LEAF nodes are usually NVEs/PEs with poor | |||
replication performance.</t> | replication performance.</t> | |||
<t>This document introduces the ability of the AR-REPLICATOR to forward | ||||
<t>This document introduces the ability for the AR-REPLICATOR to forward | ||||
traffic received on an overlay tunnel to another overlay tunnel. The | traffic received on an overlay tunnel to another overlay tunnel. The | |||
reader may interpret that this introduces the risk of BM loops. That is, | reader may determine that this introduces the risk of BM loops -- that is, | |||
an AR-LEAF receiving a BM encapsulated packet that the AR-LEAF | an AR-LEAF receiving a BM-encapsulated packet that the AR-LEAF | |||
originated in the first place, due to one or two AR-REPLICATORs | originated in the first place due to one or two AR-REPLICATORs | |||
"looping" the BM traffic back to the AR-LEAF. The procedures in this | "looping" the BM traffic back to the AR-LEAF. Following the procedures pro | |||
document prevent these BM loops, since the AR-REPLICATOR will always | vided in this | |||
forward the BM traffic using the correct tunnel IP Destination Address | document will prevent these BM loops, since the AR-REPLICATOR will always | |||
(or correct VNI in case of single-IP AR-REPLICATORs) that instructs the | forward the BM traffic using the correct tunnel IP DA | |||
remote nodes how to forward the traffic. This is true in both the | (or the correct VNI in the case of single-IP AR-REPLICATORs), which instru | |||
Non-Selective and Selective modes defined in this document. However, a | cts the | |||
wrong implementation of the procedures in this document may lead to | remote nodes regarding how to forward the traffic. This is true for both t | |||
he | ||||
Non-selective and Selective modes defined in this document. However, | ||||
incorrect implementation of the procedures provided in this document may l | ||||
ead to | ||||
those unexpected BM loops.</t> | those unexpected BM loops.</t> | |||
<t>The Selective mode provides a multi-stage replication solution, | ||||
<t>The Selective mode provides a multi-staged replication solution, | where proper configuration of all the AR-REPLICATORs will prevent any | |||
where a proper configuration of all the AR-REPLICATORs will avoid any | issues. A mix of mistakenly configured selective and non-selective | |||
issues. A mix of mistakenly configured Selective and Non-Selective | ||||
AR-REPLICATORs in the same BD could theoretically create packet | AR-REPLICATORs in the same BD could theoretically create packet | |||
duplication in some AR-LEAFs, however this document specifies a fall | duplication in some AR-LEAFs; however, this document specifies a fallback | |||
back solution to Non-Selective mode in case the AR-REPLICATORs | solution -- falling back to Non-selective mode in cases where the AR-REPLICATORs | |||
advertised an inconsistent AR Replication mode.</t> | advertised an inconsistent AR mode.</t> | |||
<t>This document allows the AR-REPLICATOR to preserve the tunnel source IP | ||||
<t>This document allows the AR-REPLICATOR to preserve the tunnel IP | address of the AR-LEAF (as an option) when forwarding BM packets | |||
Source Address of the AR-LEAF (as an option) when forwarding BM packets | ||||
from an overlay tunnel to another overlay tunnel. Preserving the AR-LEAF | from an overlay tunnel to another overlay tunnel. Preserving the AR-LEAF | |||
IP Source Address makes the "Local Bias" filtering procedures possible | source IP address makes the local-bias filtering procedures possible | |||
for AR-LEAF nodes that are attached to the same Ethernet Segment. If the | for AR-LEAF nodes that are attached to the same ES. If the | |||
AR-REPLICATOR does not preserve the AR-LEAF IP Source Address, AR-LEAF | AR-REPLICATOR does not preserve the AR-LEAF source IP address, AR-LEAF | |||
nodes attached to all-active Ethernet Segments will cause packet | nodes attached to all-active ESs will cause packet | |||
duplication on the multi-homed CE.</t> | duplication on the multihomed CE.</t> | |||
<t>The AR-REPLICATOR nodes are, by design, using more bandwidth than PEs < | ||||
<t>The AR-REPLICATOR nodes are, by design, using more bandwidth than | xref target="RFC7432" format="default"/> or NVEs <xref target="RFC8365" format=" | |||
<xref target="RFC7432"/> PEs or <xref target="RFC8365"/> NVEs would use. | default"/> would use. | |||
Certain network events or unexpected low performance may exceed the | Certain network events or unexpected low performance may exceed the | |||
AR-REPLICATOR local bandwidth and cause service disruption.</t> | AR-REPLICATOR's local bandwidth and cause service disruption.</t> | |||
<t>Finally, PFLs (<xref target="sect-7" format="default"/>) should be | ||||
<t>Finally, the use of PFL as in <xref target="sect-7"/>, should be | used with care. Intentional or unintentional misconfiguration of | |||
handled with care. An intentional or unintentional misconfiguration of | ||||
the BDs on a given leaf node may result in the leaf not receiving the | the BDs on a given leaf node may result in the leaf not receiving the | |||
required BM or Unknown unicast traffic.</t> | required BM or unknown unicast traffic.</t> | |||
</section> | </section> | |||
<section anchor="sect-11" numbered="true" toc="default"> | ||||
<section anchor="sect-11" title="IANA Considerations"> | <name>IANA Considerations</name> | |||
<t>IANA has allocated the following Border Gateway Protocol (BGP) | <t>IANA has allocated the following Border Gateway Protocol (BGP) | |||
Parameters:</t> | parameters:</t> | |||
<ul spacing="normal"> | ||||
<t><list hangIndent="3" style="symbols"> | <li>Allocation in the "P-Multicast Service Interface Tunnel (PMSI Tunnel | |||
<t hangText="">Allocation in the P-Multicast Service Interface | ) Tunnel Types" registry:</li> | |||
Tunnel (PMSI Tunnel) Tunnel Types registry:</t> | </ul> | |||
</list></t> | <table align="center"> | |||
<thead> | ||||
<figure> | <tr> | |||
<artwork><![CDATA[ | <th>Value</th> | |||
Value Meaning Reference | <th>Meaning</th> | |||
0x0A Assisted-Replication Tunnel [This document] | <th>Reference</th> | |||
]]></artwork> | </tr> | |||
</figure> | </thead> | |||
<tbody> | ||||
<t><list hangIndent="3" style="symbols"> | <tr> | |||
<t hangText="">Allocations in the P-Multicast Service Interface | <td>0x0A</td> | |||
(PMSI) Tunnel Attribute Flags registry:</t> | <td>Assisted Replication Tunnel</td> | |||
</list></t> | <td>RFC 9574</td> | |||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<ul spacing="normal"> | ||||
<li>Allocations in the "P-Multicast Service Interface (PMSI) Tunnel Attr | ||||
ibute Flags" registry:</li> | ||||
</ul> | ||||
<figure> | <table align="center"> | |||
<artwork><![CDATA[ | <thead> | |||
Value Name Reference | <tr> | |||
3-4 Assisted-Replication Type (T) [This document] | <th>Value</th> | |||
5 Broadcast and Multicast (BM) [This document] | <th>Name</th> | |||
6 Unknown (U) [This document] | <th>Reference</th> | |||
]]></artwork> | </tr> | |||
</figure> | </thead> | |||
<tbody> | ||||
<tr> | ||||
<td>3-4</td> | ||||
<td>Assisted Replication Type (T)</td> | ||||
<td>RFC 9574</td> | ||||
</tr> | ||||
<tr> | ||||
<td>5</td> | ||||
<td>Broadcast and Multicast (BM)</td> | ||||
<td>RFC 9574</td> | ||||
</tr> | ||||
<tr> | ||||
<td>6</td> | ||||
<td>Unknown (U)</td> | ||||
<td>RFC 9574</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | </section> | |||
</middle> | ||||
<back> | ||||
<section title="Contributors"> | <references> | |||
<t>In addition to the names in the front page, the following co-authors | <name>References</name> | |||
also contributed to this document:</t> | <references> | |||
<name>Normative References</name> | ||||
<t><figure> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2 | |||
<artwork><![CDATA[ | 119.xml"/> | |||
Wim Henderickx | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | |||
Nokia | 174.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6 | ||||
Kiran Nagaraj | 514.xml"/> | |||
Nokia | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7 | |||
432.xml"/> | ||||
Ravi Shekhar | <!-- draft-ietf-bess-evpn-bum-procedure-updates (RFC 9572) --> | |||
Juniper Networks | <reference anchor="RFC9572" target="https://www.rfc-editor.org/info/rfc9572"> | |||
<front> | ||||
<title>Updates to EVPN Broadcast, Unknown Unicast, or Multicast (BUM) Procedures | ||||
</title> | ||||
<author initials='Z' surname='Zhang' fullname='Z. Zhang'> | ||||
<organization/> | ||||
</author> | ||||
<author initials='W' surname='Lin' fullname='W. Lin'> | ||||
<organization/> | ||||
</author> | ||||
<author initials='J' surname='Rabadan' fullname='J. Rabadan'> | ||||
<organization/> | ||||
</author> | ||||
<author initials='K' surname='Patel' fullname='K. Patel'> | ||||
<organization/> | ||||
</author> | ||||
<author initials='A' surname='Sajassi' fullname='A. Sajassi'> | ||||
<organization/> | ||||
</author> | ||||
<date month='May' year='2024'/> | ||||
</front> | ||||
<seriesInfo name="RFC" value="9572"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC9572"/> | ||||
</reference> | ||||
Nischal Sheth | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.79 | |||
Juniper Networks | 02.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6 | ||||
513.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | ||||
365.xml"/> | ||||
</references> | ||||
<references> | ||||
<name>Informative References</name> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7 | ||||
348.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4 | ||||
023.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.7 | ||||
637.xml"/> | ||||
Aldrin Isaac | <!-- draft-ietf-bess-evpn-proxy-arp-nd (RFC 9161; published) --> | |||
Juniper | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
161.xml"/> | ||||
Mudassir Tufail | </references> | |||
Citibank | </references> | |||
]]></artwork> | ||||
</figure></t> | ||||
</section> | ||||
<section title="Acknowledgments"> | <section numbered="false" toc="default"> | |||
<t>The authors would like to thank Neil Hart, David Motz, Dai Truong, | <name>Acknowledgements</name> | |||
Thomas Morin, Jeffrey Zhang, Shankar Murthy and Krzysztof Szarkowicz for | <t>The authors would like to thank <contact fullname="Neil Hart"/>, <conta | |||
their valuable feedback and contributions. Also thanks to John Scudder | ct fullname="David Motz"/>, <contact fullname="Dai Truong"/>, | |||
for his thorough review that improved the quality of the document | <contact fullname="Thomas Morin"/>, <contact fullname="Jeffrey Zhang"/>, < | |||
contact | ||||
fullname="Shankar Murthy"/>, and <contact fullname="Krzysztof Szarkowicz"/> for | ||||
their valuable feedback and contributions. Also, thanks to <contact fullna | ||||
me="John Scudder"/> for his thorough review, which improved the quality of the d | ||||
ocument | ||||
significantly. </t> | significantly. </t> | |||
</section> | </section> | |||
</middle> | ||||
<back> | ||||
<references title="Normative References"> | ||||
&RFC2119; | ||||
&RFC8174; | ||||
&RFC6514; | ||||
&RFC7432; | ||||
&I-D.ietf-bess-evpn-bum-procedure-updates; | <section numbered="false" toc="default"> | |||
<name>Contributors</name> | ||||
<t>In addition to the authors listed on the front page, the following peop | ||||
le also contributed to this document and should be considered coauthors:</t> | ||||
&RFC7902; | <contact fullname="Wim Henderickx"> | |||
<organization>Nokia</organization> | ||||
</contact> | ||||
&RFC6513; | <contact fullname="Kiran Nagaraj"> | |||
<organization>Nokia</organization> | ||||
</contact> | ||||
&RFC8365; | <contact fullname="Ravi Shekhar"> | |||
</references> | <organization>Juniper Networks</organization> | |||
</contact> | ||||
<references title="Informative References"> | <contact fullname="Nischal Sheth"> | |||
&RFC7348; | <organization>Juniper Networks</organization> | |||
</contact> | ||||
&RFC4023; | <contact fullname="Aldrin Isaac"> | |||
<organization>Juniper</organization> | ||||
</contact> | ||||
&RFC7637; | <contact fullname="Mudassir Tufail"> | |||
<organization>Citibank</organization> | ||||
</contact> | ||||
&I-D.ietf-bess-evpn-proxy-arp-nd; | </section> | |||
</references> | ||||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 241 change blocks. | ||||
1299 lines changed or deleted | 1427 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |