rfc8926xml2.original.xml   rfc8926.xml 
<?xml version='1.0' encoding='utf-8'?> <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC0768 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.0768.xml">
<!ENTITY RFC0792 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.0792.xml">
<!ENTITY RFC1112 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.1112.xml">
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.2119.xml">
<!ENTITY RFC4443 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.4443.xml">
<!ENTITY RFC6936 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.6936.xml">
<!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8126.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8174.xml">
<!ENTITY I-D.ietf-nvo3-encap SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml3
/reference.I-D.draft-ietf-nvo3-encap-05.xml">
<!ENTITY I-D.ietf-nvo3-dataplane-requirements SYSTEM "https://xml2rfc.ietf.org/p
ublic/rfc/bibxml3/reference.I-D.draft-ietf-nvo3-dataplane-requirements-03.xml">
<!ENTITY I-D.ietf-intarea-tunnels SYSTEM "https://xml2rfc.ietf.org/public/rfc/bi
bxml3/reference.I-D.draft-ietf-intarea-tunnels-10.xml">
<!--ENTITY IEEE.802.1Q_2014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml6/
reference.IEEE.802.1Q_2014.xml"-->
<!ENTITY RFC1191 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.1191.xml">
<!ENTITY RFC2003 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.2003.xml">
<!ENTITY RFC8200 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8200.xml">
<!ENTITY RFC2983 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.2983.xml">
<!ENTITY RFC3031 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.3031.xml">
<!ENTITY RFC3552 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.3552.xml">
<!ENTITY RFC3985 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.3985.xml">
<!ENTITY RFC4301 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.4301.xml">
<!ENTITY RFC5374 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.5374.xml">
<!ENTITY RFC6040 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.6040.xml">
<!ENTITY RFC6335 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.6335.xml">
<!ENTITY RFC6438 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.6438.xml">
<!ENTITY RFC7348 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.7348.xml">
<!ENTITY RFC7365 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.7365.xml">
<!ENTITY RFC7637 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.7637.xml">
<!ENTITY RFC8014 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8014.xml">
<!ENTITY RFC8085 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8085.xml">
<!ENTITY RFC8086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8086.xml">
<!ENTITY RFC8201 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8201.xml">
<!ENTITY RFC8293 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RF
C.8293.xml">
]>
<rfc submissionType="IETF" docName="draft-ietf-nvo3-geneve-16" category="std"><?
rfc compact="yes"?>
<?rfc text-list-symbols="o*+-"?>
<?rfc subcompact="no"?>
<?rfc sortrefs="yes"?>
<?rfc symrefs="yes"?>
<?rfc strict="yes"?>
<?rfc toc="yes"?>
<front>
<title abbrev="Geneve Protocol">Geneve: Generic Network Virtualization En
capsulation</title>
<author fullname="Jesse Gross" initials="J." role="editor" surname="Gross
">
<organization></organization>
<address><email>jesse@kernel.org</email>
</address>
</author>
<author fullname="Ilango Ganga" initials="I." role="editor" surname="Gang <!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent">
a">
<organization abbrev="Intel">Intel Corporation</organization>
<address><postal><street>2200 Mission College Blvd.</street>
<street>Santa Clara, CA 95054</street>
<street>USA</street>
</postal>
<email>ilango.s.ganga@intel.com</email>
</address>
</author>
<author fullname="T. Sridhar" initials="T." role="editor" surname="Sridha <rfc xmlns:xi="http://www.w3.org/2001/XInclude" ipr="trust200902" docName="draft
r"> -ietf-nvo3-geneve-16" number="8926" submissionType="IETF" category="std" consens
<organization abbrev="VMware">VMware, Inc.</organization> us="true" obsoletes="" updates="" xml:lang="en" sortRefs="true" symRefs="true" t
<address><postal><street>3401 Hillview Ave.</street> ocInclude="true" version="3">
<street>Palo Alto, CA 94304</street>
<street>USA</street>
</postal>
<email>tsridhar@vmware.com</email>
</address>
</author>
<date day="07" month="March" year="2020"/> <front>
<abstract><t> <title abbrev="Geneve Protocol">Geneve: Generic Network Virtualization Encap
sulation</title>
<seriesInfo name="RFC" value="8926"/>
<author fullname="Jesse Gross" initials="J." role="editor" surname="Gross">
<organization/>
<address>
<email>jesse@kernel.org</email>
</address>
</author>
<author fullname="Ilango Ganga" initials="I." role="editor" surname="Ganga">
<organization abbrev="Intel">Intel Corporation</organization>
<address>
<postal>
<street>2200 Mission College Blvd.</street>
<city>Santa Clara</city><region>CA</region><code>95054</code>
<country>United States of America</country>
</postal>
<email>ilango.s.ganga@intel.com</email>
</address>
</author>
<author fullname="T. Sridhar" initials="T." role="editor" surname="Sridhar">
<organization abbrev="VMware">VMware, Inc.</organization>
<address>
<postal>
<street>3401 Hillview Ave.</street>
<city>Palo Alto</city><region>CA</region><code>94304</code>
<country>United States of America</country>
</postal>
<email>tsridhar@utexas.edu</email>
</address>
</author>
<date month="November" year="2020"/>
<keyword>overlay</keyword>
<keyword>tunnel</keyword>
<keyword>extensible</keyword>
<keyword>variable</keyword>
<keyword>metadata</keyword>
<keyword>options</keyword>
<keyword>endpoint</keyword>
<keyword>transit</keyword>
<abstract>
<t>
Network virtualization involves the cooperation of devices with a Network virtualization involves the cooperation of devices with a
wide variety of capabilities such as software and hardware tunnel wide variety of capabilities such as software and hardware tunnel
endpoints, transit fabrics, and centralized control clusters. As a endpoints, transit fabrics, and centralized control clusters. As a
result of their role in tying together different elements in the result of their role in tying together different elements of the
system, the requirements on tunnels are influenced by all of these system, the requirements on tunnels are influenced by all of these
components. Flexibility is therefore the most important aspect of a components. Therefore, flexibility is the most important aspect of a
tunnel protocol if it is to keep pace with the evolution of the tunneling protocol if it is to keep pace with the evolution of technology.
system. This document describes Geneve, an encapsulation protocol designed t This document describes Geneve, an encapsulation protocol designed to
o
recognize and accommodate these changing capabilities and needs.</t> recognize and accommodate these changing capabilities and needs.</t>
</abstract>
</abstract> </front>
</front> <middle>
<section anchor="sec-1" numbered="true" toc="default">
<middle> <name>Introduction</name>
<section title="Introduction" anchor="section-1"><t> <t>
Networking has long featured a variety of tunneling, tagging, and Networking has long featured a variety of tunneling, tagging, and
other encapsulation mechanisms. However, the advent of network other encapsulation mechanisms. However, the advent of network
virtualization has caused a surge of renewed interest and a virtualization has caused a surge of renewed interest and a
corresponding increase in the introduction of new protocols. The corresponding increase in the introduction of new protocols. The
large number of protocols in this space, for example, ranging all the way fro large number of protocols in this space -- for example, ranging all the way f
m rom
VLANs <xref target="IEEE.802.1Q_2018"/> and MPLS <xref target="RFC3031"/> thr VLANs <xref target="IEEE.802.1Q_2018" format="default"/> and MPLS <xref targe
ough the more recent t="RFC3031" format="default"/> through the more recent
VXLAN <xref target="RFC7348"/> (Virtual eXtensible Local Area Network) VXLAN (Virtual eXtensible Local Area Network) <xref target="RFC7348" format=
and NVGRE <xref target="RFC7637"/> (Network Virtualization Using Generic Rout "default"/>
ing Encapsulation), often and NVGRE (Network Virtualization
Using Generic Routing Encapsulation) <xref target="RFC7637"
format="default"/> -- often
leads to questions about the need for new encapsulation formats and leads to questions about the need for new encapsulation formats and
what it is about network virtualization in particular that leads to what it is about network virtualization in particular that leads to
their proliferation. Note that the list of protocols presented above is non-exhaustive.</t> their proliferation. Note that the list of protocols presented above is non-exhaustive.</t>
<t>
<t>
While many encapsulation protocols seek to simply partition the While many encapsulation protocols seek to simply partition the
underlay network or bridge between two domains, network underlay network or bridge two domains, network
virtualization views the transit network as providing connectivity virtualization views the transit network as providing connectivity
between multiple components of a distributed system. In many ways between multiple components of a distributed system. In many ways,
this system is similar to a chassis switch with the IP underlay this system is similar to a chassis switch with the IP underlay
network playing the role of the backplane and tunnel endpoints on the network playing the role of the backplane and tunnel endpoints on the
edge as line cards. When viewed in this light, the requirements edge as line cards. When viewed in this light, the requirements
placed on the tunnel protocol are significantly different in terms of placed on the tunneling protocol are significantly different in terms of
the quantity of metadata necessary and the role of transit nodes.</t> the quantity of metadata necessary and the role of transit nodes.</t>
<t>
<t> Work such as "VL2: A Scalable and Flexible Data Center Network" <xref target=
Work such as <xref target="VL2"/> (A Scalable and Flexible Data Center Networ "VL2" format="default"/> and "NVO3 Data Plane Requirements" <xref target="I-D.ie
k) tf-nvo3-dataplane-requirements" format="default"/>
and the NVO3 Data Plane Requirements <xref target="I-D.ietf-nvo3-dataplane-re
quirements"/>
have described some of the properties that the data plane must have to support network have described some of the properties that the data plane must have to support network
virtualization. However, one additional defining requirement is the virtualization. However, one additional defining requirement is the
need to carry metadata (e.g. system state) along with the packet data; need to carry metadata (e.g., system state) along with the packet data;
example use cases of metadata are noted below. The use of example use cases of metadata are noted below. The use of
some metadata is certainly not a foreign concept - nearly all some metadata is certainly not a foreign concept -- nearly all
protocols used for network virtualization have at least 24 bits of identifier protocols used for network virtualization have at least 24 bits of identifier
space as a way to partition between tenants. This is often described space as a way to partition between tenants. This is often described
as overcoming the limits of 12-bit VLANs, and when seen in that as overcoming the limits of 12-bit VLANs; when seen in that
context, or any context where it is a true tenant identifier, 16 context or any context where it is a true tenant identifier, 16
million possible entries is a large number. However, the reality is million possible entries is a large number. However, the reality is
that the metadata is not exclusively used to identify tenants and that the metadata is not exclusively used to identify tenants, and
encoding other information quickly starts to crowd the space. In encoding other information quickly starts to crowd the space. In
fact, when compared to the tags used to exchange metadata between fact, when compared to the tags used to exchange metadata between
line cards on a chassis switch, 24-bit identifiers start to look line cards on a chassis switch, 24-bit identifiers start to look
quite small. There are nearly endless uses for this metadata, quite small. There are nearly endless uses for this metadata,
ranging from storing input port identifiers for simple security policies to ranging from storing input port identifiers for simple security policies to
sending service based context for advanced middlebox applications sending service-based context for advanced middlebox applications
that terminate and re-encapsulate Geneve traffic.</t> that terminate and re-encapsulate Geneve traffic.</t>
<t>
<t> Existing tunneling protocols have each attempted to solve different
Existing tunnel protocols have each attempted to solve different aspects of these new requirements only to be quickly rendered out of
aspects of these new requirements, only to be quickly rendered out of
date by changing control plane implementations and advancements. date by changing control plane implementations and advancements.
Furthermore, software and hardware components and controllers all Furthermore, software and hardware components and controllers all
have different advantages and rates of evolution - a fact that should have different advantages and rates of evolution -- a fact that should
be viewed as a benefit, not a liability or limitation. This draft be viewed as a benefit, not a liability or limitation. This document describ
describes Geneve, a protocol which seeks to avoid these problems by es Geneve, a protocol that seeks to avoid these problems by
providing a framework for tunneling for network virtualization rather providing a framework for tunneling for network virtualization rather
than being prescriptive about the entire system.</t> than being prescriptive about the entire system.</t>
<section anchor="sec-1.1" numbered="true" toc="default">
<section title="Requirements Language" anchor="section-1.1"><t> <name>Requirements Language</name>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", <t>
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONA The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUI
L" in this RED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>",
document are to be interpreted as described in BCP 14 <xref target="RFC2119"/ "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14>RECOMMENDED</bc
> p14>", "<bcp14>NOT RECOMMENDED</bcp14>", "<bcp14>MAY</bcp14>", and "<bcp14>OPTIO
<xref target="RFC8174"/> when, and only when, they appear in all NAL</bcp14>" in this
document are to be interpreted as described in BCP 14 <xref target="RFC2119"
format="default"/>
<xref target="RFC8174" format="default"/> when, and only when, they ap
pear in all
capitals, as shown here.</t> capitals, as shown here.</t>
</section>
</section> <section anchor="sec-1.2" numbered="true" toc="default">
<name>Terminology</name>
<section title="Terminology" anchor="section-1.2"><t> <t>
The NVO3 Framework <xref target="RFC7365"/> defines many of the concepts comm The Network
only Virtualization over Layer 3 (NVO3) Framework <xref target="RFC7365" format="d
efault"/> defines many of the concepts commonly
used in network virtualization. In addition, the following terms are used in network virtualization. In addition, the following terms are
specifically meaningful in this document:</t> specifically meaningful in this document:</t>
<dl newline="false" spacing="normal">
<t> <dt>Checksum offload:</dt>
Checksum offload. An optimization implemented by many NICs (Network Interfac <dd>An optimization implemented by many NICs (Network Interface Controllers)
e Controller) which that enables computation and verification of upper-layer protocol
enables computation and verification of upper layer protocol
checksums in hardware on transmit and receive, respectively. This checksums in hardware on transmit and receive, respectively. This
typically includes IP and TCP/UDP checksums which would otherwise be typically includes IP and TCP/UDP checksums that would otherwise be
computed by the protocol stack in software.</t> computed by the protocol stack in software.</dd>
<t> <dt>Clos network:</dt> <dd>A technique for composing network fabrics larger tha
Clos network. A technique for composing network fabrics larger than n
a single switch while maintaining non-blocking bandwidth across a single switch while maintaining non-blocking bandwidth across
connection points. ECMP is used to divide traffic across the connection points. ECMP is used to divide traffic across the
multiple links and switches that constitute the fabric. Sometimes multiple links and switches that constitute the fabric. Sometimes
termed "leaf and spine" or "fat tree" topologies.</t> termed "leaf and spine" or "fat tree" topologies.</dd>
<t> <dt>ECMP:</dt>
ECMP. Equal Cost Multipath. A routing mechanism for selecting from <dd>Equal Cost Multipath. A routing mechanism for selecting from
among multiple best next hop paths by hashing packet headers in order among multiple best next-hop paths by hashing packet headers in order
to better utilize network bandwidth while avoiding reordering of packets to better utilize network bandwidth while avoiding reordering of packets
within a flow.</t> within a flow.</dd>
<t> <dt>Geneve:</dt><dd>Generic Network Virtualization Encapsulation. The tunneling
Geneve. Generic Network Virtualization Encapsulation. The tunnel protocol described in this document.</dd>
protocol described in this document.</t>
<t> <dt>LRO:</dt><dd>Large Receive Offload. The receiver-side equivalent function
LRO. Large Receive Offload. The receive-side equivalent function of of LSO, in which multiple protocol segments (primarily TCP) are coalesced into
LSO, in which multiple protocol segments (primarily TCP) are larger data units.</dd>
coalesced into larger data units.</t>
<t> <dt>LSO:</dt><dd> Large Segmentation Offload. A function provided by many
LSO. Large Segmentation Offload. A function provided by many
commercial NICs that allows data units larger than the MTU to be commercial NICs that allows data units larger than the MTU to be
passed to the NIC to improve performance, the NIC being responsible passed to the NIC to improve performance, the NIC being responsible
for creating smaller segments of size less than or equal to the MTU for creating smaller segments of a size less than or equal to the MTU
with correct protocol headers. When referring specifically to TCP/ with correct protocol headers. When referring specifically to TCP/IP, this
IP, this feature is often known as TSO (TCP Segmentation Offload).</t> feature is often known as TSO (TCP Segmentation Offload).</dd>
<dt>
<t> Middlebox:</dt><dd> In the context of this document, the term "middlebox" re
Middlebox. The term middlebox in the context of this document refers to netw fers to network
ork service functions or service interposition appliances that typically implemen
service functions or appliances for service interposition that would typicall t tunnel endpoint functionality, terminating and re-encapsulating Geneve traffic
y .</dd>
implement NVE functionality, which terminate or re-encapsulate Geneve traffic <dt>NIC:</dt><dd>Network Interface Controller. Also called "Network Inte
.</t> rface Card" or "Network Adapter".
A NIC could be part of a tunnel endpoint or transit device and can either
<t> process or aid in the processing of Geneve packets.</dd>
NIC. Network Interface Controller. Also called as Network Interface Card or <dt>
Network Adapter. Transit device:</dt> <dd> A forwarding element (e.g., router or switch) along
A NIC could be part of a tunnel endpoint or transit device and can either pro the path of the tunnel
cess Geneve packets or making up part of the underlay network. A transit device may be
aid in the processing of Geneve packets.</t>
<t>
Transit device. A forwarding element (e.g. router or switch) along the path
of the tunnel
making up part of the Underlay Network. A transit device may be
capable of understanding the Geneve packet format but does not capable of understanding the Geneve packet format but does not
originate or terminate Geneve packets.</t> originate or terminate Geneve packets.</dd>
<dt>
<t> Tunnel endpoint:</dt><dd> A component performing encapsulation and
Tunnel endpoint. A component performing encapsulation and
decapsulation of packets, such as Ethernet frames or IP datagrams, in decapsulation of packets, such as Ethernet frames or IP datagrams, in
Geneve headers. As the ultimate consumer of any tunnel metadata, Geneve headers. As the ultimate consumer of any tunnel metadata,
tunnel endpoints have the highest level of requirements for parsing and tunnel endpoints have the highest level of requirements for parsing and
interpreting tunnel headers. Tunnel endpoints may consist of either interpreting tunnel headers. Tunnel endpoints may consist of either
software or hardware implementations or a combination of the two. software or hardware implementations or a combination of the two.
Tunnel endpoints are frequently a component of an NVE (Network Virtualization Tunnel endpoints are frequently a component of a Network Virtualization Edge
Edge) (NVE)
but may also be found in middleboxes or other elements making up an NVO3 Netw but may also be found in middleboxes or other elements making up an NVO3 netw
ork.</t> ork.</dd>
<dt>VM:</dt><dd>Virtual Machine.</dd>
<t> </dl>
VM. Virtual Machine.</t> </section>
</section>
</section> <section anchor="sec-2" numbered="true" toc="default">
<name>Design Requirements</name>
</section> <t>
Geneve is designed to support network virtualization use cases for data cente
<section title="Design Requirements" anchor="section-2"><t> r environments. In these situations,
Geneve is designed to support network virtualization use cases for data cente
r environments, where
tunnels are typically established to act as a backplane between the tunnels are typically established to act as a backplane between the
virtual switches residing in hypervisors, physical switches, or virtual switches residing in hypervisors, physical switches, or
middleboxes or other appliances. An arbitrary IP network can be used middleboxes or other appliances. An arbitrary IP network can be used
as an underlay although Clos networks composed using ECMP links are a as an underlay, although Clos networks composed using ECMP links are a
common choice to provide consistent bisectional bandwidth across all common choice to provide consistent bisectional bandwidth across all
connection points. Many of the concepts of network virtualization overlays connection points. Many of the concepts of network virtualization overlays
over Layer 3 IP networks are described in the NVO3 Framework <xref target="RF over IP networks are described in the NVO3 Framework <xref target="RFC7365" f
C7365"/>. ormat="default"/>.
Figure 1 shows an example of a hypervisor, top of <xref target="ref-sample-geneve-deployment"/> shows an example of a
rack switch for connectivity to physical servers, and a WAN uplink hypervisor, a top-of-rack switch for connectivity to physical servers, and a
WAN uplink
connected using Geneve tunnels over a simplified Clos network. These connected using Geneve tunnels over a simplified Clos network. These
tunnels are used to encapsulate and forward frames from the attached tunnels are used to encapsulate and forward frames from the attached
components such as VMs or physical links.</t> components, such as VMs or physical links.</t>
<figure anchor="ref-sample-geneve-deployment">
<figure title="Sample Geneve Deployment" anchor="ref-sample-geneve-deploy <name>Sample Geneve Deployment</name>
ment"><artwork><![CDATA[ <artwork name="" type="" align="left" alt=""><![CDATA[
+---------------------+ +-------+ +------+ +---------------------+ +-------+ +------+
| +--+ +-------+---+ | |Transit|--|Top of|==Physical | +--+ +-------+---+ | |Transit|--|Top of|==Physical
| |VM|--| | | | +------+ /|Router | | Rack |==Servers | |VM|--| | | | +------+ /|Router | | Rack |==Servers
| +--+ |Virtual|NIC|---|Top of|/ +-------+\/+------+ | +--+ |Virtual|NIC|---|Top of|/ +-------+\/+------+
| +--+ |Switch | | | | Rack |\ +-------+/\+------+ | +--+ |Switch | | | | Rack |\ +-------+/\+------+
| |VM|--| | | | +------+ \|Transit| |Uplink| WAN | |VM|--| | | | +------+ \|Transit| |Uplink| WAN
| +--+ +-------+---+ | |Router |--| |=========> | +--+ +-------+---+ | |Router |--| |=========>
+---------------------+ +-------+ +------+ +---------------------+ +-------+ +------+
Hypervisor Hypervisor
()===================================() ()===================================()
Switch-Switch Geneve Tunnels Switch-Switch Geneve Tunnels
]]></artwork> ]]></artwork>
</figure> </figure>
<t>
To support the needs of network virtualization, the tunnel protocol <t>
To support the needs of network virtualization, the tunneling protocol
should be able to take advantage of the differing (and evolving) should be able to take advantage of the differing (and evolving)
capabilities of each type of device in both the underlay and overlay capabilities of each type of device in both the underlay and overlay
networks. This results in the following requirements being placed on networks. This results in the following requirements being placed on
the data plane tunneling protocol:</t> the data plane tunneling protocol:</t>
<ul spacing="normal">
<t><list style="symbols"><t>The data plane is generic and extensible enou <li>The data plane is generic and extensible enough to support current
gh to support current and future control planes.</li>
and future control planes.</t> <li>Tunnel components are efficiently implementable in both hardware
<t>Tunnel components are efficiently implementable in both hardware
and software without restricting capabilities to the lowest common and software without restricting capabilities to the lowest common
denominator.</t> denominator.</li>
<li>High performance over existing IP fabrics is maintained.</li>
<t>High performance over existing IP fabrics.</t> </ul>
<t>
</list>
</t>
<t>
These requirements are described further in the following These requirements are described further in the following
subsections.</t> subsections.</t>
<section anchor="sec-2.1" numbered="true" toc="default">
<section title="Control Plane Independence" anchor="section-2.1"><t> <name>Control Plane Independence</name>
<t>
Although some protocols for network virtualization have included a Although some protocols for network virtualization have included a
control plane as part of the tunnel format specification (most control plane as part of the tunnel format specification (most
notably, VXLAN <xref target="RFC7348"/> prescribed a multicast learning- notably, VXLAN <xref target="RFC7348" format="default"/> prescribed a multica
based control plane), these specifications have largely been treated st-learning-based control plane), these specifications have largely been treated
as describing only the data format. The VXLAN packet format has as describing only the data format. The VXLAN packet format has
actually seen a wide variety of control planes built on top of it.</t> actually seen a wide variety of control planes built on top of it.</t>
<t>
<t>
There is a clear advantage in settling on a data format: most of the There is a clear advantage in settling on a data format: most of the
protocols are only superficially different and there is little protocols are only superficially different and there is little
advantage in duplicating effort. However, the same cannot be said of advantage in duplicating effort. However, the same cannot be said of
control planes, which are diverse in very fundamental ways. The case control planes, which are diverse in very fundamental ways. The case
for standardization is also less clear given the wide variety in for standardization is also less clear given the wide variety in
requirements, goals, and deployment scenarios.</t> requirements, goals, and deployment scenarios.</t>
<t>
<t>
As a result of this reality, Geneve is a pure tunnel format As a result of this reality, Geneve is a pure tunnel format
specification that is capable of fulfilling the needs of many control specification that is capable of fulfilling the needs of many control
planes by explicitly not selecting any one of them. This planes by explicitly not selecting any one of them. This
simultaneously promotes a shared data format and reduces the simultaneously promotes a shared data format and reduces the
chance of obsolescence by future control plane chance of obsolescence by future control plane
enhancements.</t> enhancements.</t>
</section>
</section> <section anchor="sec-2.2" numbered="true" toc="default">
<name>Data Plane Extensibility</name>
<section title="Data Plane Extensibility" anchor="section-2.2"><t> <t>
Achieving the level of flexibility needed to support current and Achieving the level of flexibility needed to support current and
future control planes effectively requires an options infrastructure future control planes effectively requires an options infrastructure
to allow new metadata types to be defined, deployed, and either to allow new metadata types to be defined, deployed, and either
finalized or retired. Options also allow for differentiation of finalized or retired. Options also allow for differentiation of
products by encouraging independent development in each vendor's core products by encouraging independent development in each vendor's core
specialty, leading to an overall faster pace of advancement. By far specialty, leading to an overall faster pace of advancement. By far,
the most common mechanism for implementing options is Type-Length- the most common mechanism for implementing options is the Type-Length-Value (
Value (TLV) format.</t> TLV) format.</t>
<t> <t>
It should be noted that while options can be used to support non- It should be noted that, while options can be used to support non-wirespeed
wirespeed control packets, they are equally important on data packets control packets, they are equally important in data packets
as well to segregate and direct forwarding (for instance, the as well for segregating and directing forwarding. (For instance, the
examples given before of input port based security policies and examples given before regarding input-port-based security policies and
terminating/re-encapsulating service interposition both require tags to be pl terminating/re-encapsulating service interposition both require tags
aced on data to be placed on data packets.) Therefore, while it would be desirable to lim
packets). Therefore, while it would be desirable to limit the it the
extensibility to only control packets for the purposes of simplifying extensibility to only control packets for the purposes of simplifying
the datapath, that would not satisfy the design requirements.</t> the datapath, that would not satisfy the design requirements.</t>
<section anchor="sec-2.2.1" numbered="true" toc="default">
<section title="Efficient Implementation" anchor="section-2.2.1"><t> <name>Efficient Implementation</name>
<t>
There is often a conflict between software flexibility and hardware There is often a conflict between software flexibility and hardware
performance that is difficult to resolve. For a given set of performance that is difficult to resolve. For a given set of
functionality, it is obviously desirable to maximize performance. functionality, it is obviously desirable to maximize performance.
However, that does not mean new features that cannot be run at a desired However, that does not mean new features that cannot be run at a desired
speed today should be disallowed. Therefore, for a protocol to be speed today should be disallowed. Therefore, for a protocol to be considered
efficiently implementable means that a set of common capabilities can efficiently implementable, it is expected to have a set of common capabilities that can
be reasonably handled across platforms along with a graceful be reasonably handled across platforms as well as a graceful
mechanism to handle more advanced features in the appropriate mechanism to handle more advanced features in the appropriate
situations.</t> situations.</t>
<t> <t>
The use of a variable length header and options in a protocol often The use of a variable-length header and options in a protocol often
raises questions about whether it is truly efficiently implementable raises questions about whether the protocol is truly efficiently
in hardware. To answer this question in the context of Geneve, it is implementable in hardware. To answer this question in the context of Geneve,
it is
important to first divide "hardware" into two categories: tunnel important to first divide "hardware" into two categories: tunnel
endpoints and transit devices.</t> endpoints and transit devices.</t>
<t>
<t> Tunnel endpoints must be able to parse the variable-length header, including
Tunnel endpoints must be able to parse the variable header, including any any
options, and take action. Since these devices are actively options, and take action. Since these devices are actively
participating in the protocol, they are the most affected by Geneve. participating in the protocol, they are the most affected by Geneve.
However, as tunnel endpoints are the ultimate consumers of the data, However, as tunnel endpoints are the ultimate consumers of the data,
transmitters can tailor their output to the capabilities of the transmitters can tailor their output to the capabilities of the
recipient.</t> recipient.</t>
<t> <t>
Transit devices may be able to interpret the options, however, Transit devices may be able to interpret the options; however,
as non-terminating devices, transit devices as non-terminating devices, transit devices
do not originate or terminate the Geneve packet, hence MUST NOT modify Geneve headers and do not originate or terminate the Geneve packet. Hence, they <bcp14>MUST NOT</bcp14> modify Geneve headers and
MUST NOT insert or delete options, which is the responsibility of tunnel endpoints. <bcp14>MUST NOT</bcp14> insert or delete options, as that is the responsibility of tunnel endpoints.
Options, if present in the packet, MUST only be generated and terminated by tunnel endpoints. Options, if present in the packet, <bcp14>MUST</bcp14> only be generated and terminated by tunnel endpoints.
The participation of transit devices in interpreting options is The participation of transit devices in interpreting options is
OPTIONAL.</t> <bcp14>OPTIONAL</bcp14>.</t>
<t>
<t> Further, either tunnel endpoints or transit devices <bcp14>MAY</bcp14> use offload
Further, either tunnel endpoints or transit devices MAY use offload
capabilities of NICs such as checksum offload to improve the capabilities of NICs, such as checksum offload, to improve the
performance of Geneve packet processing. The presence of a Geneve performance of Geneve packet processing. The presence of a Geneve
variable length header should not prevent the tunnel endpoints and variable-length header should not prevent the tunnel endpoints and
transit devices from using such offload capabilities.</t> transit devices from using such offload capabilities.</t>
</section>
</section> </section>
<section anchor="sec-2.3" numbered="true" toc="default">
</section> <name>Use of Standard IP Fabrics</name>
<t>
<section title="Use of Standard IP Fabrics" anchor="section-2.3"><t> IP has clearly cemented its place as the dominant transport mechanism,
IP has clearly cemented its place as the dominant transport mechanism
and many techniques have evolved over time to make it robust, and many techniques have evolved over time to make it robust,
efficient, and inexpensive. As a result, it is natural to use IP efficient, and inexpensive. As a result, it is natural to use IP
fabrics as a transit network for Geneve. Fortunately, the use of IP fabrics as a transit network for Geneve. Fortunately, the use of IP
encapsulation and addressing is enough to achieve the primary goal of encapsulation and addressing is enough to achieve the primary goal of
delivering packets to the correct point in the network through delivering packets to the correct point in the network through
standard switching and routing.</t> standard switching and routing.</t>
<t>
<t>
In addition, nearly all underlay fabrics are designed to exploit In addition, nearly all underlay fabrics are designed to exploit
parallelism in traffic to spread load across multiple links without parallelism in traffic to spread load across multiple links without
introducing reordering in individual flows. These equal cost introducing reordering in individual flows. These ECMP techniques typically
multipathing (ECMP) techniques typically involve parsing and hashing involve parsing and hashing
the addresses and port numbers from the packet to select an outgoing the addresses and port numbers from the packet to select an outgoing
link. However, the use of tunnels often results in poor ECMP link. However, the use of tunnels often results in poor ECMP
performance without additional knowledge of the protocol as the performance, as without additional knowledge of the protocol, the
encapsulated traffic is hidden from the fabric by design and only encapsulated traffic is hidden from the fabric by design, and only
tunnel endpoint addresses are available for hashing.</t> tunnel endpoint addresses are available for hashing.</t>
<t>
<t>
Since it is desirable for Geneve to perform well on these existing Since it is desirable for Geneve to perform well on these existing
fabrics, it is necessary for entropy from encapsulated packets to be fabrics, it is necessary for entropy from encapsulated packets to be
exposed in the tunnel header. The most common technique for this is exposed in the tunnel header. The most common technique for this is
to use the UDP source port, which is discussed further in to use the UDP source port, which is discussed further in
<xref target="section-3.3"/>.</t> <xref target="sec-3.3" format="default"/>.</t>
</section>
</section> </section>
<section anchor="sec-3" numbered="true" toc="default">
</section> <name>Geneve Encapsulation Details</name>
<t>
<section title="Geneve Encapsulation Details" anchor="section-3"><t>
The Geneve packet format consists of a compact tunnel header The Geneve packet format consists of a compact tunnel header
encapsulated in UDP over either IPv4 or IPv6. A small fixed tunnel encapsulated in UDP over either IPv4 or IPv6. A small fixed tunnel
header provides control information plus a base level of header provides control information plus a base level of
functionality and interoperability with a focus on simplicity. This functionality and interoperability with a focus on simplicity. This
header is then followed by a set of variable options to allow for header is then followed by a set of variable-length options to allow for
future innovation. Finally, the payload consists of a protocol data future innovation. Finally, the payload consists of a protocol data
unit of the indicated type, such as an Ethernet frame. <xref target="section unit of the indicated type, such as an Ethernet frame. Sections <xref target=
-3.1"/> "sec-3.1" format="counter"/>
and <xref target="section-3.2"/> illustrate the Geneve packet format transpor and <xref target="sec-3.2" format="counter"/> illustrate the Geneve packet fo
ted (for rmat transported (for
example) over Ethernet along with an Ethernet payload.</t> example) over Ethernet along with an Ethernet payload.</t>
<section anchor="sec-3.1" numbered="true" toc="default">
<section title="Geneve Packet Format Over IPv4" anchor="section-3.1"> <name>Geneve Packet Format over IPv4</name>
<figure>
<figure><artwork><![CDATA[ <name>Geneve Packet Format over IPv4</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
0 1 2 3 0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
Outer Ethernet Header: Outer Ethernet Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | | Outer Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | Outer Source MAC Address | | Outer Destination MAC Address | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Source MAC Address | | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
skipping to change at line 430 skipping to change at line 386
Outer Ethernet Header: Outer Ethernet Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | | Outer Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | Outer Source MAC Address | | Outer Destination MAC Address | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Source MAC Address | | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Ethertype=0x0800 | | Ethertype = 0x0800 IPv4 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Outer IPv4 Header: Outer IPv4 Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| IHL |Type of Service| Total Length | |Version| IHL |Type of Service| Total Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification |Flags| Fragment Offset | | Identification |Flags| Fragment Offset |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time to Live |Protocol=17 UDP| Header Checksum | | Time to Live |Protocol=17 UDP| Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Source IPv4 Address | | Outer Source IPv4 Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination IPv4 Address | | Outer Destination IPv4 Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Outer UDP Header: Outer UDP Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Port = xxxx | Dest Port = 6081 | | Source Port = xxxx | Dest Port = 6081 Geneve |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| UDP Length | UDP Checksum | | UDP Length | UDP Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Geneve Header: Geneve Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver| Opt Len |O|C| Rsvd. | Protocol Type | |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Virtual Network Identifier (VNI) | Reserved | | Virtual Network Identifier (VNI) | Reserved |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Variable Length Options | | |
~ ~ ~ Variable-Length Options ~
| | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Inner Ethernet Header (example payload): Inner Ethernet Header (example payload):
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Destination MAC Address | | Inner Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Destination MAC Address | Inner Source MAC Address | | Inner Destination MAC Address | Inner Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Source MAC Address | | Inner Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Payload: Payload:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Ethertype of Original Payload | | | Ethertype of Original Payload | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| Original Ethernet Payload | | Original Ethernet Payload |
| | | |
| (Note that the original Ethernet Frame's Preamble, Start Frame| ~ (Note that the original Ethernet frame's preamble, start ~
| Delimiter(SFD) & Frame Check Sequence(FCS) are not included | | frame delimiter (SFD), and frame check sequence (FCS) are not |
| and the Ethernet Payload need not be 4-byte aligned) | | included, and the Ethernet payload need not be 4-byte aligned)|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Frame Check Sequence: Frame Check Sequence:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| New Frame Check Sequence (FCS) for Outer Ethernet Frame | | New Frame Check Sequence (FCS) for Outer Ethernet Frame |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
]]></artwork> ]]></artwork>
</figure> </figure>
</section> </section>
<section anchor="sec-3.2" numbered="true" toc="default">
<section title="Geneve Packet Format Over IPv6" anchor="section-3.2"> <name>Geneve Packet Format over IPv6</name>
<figure><name>Geneve Packet Format over IPv6</name>
<figure><artwork><![CDATA[ <artwork name="" type="" align="left" alt=""><![CDATA[
0 1 2 3 0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
Outer Ethernet Header: Outer Ethernet Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | | Outer Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | Outer Source MAC Address | | Outer Destination MAC Address | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Source MAC Address | | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
skipping to change at line 509 skipping to change at line 466
Outer Ethernet Header: Outer Ethernet Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | | Outer Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Destination MAC Address | Outer Source MAC Address | | Outer Destination MAC Address | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Outer Source MAC Address | | Outer Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Outer VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Ethertype=0x86DD | | Ethertype = 0x86DD IPv6 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Outer IPv6 Header: Outer IPv6 Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Version| Traffic Class | Flow Label | |Version| Traffic Class | Flow Label |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Payload Length | NxtHdr=17 UDP | Hop Limit | | Payload Length | NxtHdr=17 UDP | Hop Limit |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| | | |
+ + + +
skipping to change at line 537 skipping to change at line 494
+ + + +
| | | |
+ Outer Destination IPv6 Address + + Outer Destination IPv6 Address +
| | | |
+ + + +
| | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Outer UDP Header: Outer UDP Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Source Port = xxxx | Dest Port = 6081 | | Source Port = xxxx | Dest Port = 6081 Geneve |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| UDP Length | UDP Checksum | | UDP Length | UDP Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Geneve Header: Geneve Header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver| Opt Len |O|C| Rsvd. | Protocol Type | |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Virtual Network Identifier (VNI) | Reserved | | Virtual Network Identifier (VNI) | Reserved |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Variable Length Options | | |
~ ~ ~ Variable-Length Options ~
| | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Inner Ethernet Header (example payload): Inner Ethernet Header (example payload):
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Destination MAC Address | | Inner Destination MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Destination MAC Address | Inner Source MAC Address | | Inner Destination MAC Address | Inner Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Inner Source MAC Address | | Inner Source MAC Address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information | |Optional Ethertype=C-Tag 802.1Q| Inner VLAN Tag Information |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Payload: Payload:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Ethertype of Original Payload | | | Ethertype of Original Payload | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
| Original Ethernet Payload | | Original Ethernet Payload |
| | | |
| (Note that the original Ethernet Frame's Preamble, Start Frame| ~ (Note that the original Ethernet frame's preamble, start ~
| Delimiter(SFD) & Frame Check Sequence(FCS) are not included | | frame delimiter (SFD), and frame check sequence (FCS) are not |
| and the Ethernet Payload need not be 4-byte aligned) | | included, and the Ethernet payload need not be 4-byte aligned)|
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Frame Check Sequence: Frame Check Sequence:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| New Frame Check Sequence (FCS) for Outer Ethernet Frame | | New Frame Check Sequence (FCS) for Outer Ethernet Frame |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
]]></artwork> ]]></artwork>
</figure> </figure>
</section> </section>
<section anchor="sec-3.3" numbered="true" toc="default">
<section title="UDP Header" anchor="section-3.3"><t> <name>UDP Header</name>
The use of an encapsulating UDP <xref target="RFC0768"/> header follows the <t>
The use of an encapsulating UDP <xref target="RFC0768" format="default"/> hea
der follows the
connectionless semantics of Ethernet and IP in addition to providing connectionless semantics of Ethernet and IP in addition to providing
entropy to routers performing ECMP. The header fields are therefore entropy to routers performing ECMP. Therefore, header fields are
interpreted as follows:</t> interpreted as follows:</t>
<dl newline="false" spacing="normal" indent="3">
<t><list style="hanging" hangIndent="3"><t hangText="Source port:"> <dt>Source Port:</dt>
A source port selected by the originating tunnel <dd>
<vspace blankLines="0"/> <t>
endpoint. This source port SHOULD be the same for all packets A source port selected by the originating tunnel endpoint. This source p
ort <bcp14>SHOULD</bcp14> be the same for all packets
belonging to a single encapsulated flow to prevent reordering due belonging to a single encapsulated flow to prevent reordering due
to the use of different paths. To encourage an even distribution to the use of different paths. To encourage an even distribution
of flows across multiple links, the source port SHOULD be of flows across multiple links, the source port <bcp14>SHOULD</bcp14> be
calculated using a hash of the encapsulated packet headers using, calculated using a hash of the encapsulated packet headers using,
for example, a traditional 5-tuple. Since the port represents a for example, a traditional 5-tuple. Since the port represents a
flow identifier rather than a true UDP connection, the entire flow identifier rather than a true UDP connection, the entire
16-bit range MAY be used to maximize entropy. In addition to setting the s 16-bit range <bcp14>MAY</bcp14> be used to maximize entropy. In addition t
ource port, o setting the source port,
for IPv6, flow label MAY also be used for providing entropy. For an exampl for IPv6, the flow label <bcp14>MAY</bcp14> also be used for providing ent
e of ropy. For an example of
using IPv6 flow label for tunnel use cases, see <xref target="RFC6438"/>. using the IPv6 flow label for tunnel use cases, see <xref target="RFC6438"
<vspace blankLines="1"/> format="default"/>.
</t>
<t>
If Geneve traffic is shared with other UDP listeners If Geneve traffic is shared with other UDP listeners
on the same IP address, tunnel endpoints SHOULD implement a mechanism on the same IP address, tunnel endpoints <bcp14>SHOULD</bcp14> implement a mechanism
to ensure ICMP return traffic arising from network errors is directed to ensure ICMP return traffic arising from network errors is directed
to the correct listener. The definition of such a mechanism is beyond to the correct listener. The definition of such a mechanism is beyond
the scope of this document. the scope of this document.
</t> </t>
</dd>
<t hangText="Dest port:"> <dt>Dest Port:</dt>
IANA has assigned port 6081 as the fixed well-known <dd>
<vspace blankLines="0"/> <t>
destination port for Geneve. Although the well-known value should IANA has assigned port 6081 as the fixed well-known destination port
be used by default, it is RECOMMENDED that implementations make for Geneve. Although the well-known value should be used by default, it
is <bcp14>RECOMMENDED</bcp14> that implementations make
this configurable. The chosen port is used for identification of this configurable. The chosen port is used for identification of
Geneve packets and MUST NOT be reversed for different ends of a Geneve packets and <bcp14>MUST NOT</bcp14> be reversed for different ends
connection as is done with TCP. It is the responsibility of the control pl of a
ane for connection as is done with TCP. It is the responsibility of the control pl
any reconfiguration of the assigned port and its interpretation by respect ane to manage any reconfiguration of the assigned port and its interpretation by
ive devices. respective devices.
The definition of the control plane is beyond the scope of this document. The definition of the control plane is beyond the scope of this document.
</t> </t>
</dd>
<t hangText="UDP length:"> <dt>UDP Length:</dt>
The length of the UDP packet including the UDP header. <dd>
<vspace blankLines="0"/> <t>
</t> The length of the UDP packet including the UDP header.</t>
</dd>
<t hangText="UDP checksum:"> <dt>UDP Checksum:</dt>
In order to protect the Geneve header, options and <dd>
<vspace blankLines="0"/> <t>
payload from potential data corruption, UDP checksum SHOULD be generated In order to protect the Geneve header, options, and payload from
as potential data corruption, the UDP checksum <bcp14>SHOULD</bcp14> be gene
specified in <xref target="RFC0768"/> and <xref target="RFC1112"/> when rated as
specified in <xref target="RFC0768" format="default"/> and <xref target="
RFC1122" format="default"/> when
Geneve is encapsulated in IPv4. To protect the IP header, Geneve header, Geneve is encapsulated in IPv4. To protect the IP header, Geneve header,
options and payload from potential data corruption, the UDP checksum MUST options, and payload from potential data corruption, the UDP checksum <bc
be generated by default as specified in <xref target="RFC0768"/> p14>MUST</bcp14>
and <xref target="RFC8200"/> when Geneve be generated by default as specified in <xref target="RFC0768" format="de
is encapsulated in IPv6, except for certain conditions, which are outline fault"/>
d in the next paragraph. and <xref target="RFC8200" format="default"/> when Geneve
Upon receiving such packets with non-zero UDP checksum, is encapsulated in IPv6, except under certain conditions, which are outli
the receiving tunnel endpoints MUST validate the checksum. ned in the next paragraph.
If the checksum is not correct, the packet MUST be dropped, otherwise Upon receiving such packets with a non-zero UDP checksum,
the packet MUST be accepted for decapsulation. the receiving tunnel endpoints <bcp14>MUST</bcp14> validate the checksum.
<vspace blankLines="1"/> If the checksum is not correct, the packet <bcp14>MUST</bcp14> be dropped
Under certain conditions, the UDP checksum MAY be set to zero on transmit ; otherwise,
for packets encapsulated in both IPv4 and IPv6 <xref target="RFC8200"/>. the packet <bcp14>MUST</bcp14> be accepted for decapsulation.
See <xref target="section-4.3"/> for additional requirements that apply w </t>
hen using zero <t>
Under certain conditions, the UDP checksum <bcp14>MAY</bcp14> be set to z
ero on transmit
for packets encapsulated in both IPv4 and IPv6 <xref target="RFC8200" for
mat="default"/>.
See <xref target="sec-4.3" format="default"/> for additional
requirements that apply when using zero
UDP checksum with IPv4 and IPv6. Disabling the use of UDP checksums is UDP checksum with IPv4 and IPv6. Disabling the use of UDP checksums is
an operational consideration that should take into account the risks an operational consideration that should take into account the risks
and effects of packet corruption. and effects of packet corruption.
</t> </t>
</dd>
</list> </dl>
</t> </section>
<section anchor="sec-3.4" numbered="true" toc="default">
</section> <name>Tunnel Header Fields</name>
<dl newline="false" spacing="normal" indent="3">
<section title="Tunnel Header Fields" anchor="section-3.4"><t><list style <dt>Ver (2 bits):</dt>
="hanging" hangIndent="3"><t hangText="Ver (2 bits):"> <dd>
The current version number is 0. Packets received by <t>
<vspace blankLines="0"/> The current version number is 0. Packets received by a tunnel endpoint w
a tunnel endpoint with an unknown version MUST be dropped. Transit ith an unknown version <bcp14>MUST</bcp14> be dropped. Transit
devices interpreting Geneve packets with an unknown devices interpreting Geneve packets with an unknown
version number MUST treat them as UDP packets with an unknown version number <bcp14>MUST</bcp14> treat them as UDP packets with an unknown
payload. payload.
</t> </t>
</dd>
<t hangText="Opt Len (6 bits):"> <dt>Opt Len (6 bits):</dt>
The length of the options fields, expressed in <dd>
<vspace blankLines="0"/> <t>
four byte multiples, not including the eight byte fixed tunnel The length of the option fields, expressed in 4-byte multiples, not inclu
ding the 8-byte fixed tunnel
header. This results in a minimum total Geneve header size of 8 header. This results in a minimum total Geneve header size of 8
bytes and a maximum of 260 bytes. The start of the payload bytes and a maximum of 260 bytes. The start of the payload
headers can be found using this offset from the end of the base headers can be found using this offset from the end of the base
Geneve header. Geneve header.
<vspace blankLines="1"/> </t>
Transit devices MUST maintain consistent forwarding behavior <t>
Transit devices <bcp14>MUST</bcp14> maintain consistent forwarding behavior
irrespective of the value of 'Opt Len', including ECMP link irrespective of the value of 'Opt Len', including ECMP link
selection. selection.
</t> </t>
</dd>
<t hangText="O (1 bit):"> <dt>O (1 bit):</dt>
Control packet. This packet contains a control message. <dd>
<vspace blankLines="0"/> <t>
Control messages are sent between tunnel endpoints. Control packet. This packet contains a control message. Control messages
Tunnel endpoints MUST NOT forward the payload are sent between tunnel endpoints.
and transit devices MUST NOT attempt to interpret it. Tunnel endpoints <bcp14>MUST NOT</bcp14> forward the payload,
Since control messages are less frequent, it is RECOMMENDED and transit devices <bcp14>MUST NOT</bcp14> attempt to interpret it.
that tunnel endpoints direct these packets to a high priority control Since control messages are less frequent, it is <bcp14>RECOMMENDED</bcp14>
that tunnel endpoints direct these packets to a high-priority control
queue (for example, to direct the packet to a general purpose CPU queue (for example, to direct the packet to a general purpose CPU
from a forwarding ASIC or to separate out control traffic on a from a forwarding Application-Specific Integrated Circuit (ASIC) or to sep
NIC). Transit devices MUST NOT alter forwarding behavior on the arate out control traffic on a
NIC). Transit devices <bcp14>MUST NOT</bcp14> alter forwarding behavior o
n the
basis of this bit, such as ECMP link selection. basis of this bit, such as ECMP link selection.
</t> </t>
</dd>
<t hangText="C (1 bit):"> <dt>C (1 bit):</dt>
Critical options present. One or more options has the <dd>
<vspace blankLines="0"/> <t>
critical bit set (see <xref target="section-3.5"/>). If this bit is set Critical options present. One or more options has the critical bit set (
then see <xref target="sec-3.5" format="default"/>). If this bit is set, then
tunnel endpoints MUST parse the options list to interpret any tunnel endpoints <bcp14>MUST</bcp14> parse the options list to interpret a
ny
critical options. On tunnel endpoints where option parsing is not critical options. On tunnel endpoints where option parsing is not
supported the packet MUST be dropped on the basis of the 'C' bit supported, the packet <bcp14>MUST</bcp14> be dropped on the basis of the '
in the base header. If the bit is not set tunnel endpoints MAY C' bit
in the base header. If the bit is not set, tunnel endpoints <bcp14>MAY</b
cp14>
strip all options using 'Opt Len' and forward the decapsulated strip all options using 'Opt Len' and forward the decapsulated
packet. Transit devices MUST NOT drop packets on the packet. Transit devices <bcp14>MUST NOT</bcp14> drop packets on the
basis of this bit. basis of this bit.
</t> </t>
</dd>
<t hangText="Rsvd. (6 bits):"> <dt>Rsvd. (6 bits):</dt>
Reserved field, which MUST be zero on transmission <dd>
<vspace blankLines="0"/> <t>
and MUST be ignored on receipt. Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and <bc
</t> p14>MUST</bcp14> be ignored on receipt.
</t>
<t hangText="Protocol Type (16 bits):"> </dd>
The type of the protocol data unit <dt>Protocol Type (16 bits):</dt>
<vspace blankLines="0"/> <dd>
appearing after the Geneve header. This follows the EtherType <t>
<xref target="ETYPES"/> convention; with Ethernet itself being represented The type of protocol data unit appearing after the Geneve header. This f
by the ollows the Ethertype
<xref target="ETYPES" format="default"/> convention, with Ethernet itself
being represented by the
value 0x6558. value 0x6558.
</t> </t>
</dd>
<t hangText="Virtual Network Identifier (VNI) (24 bits):"> <dt>Virtual Network Identifier (VNI) (24 bits):</dt>
An identifier for a <dd>
<vspace blankLines="0"/> <t>
unique element of a virtual network. In many situations this may An identifier for a unique element of a virtual network. In many situati
represent an L2 segment, however, the control plane defines the ons, this may
forwarding semantics of decapsulated packets. The VNI MAY be used represent an L2 segment; however, the control plane defines the
as part of ECMP forwarding decisions or MAY be used as a mechanism forwarding semantics of decapsulated packets. The VNI <bcp14>MAY</bcp14>
be used
as part of ECMP forwarding decisions or <bcp14>MAY</bcp14> be used as a me
chanism
to distinguish between overlapping address spaces contained in the to distinguish between overlapping address spaces contained in the
encapsulated packet when load balancing across CPUs. encapsulated packet when load balancing across CPUs.
</t> </t>
</dd>
<t hangText="Reserved (8 bits):"> <dt>Reserved (8 bits):</dt>
Reserved field which MUST be zero on transmission <dd>
<vspace blankLines="0"/> <t>
and ignored on receipt. Reserved field, which <bcp14>MUST</bcp14> be zero on transmission and ign
</t> ored on receipt.
</t>
</list> </dd>
</t> </dl>
</section> </section>
<section anchor="sec-3.5" numbered="true" toc="default">
<section title="Tunnel Options" anchor="section-3.5"><figure><artwork><![ <name>Tunnel Options</name>
CDATA[ <figure anchor="geneve-options">
<name>Geneve Option</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
0 1 2 3 0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Option Class | Type |R|R|R| Length | | Option Class | Type |R|R|R| Length |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Variable Option Data | | |
~ ~ ~ Variable-Length Option Data ~
| | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Geneve Option
]]></artwork> ]]></artwork>
</figure> </figure>
<t> <t>
The base Geneve header is followed by zero or more options in Type- The base Geneve header is followed by zero or more options in Type-Length-Val
Length-Value format. Each option consists of a four byte option ue format. Each option consists of a 4-byte option
header and a variable amount of option data interpreted according to header and a variable amount of option data interpreted according to
the type.</t> the type.</t>
<dl newline="false" spacing="normal" indent="3">
<t><list style="hanging" hangIndent="3"><t hangText="Option Class (16 bit <dt>Option Class (16 bits):</dt>
s):"> <dd>
Namespace for the 'Type' field. IANA will <t>
<vspace blankLines="0"/> Namespace for the 'Type' field. IANA has created a "Geneve Option Class"
be requested to create a "Geneve Option Class" registry to registry to
allocate identifiers for organizations, technologies, and vendors allocate identifiers for organizations, technologies, and vendors
that have an interest in creating types for options. Each that have an interest in creating types for options. Each
organization may allocate types independently to allow organization may allocate types independently to allow
experimentation and rapid innovation. It is expected that over experimentation and rapid innovation. It is expected that, over
time certain options will become well known and a given time, certain options will become well known, and a given
implementation may use option types from a variety of sources. In implementation may use option types from a variety of sources. In
addition, IANA will be requested to reserve specific ranges for addition, IANA has reserved specific ranges for
allocation by IETF Review and for Experimental Use (see <xref target="sect allocation by IETF Review and for Experimental Use (see <xref target="sec-
ion-7"/>). 7" format="default"/>).
</t> </t>
</dd>
<t hangText="Type (8 bits):"> <dt>Type (8 bits):</dt>
Type indicating the format of the data contained in <dd>
<vspace blankLines="0"/> <t>
this option. Options are primarily designed to encourage future Type indicating the format of the data contained in this option. Options
extensibility and innovation and so standardized forms of these are primarily designed to encourage future
extensibility and innovation, and standardized forms of these
options will be defined in separate documents. options will be defined in separate documents.
<vspace blankLines="1"/> </t>
The high order bit of the option type indicates that this is a <t>
The high-order bit of the option type indicates that this is a
critical option. If the receiving tunnel endpoint does not recognize critical option. If the receiving tunnel endpoint does not recognize
this option and this bit is set then the packet MUST be dropped. the option and this bit is set, then the packet <bcp14>MUST</bcp14> be dro
If this bit is set in any option then the 'C' bit in the pped.
Geneve base header MUST also be set. Transit devices MUST NOT If this bit is set in any option, then the 'C' bit in the
Geneve base header <bcp14>MUST</bcp14> also be set. Transit devices <bcp1
4>MUST NOT</bcp14>
drop packets on the basis of this bit. The following figure shows drop packets on the basis of this bit. The following figure shows
the location of the 'C' bit in the 'Type' field: the location of the 'C' bit in the 'Type' field:
</t> </t>
</dd>
</list> </dl>
</t> <figure><name>&apos;C&apos; Bit in the &apos;Type&apos; Field</name>
<artwork name="" type="" align="left" alt=""><![CDATA[
<figure><artwork><![CDATA[
0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 7 8
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
|C| Type | |C| Type |
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
]]></artwork> ]]></artwork>
</figure> </figure>
<t><list hangIndent="3" style="hanging"><t> <dl newline="false" spacing="normal" indent="3">
<dt/>
<dd>
The requirement to drop a packet with an unknown option with the 'C' bit set The requirement to drop a packet with an unknown option with the 'C' bit set
applies to the entire tunnel endpoint system and not a particular applies to the entire tunnel endpoint system and not a particular
component of the implementation. For example, in a system component of the implementation. For example, in a system
comprised of a forwarding ASIC and a general purpose CPU, this comprised of a forwarding ASIC and a general purpose CPU, this
does not mean that the packet must be dropped in the ASIC. An does not mean that the packet must be dropped in the ASIC. An
implementation may send the packet to the CPU using a rate-limited implementation may send the packet to the CPU using a rate-limited
control channel for slow-path exception handling.</t> control channel for slow-path exception handling.</dd>
</dl>
</list> <dl newline="false" spacing="normal" indent="3">
</t> <dt>R (3 bits):</dt>
<dd>
<t><list style="hanging" hangIndent="3"><t hangText="R (3 bits):"> Option control flags reserved for future use. These bits <bcp14>MUST</bc
Option control flags reserved for future use. These bits MUST be p14> be
zero on transmission and MUST be ignored on receipt. zero on transmission and <bcp14>MUST</bcp14> be ignored on receipt.
</t> </dd>
<dt>Length (5 bits):</dt>
<t hangText="Length (5 bits):"> <dd>
Length of the option, expressed in four byte <t>
<vspace blankLines="0"/> Length of the option, expressed in 4-byte
multiples excluding the option header. The total length of each multiples, excluding the option header. The total length of each
option may be between 4 and 128 bytes. A value of 0 in the Length field i option may be between 4 and 128 bytes. A value of 0 in the 'Length' field
mplies implies
an option with only an option header and no variable option data. an option with only an option header and no option data. Packets
Packets in which the total in which the total
length of all options is not equal to the 'Opt Len' in the base length of all options is not equal to the 'Opt Len' in the base
header are invalid and MUST be silently dropped if received by a header are invalid and <bcp14>MUST</bcp14> be silently dropped if received by a
tunnel endpoint that processes the options. tunnel endpoint that processes the options.
</t> </t>
</dd>
<t hangText="Variable Option Data:"> <dt>Variable-Length Option Data:</dt>
<dd>
<t>
Option data interpreted according to 'Type'. Option data interpreted according to 'Type'.
<vspace blankLines="0"/> </t>
</t>
</list>
</t>
<section title="Options Processing" anchor="section-3.5.1"><t> </dd>
</dl>
<section anchor="sec-3.5.1" numbered="true" toc="default">
<name>Options Processing</name>
<t>
Geneve options are intended to be originated and processed Geneve options are intended to be originated and processed
by tunnel endpoints. However, options MAY be interpreted by transit by tunnel endpoints. However, options <bcp14>MAY</bcp14> be interpreted by transit
devices along the tunnel path. Transit devices not devices along the tunnel path. Transit devices not
interpreting Geneve headers (which may or may not include options) MUST handle interpreting Geneve headers (which may or may not include options) <bcp14>MUST</bcp14> handle
Geneve packets as any other UDP packet and maintain consistent forwarding behavior.</t> Geneve packets as any other UDP packet and maintain consistent forwarding behavior.</t>
<t>
<t>
In tunnel endpoints, the generation and interpretation of options is In tunnel endpoints, the generation and interpretation of options is
determined by the control plane, which is beyond the the scope of this determined by the control plane, which is beyond the scope of this
document. However, to ensure interoperability between heterogeneous document. However, to ensure interoperability between heterogeneous
devices some requirements are imposed on options and the devices that devices, some requirements are imposed on options and the devices that
process them:</t> process them:</t>
<ul spacing="normal">
<t><list style="symbols"><t>Receiving tunnel endpoints MUST drop packets <li>Receiving tunnel endpoints <bcp14>MUST</bcp14> drop packets cont
containing unknown options aining unknown options
with the 'C' bit set in the option type. Conversely, transit with the 'C' bit set in the option type. Conversely, transit
devices MUST NOT drop packets as a result of encountering unknown devices <bcp14>MUST NOT</bcp14> drop packets as a result of encountering u
options, including those with the 'C' bit set.</t> nknown
options, including those with the 'C' bit set.</li>
<t>The contents of the options and their ordering MUST NOT be <li>The contents of the options and their ordering <bcp14>MUST NOT</
modified by transit devices.</t> bcp14> be
modified by transit devices.</li>
<t>If a tunnel endpoint receives a Geneve packet with 'Opt Len' (total le <li>If a tunnel endpoint receives a Geneve packet with an 'Opt Len'
ngth of all options) (the total length of all options)
that exceeds the options processing capability of the tunnel endpoint the that exceeds the options-processing capability of the tunnel endpoint, th
n en
the tunnel endpoint MUST drop such packets. An implementation may raise a the tunnel endpoint <bcp14>MUST</bcp14> drop such packets. An implementat
n ion may raise an
exception to the control plane of such an event. It is the responsibility exception to the control plane in such an event. It is the responsibility
of the control plane to ensure the communicating peer tunnel endpoints of the control plane to ensure the communicating peer tunnel endpoints
have the processing capability to handle the total length of options. have the processing capability to handle the total length of options.
The definition of the control plane is beyond the scope of this document. The definition of the control plane is beyond the scope of this document.
</t> </li>
</list> </ul>
</t> <t>
<t>
When designing a Geneve option, it is important to consider how the When designing a Geneve option, it is important to consider how the
option will evolve in the future. Once an option is defined it is option will evolve in the future. Once an option is defined, it is
reasonable to expect that implementations may come to depend on a reasonable to expect that implementations may come to depend on a
specific behavior. As a result, the scope of any future changes must specific behavior. As a result, the scope of any future changes must
be carefully described upfront.</t> be carefully described upfront.</t>
<t>
<t> Architecturally, options are intended to be self descriptive and independent.
Architecturally, options are intended to be self-descriptive and independent. This enables parallelism in options processing and reduces implementation com
This enables parallelism in option processing and reduces implementation comp plexity.
lexity. However, the control plane may impose certain ordering restrictions, as
However, the control plane may impose certain ordering restrictions as described in <xref target="sec-4.5.1" format="default"/>.</t>
described in <xref target="section-4.5.1"/>.</t> <t>
<t>
Unexpectedly significant interoperability issues may result from Unexpectedly significant interoperability issues may result from
changing the length of an option that was defined to be a certain changing the length of an option that was defined to be a certain
size. A particular option is specified to have either a fixed size. A particular option is specified to have either a fixed
length, which is constant, or a variable length, which may change length, which is constant, or a variable length, which may change
over time or for different use cases. This property is part of the over time or for different use cases. This property is part of the
definition of the option and conveyed by the 'Type'. For fixed definition of the option and is conveyed by the 'Type'. For fixed-length opt
length options, some implementations may choose to ignore the length ions, some implementations may choose to ignore the 'Length'
field in the option header and instead parse based on the well known field in the option header and instead parse based on the well-known
length associated with the type. In this case, redefining the length length associated with the type. In this case, redefining the length
will impact not only parsing of the option in question but also any will impact not only the parsing of the option in question but also any
options that follow. Therefore, options that are defined to be fixed options that follow. Therefore, options that are defined to be a fixed
length in size MUST NOT be redefined to a different length. Instead, length in size <bcp14>MUST NOT</bcp14> be redefined to a different length. I
nstead,
a new 'Type' should be allocated. Actual definition of the option type is beyond a new 'Type' should be allocated. Actual definition of the option type is beyond
the scope of this document. The option type and its interpretation should be the scope of this document. The option type and its interpretation should be
defined by the entity that owns the option class.</t> defined by the entity that owns the option class.</t>
<t>
<t> Options may be processed by NIC hardware utilizing offloads (e.g., LSO and LR
Options may be processed by NIC hardware utilizing offloads (e.g. LSO and LRO O)
) as described in <xref target="sec-4.6" format="default"/>. Careful considerat
as described in <xref target="section-4.6"/>. Careful consideration should be ion should be
given to how the offload capabilities outlined in <xref target="section-4.6"/ given to how the offload capabilities outlined in <xref target="sec-4.6" form
> at="default"/>
impact an option's design. impact an option's design.
</t> </t>
</section>
</section> </section>
</section>
</section> <section anchor="sec-4" numbered="true" toc="default">
<name>Implementation and Deployment Considerations</name>
</section> <section anchor="sec-4.1" numbered="true" toc="default">
<name>Applicability Statement</name>
<section title="Implementation and Deployment Considerations" anchor="sec <t>
tion-4"> Geneve is a UDP-based network virtualization overlay encapsulation protoc
<section title="Applicability Statement" anchor="section-4.1"><t> ol
Geneve is a network virtualization overlay encapsulation protocol
designed to establish tunnels between NVEs over an existing IP network. designed to establish tunnels between NVEs over an existing IP network.
It is intended for use in public or private data center environments, It is intended for use in public or private data center environments,
for deploying multi-tenant overlay networks over an existing IP underlay network.</t> for deploying multi-tenant overlay networks over an existing IP underlay network.</t>
<t>
<t> As a UDP-based protocol, Geneve adheres
Geneve is a UDP based encapsulation protocol transported over existing to the UDP usage guidelines as specified in <xref target="RFC8085" format
IPv4 and IPv6 networks. Hence, as a UDP based protocol, Geneve adheres ="default"/>.
to the UDP usage guidelines as specified in <xref target="RFC8085"/>. The applicability of these guidelines is dependent on the underlay
The applicability of these guidelines are dependent on the underlay IP network and the nature of the Geneve payload protocol
IP network and the nature of Geneve payload protocol (for example, TCP/IP, IP/Ethernet).</t>
(example TCP/IP, IP/Ethernet).</t> <t>
<t>
Geneve is intended to be deployed in a data center network environment Geneve is intended to be deployed in a data center network environment
operated by a single operator or adjacent set of cooperating network operated by a single operator or an adjacent set of cooperating network
operators that fits with the definition of controlled environments operators that fits with the definition of controlled environments
in <xref target="RFC8085"/>. A network in a controlled environmen in <xref target="RFC8085" format="default"/>. A network in a cont
t can be rolled environment can be
managed to operate under certain conditions whereas in the general managed to operate under certain conditions, whereas in the general
Internet this cannot be done. Hence requirements for a tunnel Internet, this cannot be done. Hence, requirements for a tunneling
protocol operating under a controlled environment can be less protocol operating under a controlled environment can be less
restrictive than the requirements of the general Internet. restrictive than the requirements of the general Internet.
</t> </t>
<t>
<t>
For the purpose of this document, a traffic-managed controlled environment For the purpose of this document, a traffic-managed controlled environment
(TMCE) is defined as an IP network that is traffic-engineered and/or otherwise (TMCE) is defined as an IP network that is traffic engineered and/or otherwise
managed (e.g., via use of traffic rate limiters) to avoid congestion. The concept managed (e.g., via use of traffic rate limiters) to avoid congestion. The concept
of TMCE is outlined in <xref target="RFC8086"/>. Significant portions of of a TMCE is outlined in <xref target="RFC8086" format="default"/>. Signi
the text ficant portions of the text
in <xref target="section-4.1"/> through <xref target="section-4.3"/> are in <xref target="sec-4.1" format="default"/> through <xref target="sec-4.
based 3" format="default"/> are based
on <xref target="RFC8086"/> as applicable to Geneve.</t> on <xref target="RFC8086" format="default"/> as applicable to Geneve.</t>
<t>
<t>
It is the responsibility of the operator to ensure that the guidelines/requirements It is the responsibility of the operator to ensure that the guidelines/requirements
in this section are followed as applicable to their Geneve deployment(s). </t> in this section are followed as applicable to their Geneve deployment(s). </t>
</section> </section>
<section anchor="sec-4.2" numbered="true" toc="default">
<section title="Congestion Control Functionality" anchor="section-4.2"><t <name>Congestion-Control Functionality</name>
> <t>
Geneve does not natively provide congestion control functionality and rel Geneve does not natively provide congestion-control functionality and rel
ies ies
on the payload protocol traffic for congestion control. As such Geneve MU on the payload protocol traffic for congestion control. As such, Geneve <
ST bcp14>MUST</bcp14>
be used with congestion controlled traffic or within a network that is be used with congestion-controlled traffic or within a TMCE to avoid cong
traffic managed to avoid congestion (TMCE). An operator of a traffic estion. An operator of a TMCE may avoid congestion through careful provisioning
managed network (TMCE) may avoid congestion by careful provisioning of their networks, rate-limiting user data traffic, and managing traffic
of their networks, rate-limiting of user data traffic and traffic
engineering according to path capacity.</t> engineering according to path capacity.</t>
</section> </section>
<section anchor="sec-4.3" numbered="true" toc="default">
<section title="UDP Checksum" anchor="section-4.3"><t> <name>UDP Checksum</name>
In order to provide integrity of Geneve headers, options and payload, <t>
(for example to avoid misdelivery of payload to different tenant systems) The outer UDP checksum <bcp14>SHOULD</bcp14> be used with Geneve when tra
in case of data corruption, the outer UDP checksum SHOULD be used with Ge nsported
neve over IPv4; this is to provide integrity for the Geneve headers,
when transported over IPv4. The UDP checksum provides a statistical guara options, and payload in case of data corruption (for example, to
ntee avoid misdelivery of the payload to different tenant systems). The UDP check
sum provides a statistical guarantee
that a payload was not corrupted in transit. These integrity checks are not that a payload was not corrupted in transit. These integrity checks are not
strong from a coding or cryptographic perspective and are not designed to strong from a coding or cryptographic perspective and are not designed to
detect physical-layer errors or malicious modification of the datagram detect physical-layer errors or malicious modification of the datagram
(see Section 3.4 of <xref target="RFC8085"/>). In deployments where such (see <xref target="RFC8085" sectionFormat="of" section="3.4"/>). In deplo
a risk exists, yments where such a risk exists,
an operator SHOULD use additional data integrity mechanisms such as offer an operator <bcp14>SHOULD</bcp14> use additional data integrity
ed mechanisms such as those offered
by IPsec (see <xref target="section-6.2"/>).</t> by IPsec (see <xref target="sec-6.2" format="default"/>).</t>
<t>
An operator MAY choose to disable UDP checksums
and use zero checksums if Geneve packet integrity is provided by other da
ta
integrity mechanisms such as IPsec or additional checksums or if one of
the conditions in <xref target="section-4.3.1"/> a, b, c are met.</t>
<t> <t>
By default, UDP checksums MUST be used when Geneve is transported over IP An operator <bcp14>MAY</bcp14> choose to disable UDP checksums
v6. and use zero UDP checksum if Geneve packet integrity is provided by other
A tunnel endpoint MAY be configured for use with zero UDP checksum if data
additional requirements in <xref target="section-4.3.1"/> are met.</t> integrity mechanisms, such as IPsec or additional checksums, or if one of
the conditions (a, b, or c) in <xref target="sec-4.3.1" format="default"/
<section title="UDP Zero Checksum Handling with IPv6" anchor="section-4.3 > is met.</t>
.1"><t> <t>
By default, UDP checksums <bcp14>MUST</bcp14> be used when Geneve is tran
sported over IPv6.
A tunnel endpoint <bcp14>MAY</bcp14> be configured for use with zero UDP
checksum if
additional requirements in <xref target="sec-4.3.1" format="default"/> ar
e met.</t>
<section anchor="sec-4.3.1" numbered="true" toc="default">
<name>Zero UDP Checksum Handling with IPv6</name>
<t>
When Geneve is used over IPv6, the UDP checksum is used to protect IPv6 headers, When Geneve is used over IPv6, the UDP checksum is used to protect IPv6 headers,
UDP headers and Geneve headers, options and payload from potential data c UDP headers, and Geneve headers, options, and payload from potential data
orruption. corruption.
As such by default Geneve MUST use UDP checksums when transported over IP As such, by default, Geneve <bcp14>MUST</bcp14> use UDP checksums when tr
v6. ansported over IPv6.
An operator MAY choose to configure to operate with zero UDP checksum if An operator <bcp14>MAY</bcp14> choose to configure zero UDP checksum if
operating in a traffic managed controlled environment as stated in operating in a TMCE as stated in
<xref target="section-4.1"/> if one of the following conditions are met.< <xref target="sec-4.1" format="default"/> if one of the following conditi
/t> ons is met.</t>
<ol spacing="normal" type="a">
<t><list style="letters"><t>It is known that the packet corruption is exc <li>It is known that packet corruption is exceptionally
eptionally
unlikely (perhaps based on knowledge of equipment types in their underlay unlikely (perhaps based on knowledge of equipment types in their underlay
network) and the operator is willing to take a risk of undetected packet network) and the operator is willing to risk undetected packet
corruption</t> corruption.</li>
<li>It is judged through observational measurements (perhaps through
<t>It is judged through observational measurements (perhaps through histo historic
ric or current traffic flows that use non-zero checksum) that the level of pa
or current traffic flows that use non zero checksum) that the level of pa cket
cket corruption is tolerably low and is where the operator is willing to risk
corruption is tolerably low and where the operator is willing to take undetected corruption.</li>
the risk of undetected corruption.</t> <li>The Geneve payload is carrying applications that are tolerant of
misdelivered
<t>Geneve payload is carrying applications that are tolerant of misdelive or corrupted packets (perhaps through higher-layer checksum validation
red and/or reliability through retransmission). </li>
or corrupted packets (perhaps through higher layer checksum validation </ol>
and/or reliability through retransmission) </t> <t> In addition, Geneve tunnel implementations using zero UDP checksum
</list> <bcp14>MUST</bcp14> meet
</t>
<t> In addition Geneve tunnel implementations using zero UDP checksum MUS
T meet
the following requirements:</t> the following requirements:</t>
<ol spacing="normal" type="1">
<t><list style="numbers"><t>Use of UDP checksum over IPv6 MUST be the def <li>Use of UDP checksum over IPv6 <bcp14>MUST</bcp14> be the default
ault configuration for all Geneve tunnels.</li>
configuration for all Geneve tunnels.</t> <li>If Geneve is used with zero UDP checksum over IPv6, then such
a tunnel
<t>If Geneve is used with zero UDP checksum over IPv6 then such tunnel endpoint implementation <bcp14>MUST</bcp14> meet all the requirements spe
endpoint implementation MUST meet all the requirements specified cified
in Section 4 of <xref target="RFC6936"/> and requirement 1 as specified i in <xref target="RFC6936" sectionFormat="of" section="4"/> and requiremen
n t 1 as specified in <xref target="RFC6936" sectionFormat="of" section="5"/> sinc
Section 5 of <xref target="RFC6936"/> as that is relevant to Geneve.</t> e it is relevant to Geneve.</li>
<li>The Geneve tunnel endpoint that decapsulates the tunnel
<t>The Geneve tunnel endpoint that decapsulates the tunnel SHOULD check t <bcp14>SHOULD</bcp14> check that the
he source and destination IPv6 addresses are valid for the Geneve tunnel tha
source and destination IPv6 addresses are valid for the Geneve tunnel th t
at
is configured to receive zero UDP checksum and discard other packets is configured to receive zero UDP checksum and discard other packets
for which such check fails.</t> for which such a check fails.</li>
<li>
<t>The Geneve tunnel endpoint that encapsulates the tunnel MAY use differ <t>The Geneve tunnel endpoint that encapsulates the tunnel <bcp14>
ent MAY</bcp14> use different
IPv6 source addresses for each Geneve tunnel that uses zero UDP checksum mode IPv6 source addresses for each Geneve tunnel that uses zero UDP checksum mode
in order to strengthen the decapsulator's check of the IPv6 source address in order to strengthen the decapsulator's check of the IPv6 source address
(i.e the same IPv6 source address is not to be used with more than one IPv6 (i.e., the same IPv6 source address is not to be used with more than one IPv6
destination address, irrespective of whether that destination address is destination address, irrespective of whether that destination address is
a unicast or multicast address). When this is not possible, it is RECOMMENDED a unicast or multicast address). When this is not possible, it is <bcp14>RECOMMENDED</bcp14>
to use each source address for as few Geneve tunnels that use zero UDP to use each source address for as few Geneve tunnels that use zero UDP
checksum as is feasible. checksum as is feasible.
<vspace blankLines="1"/> </t>
Note that (for requirements 3 and 4) the receiving tunnel endpoint can ap <t>
ply Note that for requirements 3 and 4, the receiving tunnel endpoint can app
ly
these checks only if it has out-of-band knowledge that the encapsulating tunnel these checks only if it has out-of-band knowledge that the encapsulating tunnel
endpoint is applying the indicated behavior. One possibility to obtain this out-of-band endpoint is applying the indicated behavior. One possibility to obtain this out-of-band
knowledge is through signaling by the control plane. The definition of knowledge is through signaling by the control plane. The definition of
the control plane is beyond the scope of this document.</t> the control plane is beyond the scope of this document.</t>
</li>
<t>Measures SHOULD be taken to prevent Geneve traffic over IPv6 with zero <li>Measures <bcp14>SHOULD</bcp14> be taken to prevent Geneve traffi
UDP c over IPv6 with zero UDP
checksum from escaping into the general Internet. Examples of such measures include checksum from escaping into the general Internet. Examples of such measures include
employing packet filters at the gateways or edge of Geneve network and/or employing packet filters at the gateways or edge of the Geneve network and/or
keeping logical or physical separation of the Geneve network from networks keeping logical or physical separation of the Geneve network from networks
carrying the general Internet traffic.</t> carrying general Internet traffic.</li>
</list> </ol>
</t> <t> The above requirements do not change the requirements
specified in either <xref target="RFC8200" format="default"/> or
<t> The above requirements do not change either the requirements <xref target="RFC6936" format="default"/>.
specified in <xref target="RFC8200"/> or </t>
the requirements specified in <xref target="RFC6936"/>. <t>The use of the source IPv6 address in addition to the
</t>
<t>The use of the source IPv6 address in addition to the
destination IPv6 address, plus the recommendation against destination IPv6 address, plus the recommendation against
reuse of source IPv6 addresses among Geneve tunnels collectively reuse of source IPv6 addresses among Geneve tunnels, collectively
provide some mitigation for the absence of UDP checksum coverage of provide some mitigation for the absence of UDP checksum coverage of
the IPv6 header. A traffic-managed controlled environment that satisfies the IPv6 header. A traffic-managed controlled environment that satisfies
at least one of three conditions listed at the beginning of at least one of the three conditions listed at the beginning of
this section provides additional assurance. this section provides additional assurance.
</t> </t>
</section>
<t> Editorial Note (The following paragraph to be removed by the </section>
RFC Editor before publication) </t> <section anchor="sec-4.4" numbered="true" toc="default">
<t> It was discussed during TSVART early review if the level of requireme <name>Encapsulation of Geneve in IP</name>
nt for using <t>
different IPv6 source addresses for different tunnel destinations As an IP-based tunneling protocol, Geneve shares many properties and
would need to be "MAY"
or "SHOULD". The discussion concluded that it was appropriate to
keep this
as "MAY", since it was considered not realistic for control plane
s having to
maintain a high level of state on a per tunnel destination basis.
In addition, the
text above provides sufficient guidance to operators and implemen
tors on possible mitigations.</t>
</section>
</section>
<section title="Encapsulation of Geneve in IP" anchor="section-4.4"><t>
As an IP-based tunnel protocol, Geneve shares many properties and
techniques with existing protocols. The application of some of these techniques with existing protocols. The application of some of these
are described in further detail, although in general most concepts are described in further detail, although, in general, most concepts
applicable to the IP layer or to IP tunnels generally also function applicable to the IP layer or to IP tunnels generally also function
in the context of Geneve.</t> in the context of Geneve.</t>
<section anchor="sec-4.4.1" numbered="true" toc="default">
<section title="IP Fragmentation" anchor="section-4.4.1"> <name>IP Fragmentation</name>
<t>
<t> It is <bcp14>RECOMMENDED</bcp14> that Path MTU Discovery (see <xref
It is strongly RECOMMENDED that Path MTU Discovery (<xref target="RFC1191"/>, target="RFC1191" format="default"/> and <xref target="RFC8201" format="defaul
<xref target="RFC8201"/>) be used to prevent or minimize fragmentation. t"/>) be used to prevent or minimize fragmentation.
The use of Path MTU Discovery on the transit network provides the The use of Path MTU Discovery on the transit network provides the
encapsulating tunnel endpoint with soft-state about the link that it may use encapsulating tunnel endpoint with soft-state information about the link that it may use
to prevent or minimize fragmentation depending on its role in the to prevent or minimize fragmentation depending on its role in the
virtualized network. The NVE can maintain this state (the MTU size of virtualized network. The NVE can maintain this state (the MTU size of
the tunnel link(s) associated with the tunnel endpoint), so if a the tunnel link(s) associated with the tunnel endpoint), so if a
tenant system sends large packets that when encapsulated exceed the tenant system sends large packets that, when encapsulated, exceed the
MTU size of the tunnel link, the tunnel endpoint can discard such MTU size of the tunnel link, the tunnel endpoint can discard such
packets and send exception messages to the tenant system(s). If the packets and send exception messages to the tenant system(s). If the
tunnel endpoint is associated with a routing or forwarding function and/or has the capability tunnel endpoint is associated with a routing or forwarding function and/or has the capability
to send ICMP messages, the encapsulating tunnel endpoint MAY send ICMP fragme to send ICMP messages, the encapsulating tunnel endpoint <bcp14>MAY</bcp14> s
ntation end ICMP fragmentation
needed <xref target="RFC0792"/> or Packet Too Big <xref target="RFC4443"/> me needed <xref target="RFC0792" format="default"/> or Packet Too Big <xref targ
ssages to the tenant system(s). et="RFC4443" format="default"/> messages to the tenant system(s).
When determining the MTU size of a tunnel link, maximum length of options MUS When determining the MTU size of a tunnel link, the maximum length of options
T be assumed as options may vary <bcp14>MUST</bcp14> be assumed as options may vary
on a per-packet basis. For example, recommendations/guidance for handling fra on a per-packet basis. Recommendations and guidance for handling fragmentatio
gmentation in n in
similar overlay encapsulation services like PWE3 are provided in similar overlay encapsulation services like Pseudowire Emulation
Section 5.3 of <xref target="RFC3985"/>.</t> Edge-to-Edge (PWE3) are provided in <xref target="RFC3985"
sectionFormat="of" section="5.3"/>.</t>
<t> <t>
Note that some implementations may not be capable of supporting Note that some implementations may not be capable of supporting
fragmentation or other less common features of the IP header, such as fragmentation or other less common features of the IP header, such as
options and extension headers. For example, some of the issues associated options and extension headers. Some of the issues associated
with MTU size and fragmentation in IP tunneling and use of ICMP messages is with MTU size and fragmentation in IP tunneling and use of ICMP messages are
outlined in Section 4.2 of <xref target="I-D.ietf-intarea-tunnels"/>.</t> outlined in <xref target="I-D.ietf-intarea-tunnels"
sectionFormat="of" section="4.2"/>.</t>
</section> </section>
<section anchor="sec-4.4.2" numbered="true" toc="default">
<section title="DSCP, ECN and TTL" anchor="section-4.4.2"><t> <name>DSCP, ECN, and TTL</name>
<t>
When encapsulating IP (including over Ethernet) packets in Geneve, When encapsulating IP (including over Ethernet) packets in Geneve,
there are several considerations for propagating DSCP and ECN bits there are several considerations for propagating Differentiated Services
Code Point (DSCP) and Explicit Congestion Notification (ECN) bits
from the inner header to the tunnel on transmission and the reverse from the inner header to the tunnel on transmission and the reverse
on reception.</t> on reception.</t>
<t> <t>
<xref target="RFC2983"/> provides guidance for mapping DSCP between inner and <xref target="RFC2983" format="default"/> provides guidance for mapping DSCP
outer between inner and outer
IP headers. Network virtualization is typically more closely aligned IP headers. Network virtualization is typically more closely aligned
with the Pipe model described, where the DSCP value on the tunnel with the Pipe model described, where the DSCP value on the tunnel
header is set based on a policy (which may be a fixed value, one header is set based on a policy (which may be a fixed value, one
based on the inner traffic class, or some other mechanism for based on the inner traffic class or some other mechanism for
grouping traffic). Aspects of the Uniform model (which treats the grouping traffic). Aspects of the Uniform model (which treats the
inner and outer DSCP value as a single field by copying on ingress inner and outer DSCP values as a single field by copying on ingress
and egress) may also apply, such as the ability to remark the inner and egress) may also apply, such as the ability to re-mark the inner
header on tunnel egress based on transit marking. However, the header on tunnel egress based on transit marking. However, the
Uniform model is not conceptually consistent with network Uniform model is not conceptually consistent with network
virtualization, which seeks to provide strong isolation between virtualization, which seeks to provide strong isolation between
encapsulated traffic and the physical network.</t> encapsulated traffic and the physical network.</t>
<t>
<t> <xref target="RFC6040" format="default"/> describes the mechanism for exposin
<xref target="RFC6040"/> describes the mechanism for exposing ECN capabilitie g ECN capabilities on IP
s on IP
tunnels and propagating congestion markers to the inner packets. tunnels and propagating congestion markers to the inner packets.
This behavior MUST be followed for IP packets encapsulated in Geneve.</t> This behavior <bcp14>MUST</bcp14> be followed for IP packets encapsulated in
<t> Geneve.</t>
Though Uniform or Pipe models could be used for TTL (or Hop Limit in case of <t>
IPv6) Though either the Uniform or Pipe models could be used for handling TTL (or H
handling when tunneling IP packets, the Pipe model is more aligned with netwo op Limit in case of IPv6) when tunneling IP packets, the Pipe model is more cons
rk virtualization. istent with network virtualization.
<xref target="RFC2003"/> provides guidance on handling TTL between inner IP h <xref target="RFC2003" format="default"/> provides guidance on handling TTL b
eader and outer IP tunnels; etween inner IP header and outer IP tunnels;
this model is more aligned with the Pipe model and is RECOMMENDED for this model is similar to the Pipe model and is <bcp14>RECOMMENDED</bcp14> for
use with Geneve for network virtualization applications.</t> use with Geneve for network virtualization applications.</t>
</section>
</section> <section anchor="sec-4.4.3" numbered="true" toc="default">
<name>Broadcast and Multicast</name>
<section title="Broadcast and Multicast" anchor="section-4.4.3"><t> <t>
Geneve tunnels may either be point-to-point unicast between two Geneve tunnels may either be point-to-point unicast between two
tunnel endpoints or may utilize broadcast or multicast addressing. It is tunnel endpoints or utilize broadcast or multicast addressing. It is
not required that inner and outer addressing match in this respect. not required that inner and outer addressing match in this respect.
For example, in physical networks that do not support multicast, For example, in physical networks that do not support multicast,
encapsulated multicast traffic may be replicated into multiple encapsulated multicast traffic may be replicated into multiple
unicast tunnels or forwarded by policy to a unicast location unicast tunnels or forwarded by policy to a unicast location
(possibly to be replicated there).</t> (possibly to be replicated there).</t>
<t>
<t> With physical networks that do support multicast, it may be desirable
With physical networks that do support multicast it may be desirable
to use this capability to take advantage of hardware replication for to use this capability to take advantage of hardware replication for
encapsulated packets. In this case, multicast addresses may be encapsulated packets. In this case, multicast addresses may be
allocated in the physical network corresponding to tenants, allocated in the physical network corresponding to tenants,
encapsulated multicast groups, or some other factor. The allocation encapsulated multicast groups, or some other factor. The allocation
of these groups is a component of the control plane and therefore of these groups is a component of the control plane and, therefore,
is beyond the scope of this document.</t> is beyond the scope of this document.</t>
<t>
<t>
When physical multicast is in When physical multicast is in
use, devices with heterogeneous capabilities may be present in the same group. use, devices with heterogeneous capabilities may be present in the same group.
Some options may only be interpretable by a subset of the devices in the group. Some options may only be interpretable by a subset of the devices in the group.
Other devices can safely ignore such options unless the 'C' bit is set to Other devices can safely ignore such options unless the 'C' bit is set to
mark the unknown option as critical. Requirements outlined in <xref target="section-3.4"/> mark the unknown option as critical. The requirements outlined in <xref target="sec-3.4" format="default"/>
apply for critical options.</t> apply for critical options.</t>
<t>
<t> In addition, <xref target="RFC8293" format="default"/> provides examples of v
In addition, <xref target="RFC8293"/> provides examples of various mechanisms arious mechanisms that can
that can
be used for multicast handling in network virtualization overlay networks.</t> be used for multicast handling in network virtualization overlay networks.</t>
</section>
</section> <section anchor="sec-4.4.4" numbered="true" toc="default">
<name>Unidirectional Tunnels</name>
<section title="Unidirectional Tunnels" anchor="section-4.4.4"><t> <t>
Generally speaking, a Geneve tunnel is a unidirectional concept. IP Generally speaking, a Geneve tunnel is a unidirectional concept. IP
is not a connection oriented protocol and it is possible for two is not a connection-oriented protocol, and it is possible for two
tunnel endpoints to communicate with each other using different paths or to tunnel endpoints to communicate with each other using different paths or to
have one side not transmit anything at all. As Geneve is an IP-based have one side not transmit anything at all. As Geneve is an IP-based
protocol, the tunnel layer inherits these same characteristics.</t> protocol, the tunnel layer inherits these same characteristics.</t>
<t>
<t>
It is possible for a tunnel to encapsulate a protocol, such as TCP, It is possible for a tunnel to encapsulate a protocol, such as TCP,
which is connection oriented and maintains session state at that that is connection oriented and maintains session state at that
layer. In addition, implementations MAY model Geneve tunnels as layer. In addition, implementations <bcp14>MAY</bcp14> model Geneve tunnels
connected, bidirectional links, such as to provide the abstraction of as
connected, bidirectional links, for example, to provide the abstraction of
a virtual port. In both of these cases, bidirectionality of the a virtual port. In both of these cases, bidirectionality of the
tunnel is handled at a higher layer and does not affect the operation tunnel is handled at a higher layer and does not affect the operation
of Geneve itself.</t> of Geneve itself.</t>
</section>
</section> </section>
<section anchor="sec-4.5" numbered="true" toc="default">
</section> <name>Constraints on Protocol Features</name>
<t>
<section title="Constraints on Protocol Features" anchor="section-4.5"><t Geneve is intended to be flexible for use with a wide range of current and
>
Geneve is intended to be flexible to a wide range of current and
future applications. As a result, certain constraints may be placed future applications. As a result, certain constraints may be placed
on the use of metadata or other aspects of the protocol in order to on the use of metadata or other aspects of the protocol in order to
optimize for a particular use case. For example, some applications optimize for a particular use case. For example, some applications
may limit the types of options which are supported or enforce a may limit the types of options that are supported or enforce a
maximum number or length of options. Other applications may only maximum number or length of options. Other applications may only
handle certain encapsulated payload types, such as Ethernet or IP. handle certain encapsulated payload types, such as Ethernet or IP.
This could be either globally throughout the system or, for example, These optimizations can be implemented either globally (throughout
restricted to certain classes of devices or network paths.</t> the system) or locally (for example, restricted to certain classes
of devices or network paths).</t>
<t> <t>
These constraints may be communicated to tunnel endpoints either These constraints may be communicated to tunnel endpoints either
explicitly through a control plane or implicitly by the nature of the explicitly through a control plane or implicitly by the nature of the
application. As Geneve is defined as a data plane protocol that is application. As Geneve is defined as a data plane protocol that is
control plane agnostic, definition of such mechanisms are beyond the scope of this control plane agnostic, definition of such mechanisms is beyond the scope of this
document.</t> document.</t>
<section anchor="sec-4.5.1" numbered="true" toc="default">
<section title="Constraints on Options" anchor="section-4.5.1"><t> <name>Constraints on Options</name>
<t>
While Geneve options are flexible, a control plane may restrict While Geneve options are flexible, a control plane may restrict
the number of option TLVs as well as the order and size of the TLVs the number of option TLVs as well as the order and size of the TLVs
between tunnel endpoints to make it simpler for a data plane between tunnel endpoints to make it simpler for a data plane
implementation in software or hardware to handle <xref target="I-D.ietf-nvo3- implementation in software or hardware to handle (see <xref target="I-D.ietf-
encap"/>. nvo3-encap" format="default"/>).
For example, there may be some critical information such as a secure For example, there may be some critical information, such as a secure
hash that must be processed in a certain order to provide lowest hash, that must be processed in a certain order to provide the lowest
latency or there may be other scenarios where the options must be processed latency, or there may be other scenarios where the options must be
in a certain order due to protocol semantics.</t> processed in a given order due to protocol semantics.</t>
<t>
<t>
A control plane may negotiate a subset of option TLVs and certain TLV A control plane may negotiate a subset of option TLVs and certain TLV
ordering, as well may limit the total number of option TLVs present ordering; it may also limit the total number of option TLVs present
in the packet, for example, to accommodate hardware capable of in the packet, for example, to accommodate hardware capable of
processing fewer options <xref target="I-D.ietf-nvo3-encap"/>. Hence, a cont processing fewer options. Hence, a control plane
rol plane needs to have the ability to describe the supported TLV subset and
needs to have the ability to describe the supported TLVs subset and its ordering to the tunnel endpoints. In the absence of a control
their order to the tunnel endpoints. In the absence of a control
plane, alternative configuration mechanisms may be used for this plane, alternative configuration mechanisms may be used for this
purpose. Such mechanisms are beyond the scope of this document.</t> purpose. Such mechanisms are beyond the scope of this document.</t>
</section>
</section>
<section anchor="sec-4.6" numbered="true" toc="default">
<name>NIC Offloads</name>
</section> <t>
</section>
<section title="NIC Offloads" anchor="section-4.6"><t>
Modern NICs currently provide a variety of offloads to enable the Modern NICs currently provide a variety of offloads to enable the
efficient processing of packets. The implementation of many of these efficient processing of packets. The implementation of many of these
offloads requires only that the encapsulated packet be easily parsed offloads requires only that the encapsulated packet be easily parsed
(for example, checksum offload). However, optimizations such as LSO (for example, checksum offload). However, optimizations such as LSO
and LRO involve some processing of the options themselves since they and LRO involve some processing of the options themselves since they
must be replicated/merged across multiple packets. In these must be replicated/merged across multiple packets. In these
situations, it is desirable to not require changes to the offload situations, it is desirable not to require changes to the offload
logic to handle the introduction of new options. To enable this, logic to handle the introduction of new options. To enable this,
some constraints are placed on the definitions of options to allow some constraints are placed on the definitions of options to allow
for simple processing rules:</t> for simple processing rules:</t>
<ul spacing="normal">
<t><list style="symbols"><t>When performing LSO, a NIC MUST replicate the <li>When performing LSO, a NIC <bcp14>MUST</bcp14> replicate the entir
entire Geneve header e Geneve header
and all options, including those unknown to the device, onto each and all options, including those unknown to the device, onto each
resulting segment unless an option allows an exception. resulting segment unless an option allows an exception.
Conversely, when performing LRO, a NIC may assume that a Conversely, when performing LRO, a NIC may assume that a
binary comparison of the options (including unknown options) is binary comparison of the options (including unknown options) is
sufficient to ensure equality and MAY merge packets with equal sufficient to ensure equality and <bcp14>MAY</bcp14> merge packets with eq
Geneve headers.</t> ual
Geneve headers.</li>
<t>Options MUST NOT be reordered during the course of offload <li>Options <bcp14>MUST NOT</bcp14> be reordered during the course of
processing, including when merging packets for the purpose of LRO.</t> offload
processing, including when merging packets for the purpose of LRO.</li>
<t>NICs performing offloads MUST NOT drop packets with unknown <li>NICs performing offloads <bcp14>MUST NOT</bcp14> drop packets with
options, including those marked as critical, unless explicitly configured. unknown
</t> options, including those marked as critical, unless explicitly configured
to do so.</li>
</list> </ul>
</t> <t>
<t>
There is no requirement that a given implementation of Geneve employ There is no requirement that a given implementation of Geneve employ
the offloads listed as examples above. However, as these offloads the offloads listed as examples above. However, as these offloads
are currently widely deployed in commercially available NICs, the are currently widely deployed in commercially available NICs, the
rules described here are intended to enable efficient handling of rules described here are intended to enable efficient handling of
current and future options across a variety of devices.</t> current and future options across a variety of devices.</t>
</section>
</section> <section anchor="sec-4.7" numbered="true" toc="default">
<name>Inner VLAN Handling</name>
<section title="Inner VLAN Handling" anchor="section-4.7"><t> <t>
Geneve is capable of encapsulating a wide range of protocols and Geneve is capable of encapsulating a wide range of protocols; therefore, a gi
therefore a given implementation is likely to support only a small ven implementation is likely to support only a small
subset of the possibilities. However, as Ethernet is expected to be subset of the possibilities. However, as Ethernet is expected to be
widely deployed, it is useful to describe the behavior of VLANs widely deployed, it is useful to describe the behavior of VLANs
inside encapsulated Ethernet frames.</t> inside encapsulated Ethernet frames.</t>
<t>
<t> As with any protocol, support for inner VLAN headers is <bcp14>OPTIONAL</bcp1
As with any protocol, support for inner VLAN headers is OPTIONAL. In 4>. In
many cases, the use of encapsulated VLANs may be disallowed due to many cases, the use of encapsulated VLANs may be disallowed due to
security or implementation considerations. However, in other cases security or implementation considerations. However, in other cases, the trun
trunking of VLAN frames across a Geneve tunnel can prove useful. As king of VLAN frames across a Geneve tunnel can prove useful. As
a result, the processing of inner VLAN tags upon ingress or egress a result, the processing of inner VLAN tags upon ingress or egress
from a tunnel endpoint is based upon the configuration of the tunnel from a tunnel endpoint is based upon the configuration of the tunnel
endpoint and/or control plane and not explicitly defined as part of endpoint and/or control plane and is not explicitly defined as part of
the data format.</t> the data format.</t>
</section>
</section> </section>
<section anchor="sec-5" numbered="true" toc="default">
</section> <name>Transition Considerations</name>
<t>
<section title="Transition Considerations" anchor="section-5"><t>
Viewed exclusively from the data plane, Geneve is compatible with existing IP networks Viewed exclusively from the data plane, Geneve is compatible with existing IP networks
as it appears to most devices as UDP packets. as it appears to most devices as UDP packets.
However, as there are already a number of tunnel protocols deployed However, as there are already a number of tunneling protocols deployed
in network virtualization environments, there is a practical question in network virtualization environments, there is a practical question
of transition and coexistence.</t> of transition and coexistence.</t>
<t>
<t>
Since Geneve builds on the base data plane functionality provided by the most Since Geneve builds on the base data plane functionality provided by the most
common protocols used for network virtualization (VXLAN, NVGRE) common protocols used for network virtualization (VXLAN and NVGRE),
it should be straightforward to port an existing control plane it should be straightforward to port an existing control plane
to run on top of it with minimal effort. With both the old and new to run on top of it with minimal effort. With both the old and new
packet formats supporting the same set of capabilities, there is no packet formats supporting the same set of capabilities, there is no
need for a hard transition - tunnel endpoints directly communicating with need for a hard transition; tunnel endpoints directly communicating with
each other can use any common protocol, which may be different even each other can use any common protocol, which may be different even
within a single overall system. As transit devices are primarily within a single overall system.
As transit devices are primarily
forwarding packets on the basis of the IP header, all protocols forwarding packets on the basis of the IP header, all protocols
appear similar and these devices do not introduce additional appear to be similar, and these devices do not introduce additional
interoperability concerns.</t> interoperability concerns.</t>
<t>
<t>
To assist with this transition, it is strongly suggested that To assist with this transition, it is strongly suggested that
implementations support simultaneous operation of both Geneve and implementations support simultaneous operation of both Geneve and
existing tunnel protocols as it is expected to be common for a single existing tunneling protocols, as it is expected to be common for a single
node to communicate with a mixture of other nodes. Eventually, older node to communicate with a mixture of other nodes. Eventually, older
protocols may be phased out as they are no longer in use.</t> protocols may be phased out as they are no longer in use.</t>
</section>
<section anchor="sec-6" numbered="true" toc="default">
<name>Security Considerations</name>
</section> <t>
As it is encapsulated within a UDP/IP packet, Geneve does not have any inhere
<section title="Security Considerations" anchor="section-6"><t> nt security
As encapsulated within a UDP/IP packet, Geneve does not have any inherent sec mechanisms.
urity As a result, an attacker with access to the underlay
mechanisms. As a result, an attacker with access to the underlay network transporting the IP packets has the ability to snoop on, alter, or
network transporting the IP packets has the ability to snoop, alter or
inject packets. Compromised tunnel endpoints or transit devices may also inject packets. Compromised tunnel endpoints or transit devices may also
spoof identifiers in the tunnel header to gain access to networks spoof identifiers in the tunnel header to gain access to networks
owned by other tenants.</t> owned by other tenants.</t>
<t>
<t>
Within a particular security domain, such as a data center operated Within a particular security domain, such as a data center operated
by a single service provider, the most common and highest performing security by a single service provider, the most common and highest-performing security
mechanism is isolation of trusted components. Tunnel traffic can be mechanism is isolation of trusted components. Tunnel traffic can be
carried over a separate VLAN and filtered at any untrusted carried over a separate VLAN and filtered at any untrusted
boundaries.</t> boundaries.</t>
<t>
<t>
When crossing an untrusted link, such as the general Internet, VPN technologies such as IPsec When crossing an untrusted link, such as the general Internet, VPN technologies such as IPsec
<xref target="RFC4301"/> should be used to provide authentication and/or encr <xref target="RFC4301" format="default"/> should be used to provide authentic
yption of ation and/or encryption of
the IP packets formed as part of Geneve encapsulation (See <xref target="sect the IP packets formed as part of Geneve encapsulation (see <xref target="sec-
ion-6.1.1"/>).</t> 6.1.1" format="default"/>).</t>
<t>
<t>
Geneve does not otherwise affect the security of the encapsulated Geneve does not otherwise affect the security of the encapsulated
packets. As per the guidelines of BCP 72 <xref target="RFC3552"/>, the following sections packets. As per the guidelines of BCP 72 <xref target="RFC3552" format="default"/>, the following sections
describe potential security risks that may be applicable to Geneve deployments describe potential security risks that may be applicable to Geneve deployments
and approaches to mitigate such risks. It is also noted that not all such risks are applicable and approaches to mitigate such risks. It is also noted that not all such risks are applicable
to all Geneve deployment scenarios, i.e., only a subset may be applicable to certain deployments. to all Geneve deployment scenarios, i.e., only a subset may be applicable to certain deployments.
So an operator has to make an assessment based on their network environment a An operator has to make an assessment based on their network
nd determine the risks environment, determine the risks that are applicable to their specific enviro
that are applicable to their specific environment and use appropriate mitigat nment, and use appropriate mitigation approaches as applicable. </t>
ion approaches as applicable. </t> <section anchor="sec-6.1" numbered="true" toc="default">
<name>Data Confidentiality</name>
<section title="Data Confidentiality" anchor="section-6.1"><t> <t>
Geneve is a network virtualization overlay encapsulation protocol Geneve is a network virtualization overlay encapsulation protocol
designed to establish tunnels between NVEs designed to establish tunnels between NVEs
over an existing IP network. It can be used to deploy multi-tenant overlay networks over an existing IP network. It can be used to deploy multi-tenant overlay networks
over an existing IP underlay network in a public or private data center. over an existing IP underlay network in a public or private data center.
The overlay service is typically provided by a service provider, for exam
ple a The overlay service is typically provided by a service provider, such as
cloud services provider or a private data center operator, this may or no a
t may be cloud service provider or a private data center operator. This may or may
not be
the same provider as an underlay service provider. Due to the nature of multi-tenancy in such environments, the same provider as an underlay service provider. Due to the nature of multi-tenancy in such environments,
a tenant system may expect data confidentiality to ensure its packet data is not tampered with a tenant system may expect data confidentiality to ensure its packet data is not tampered with
(active attack) in transit or a target of unauthorized monitoring (passiv (i.e., active attack) in transit or is a target of unauthorized
e attack) monitoring (i.e., passive attack), for example, by other tenant systems o
for example by other tenant systems or underlay service provider. r underlay service provider.
A compromised network node or a transit device within a A compromised network node or a transit device within a
data center may passively monitor Geneve packet data between NVEs; or rou te data center may passively monitor Geneve packet data between NVEs or rout e
traffic for further inspection. A tenant may traffic for further inspection. A tenant may
expect the overlay service provider to provide data confidentiality as pa rt of the service or expect the overlay service provider to provide data confidentiality as pa rt of the service, or
a tenant may bring its own data confidentiality mechanisms like IPsec or TLS to protect the data a tenant may bring its own data confidentiality mechanisms like IPsec or TLS to protect the data
end to end between its tenant systems. The overlay provider is expected t o provide end to end between its tenant systems. The overlay provider is expected t o provide
cryptographic protection in cases where the underlay provider is not the cryptographic protection in cases where the underlay provider is not the
same as the overlay provider to ensure the payload is not exposed to the underlay.</t> same as the overlay provider to ensure the payload is not exposed to the underlay.</t>
<t> <t>
If an operator determines data confidentiality is necessary in their envi ronment If an operator determines data confidentiality is necessary in their envi ronment
based on their risk analysis, for example as in multi-tenant environments based on their risk analysis -- for example, in multi-tenant
, environments -- then an encryption mechanism <bcp14>SHOULD</bcp14> be use
then an encryption mechanism SHOULD be used to encrypt the tenant d to encrypt the tenant
data end to end between the NVEs. The NVEs may use existing well establis data end to end between the NVEs. The NVEs may use existing well-establis
hed hed
encryption mechanisms such as IPsec, DTLS, etc.</t> encryption mechanisms, such as IPsec, DTLS, etc.</t>
<section anchor="sec-6.1.1" numbered="true" toc="default">
<section title="Inter-Data Center Traffic" anchor="section-6.1.1"><t> <name>Inter-Data Center Traffic</name>
<t>
A tenant system in a customer premises (private data center) may want to connect A tenant system in a customer premises (private data center) may want to connect
to tenant systems on their tenant overlay network in a public cloud data to tenant systems on their tenant overlay network in a public cloud data
center center, or a tenant may want to have its tenant systems located in multiple geog
or a tenant may want to have its tenant systems located in multiple geogr raphically
aphically
separated data centers for high availability. Geneve data traffic between tenant systems separated data centers for high availability. Geneve data traffic between tenant systems
across such separated networks should be protected from threats when traversing public networks. across such separated networks should be protected from threats when traversing public networks.
Any Geneve overlay data leaving the data center network beyond the operator's security domain Any Geneve overlay data leaving the data center network beyond the operator's security domain
SHOULD be secured by encryption mechanisms such as <bcp14>SHOULD</bcp14> be secured by encryption mechanisms, such as
IPsec or other VPN technologies to protect the communications between the IPsec or other VPN technologies, to protect the communications between th
NVEs e NVEs
when they are geographically separated over untrusted network links. Specification of when they are geographically separated over untrusted network links. Specification of
data protection mechanisms employed between data centers is beyond the scope of this document.</t> data protection mechanisms employed between data centers is beyond the scope of this document.</t>
<t> <t>
The principles described in <xref target="section-4"/> regarding controll The principles described in <xref target="sec-4" format="default"/> regar
ed environments still apply to ding controlled environments still apply to
the geographically separated data center usage outlined in this section.</t> the geographically separated data center usage outlined in this section.</t>
</section> </section>
</section>
</section> <section anchor="sec-6.2" numbered="true" toc="default">
<name>Data Integrity</name>
<section title="Data Integrity" anchor="section-6.2"><t> <t>
Geneve encapsulation is used between NVEs to establish overlay tunnels over an existing Geneve encapsulation is used between NVEs to establish overlay tunnels over an existing
IP underlay network. In a multi-tenant data center, a rogue or compromised tenant system IP underlay network. In a multi-tenant data center, a rogue or compromised tenant system
may try to launch a passive attack such as monitoring the traffic of othe may try to launch a passive attack, such as monitoring the traffic of oth
r tenants, or an er tenants, or an
active attack such as trying to inject unauthorized Geneve encapsulated t active attack, such as trying to inject unauthorized Geneve encapsulated
raffic such traffic such
as spoofing, replay, etc., into the network. To prevent such attacks, an as spoofing, replay, etc., into the network. To prevent such attacks, an
NVE MUST NOT NVE <bcp14>MUST NOT</bcp14>
propagate Geneve packets beyond the NVE to tenant systems and SHOULD empl propagate Geneve packets beyond the NVE to tenant systems and <bcp14>SHOU
oy packet filtering LD</bcp14> employ packet-filtering
mechanisms so as not to forward unauthorized traffic between tenant syste ms in different tenant networks. mechanisms so as not to forward unauthorized traffic between tenant syste ms in different tenant networks.
An NVE MUST NOT interpret Geneve packets from tenant systems other than a An NVE <bcp14>MUST NOT</bcp14> interpret Geneve packets from tenant syste
s frames to be encapsulated.</t> ms other than as frames to be encapsulated.</t>
<t>
<t>
A compromised network node or a transit device within a data center may launch an active A compromised network node or a transit device within a data center may launch an active
attack trying to tamper with the Geneve packet data between NVEs. Malicious tampering of attack trying to tamper with the Geneve packet data between NVEs. Malicious tampering of
Geneve header fields may cause the packet from one tenant to be forwarded to a different Geneve header fields may cause the packet from one tenant to be forwarded to a different
tenant network. If an operator determines the possibility of such threat in their environment, tenant network. If an operator determines there is a possibility of such a threat in their environment,
the operator may choose to employ data integrity mechanisms between NVEs. In order to prevent the operator may choose to employ data integrity mechanisms between NVEs. In order to prevent
such risks, a data integrity mechanism SHOULD be used in such environment such risks, a data integrity mechanism <bcp14>SHOULD</bcp14> be used in s
s to protect the uch environments to protect the
integrity of Geneve packets including packet headers, options and payload integrity of Geneve packets, including packet headers, options, and paylo
on communications ad on communications
between NVE pairs. A cryptographic data protection mechanism such as IPse between NVE pairs. A cryptographic data protection mechanism, such as IPs
c may be used to ec, may be used to
provide data integrity protection. A data center operator may choose to deploy any other provide data integrity protection. A data center operator may choose to deploy any other
data integrity mechanisms as applicable and supported in their underlay networks, data integrity mechanisms as applicable and supported in their underlay networks,
although non-cryptographic mechanisms may not protect the Geneve portion of the packet from tampering. </t> although non-cryptographic mechanisms may not protect the Geneve portion of the packet from tampering. </t>
</section> </section>
<section anchor="sec-6.3" numbered="true" toc="default">
<section title="Authentication of NVE peers" anchor="section-6.3"><t> <name>Authentication of NVE Peers</name>
<t>
A rogue network device or a compromised NVE in a data center environment might be able to A rogue network device or a compromised NVE in a data center environment might be able to
spoof Geneve packets as if it came from a legitimate NVE. In order to mit igate such a risk, spoof Geneve packets as if it came from a legitimate NVE. In order to mit igate such a risk,
an operator SHOULD use an authentication mechanism, such as IPsec to ensu an operator <bcp14>SHOULD</bcp14> use an authentication mechanism, such a
re that the s IPsec, to ensure that the
Geneve packet originated from the intended NVE peer, in environments wher Geneve packet originated from the intended NVE peer in environments where
e the operator the operator
determines spoofing or rogue devices is a potential threat. Other simpler determines spoofing or rogue devices are potential threats. Other simpler
source checks source checks,
such as ingress filtering for VLAN/MAC/IP address, reverse path forwardin such as ingress filtering for VLAN/MAC/IP addresses, reverse path forward
g checks, etc., ing checks, etc.,
may be used in certain trusted environments to ensure Geneve packets orig inated may be used in certain trusted environments to ensure Geneve packets orig inated
from the intended NVE peer.</t> from the intended NVE peer.</t>
</section> </section>
<section anchor="sec-6.4" numbered="true" toc="default">
<section title="Options Interpretation by Transit Devices" anchor="sectio <name>Options Interpretation by Transit Devices</name>
n-6.4"><t> <t>
Options, if present in the packet, are generated and terminated by tunnel endpoints. As indicated Options, if present in the packet, are generated and terminated by tunnel endpoints. As indicated
in <xref target="section-2.2.1"/>, transit devices may interpret the opti in <xref target="sec-2.2.1" format="default"/>, transit devices may inter
ons. However, pret the options. However,
if the packet is protected by tunnel endpoint to tunnel endpoint encrypti if the packet is protected by encryption from tunnel endpoint
on, for example to tunnel endpoint (for example, through IPsec), transit devices will not
through IPsec, transit devices will not have visibility into the Geneve h have visibility into the Geneve header or options
eader or options in the packet. In such cases, transit devices <bcp14>MUST</bcp14> handle
in the packet. In such cases transit devices MUST handle Geneve packets Geneve packets as any other IP packet
as any other IP packet
and maintain consistent forwarding behavior. In cases where options are interpreted by transit devices, the operator and maintain consistent forwarding behavior. In cases where options are interpreted by transit devices, the operator
MUST ensure that transit devices are trusted and not compromised. The definition of <bcp14>MUST</bcp14> ensure that transit devices are trusted and not compromised. The definition of
a mechanism to ensure this trust is beyond the scope of this document.</t> a mechanism to ensure this trust is beyond the scope of this document.</t>
</section> </section>
<section anchor="sec-6.5" numbered="true" toc="default">
<section title="Multicast/Broadcast" anchor="section-6.5"><t> <name>Multicast/Broadcast</name>
<t>
In typical data center networks where IP multicasting is not supported in the underlay In typical data center networks where IP multicasting is not supported in the underlay
network, multicasting may be supported using multiple unicast tunnels. The same security network, multicasting may be supported using multiple unicast tunnels. The same security
requirements as described in the above sections can be used to protect Geneve communications requirements as described in the above sections can be used to protect Geneve communications
between NVE peers. If IP multicasting is supported in the underlay network and the operator between NVE peers. If IP multicasting is supported in the underlay network and the operator
chooses to use it for multicast traffic among tunnel endpoints, then the operator in such chooses to use it for multicast traffic among tunnel endpoints, then the operator in such
environments may use data protection mechanisms such as IPsec with multic environments may use data protection mechanisms, such as IPsec with multi
ast cast
extensions <xref target="RFC5374"/> to protect multicast traffic among Ge extensions <xref target="RFC5374" format="default"/>, to protect multicas
neve NVE groups.</t> t traffic among Geneve NVE groups.</t>
</section> </section>
<section anchor="sec-6.6" numbered="true" toc="default">
<section title="Control Plane Communications" anchor="section-6.6"><t> <name>Control Plane Communications</name>
A Network Virtualization Authority (NVA) as outlined in <xref target="RFC <t>
8014"/> may A Network Virtualization Authority (NVA) as outlined in <xref target="RFC
8014" format="default"/> may
be used as a control plane for configuring and managing the Geneve NVEs. The data center be used as a control plane for configuring and managing the Geneve NVEs. The data center
operator is expected to use security mechanisms to protect the communicat ions between operator is expected to use security mechanisms to protect the communicat ions between
the NVA to NVEs and use authentication mechanisms to detect any rogue or compromised the NVA and NVEs and to use authentication mechanisms to detect any rogue or compromised
NVEs within their administrative domain. Data protection mechanisms for control plane NVEs within their administrative domain. Data protection mechanisms for control plane
communication or authentication mechanisms between the NVA and the NVEs a re beyond communication or authentication mechanisms between the NVA and NVEs are b eyond
the scope of this document.</t> the scope of this document.</t>
</section> </section>
</section>
</section> <section anchor="sec-7" numbered="true" toc="default">
<name>IANA Considerations</name>
<section title="IANA Considerations" anchor="section-7"><t> <t>
IANA has allocated UDP port 6081 in the Service Name and Transport Protoc IANA has allocated UDP port 6081 in the "Service Name and Transport Proto
ol col
Port Number Registry <xref target="IANA-SN"/> as the well-known destinati Port Number Registry" <xref target="IANA-SN" format="default"/> as the we
on port ll-known destination port
for Geneve based on early registration.</t> for Geneve:</t>
<dl newline="false" spacing="compact">
<t>Upon publication of this document, this registration will have its ref <dt>Service Name:</dt><dd>geneve</dd>
erence changed to cite <dt>Transport Protocol(s):</dt><dd>UDP</dd>
this document [RFC-to-be] and inline with <xref target="RFC6335"/> the as <dt>Assignee:</dt><dd>IESG &lt;iesg@ietf.org&gt;</dd>
signee and contact of the port entry should be <dt>Contact:</dt><dd>IETF Chair &lt;chair@ietf.org&gt;</dd>
changed to IESG &lt;iesg@ietf.org&gt; and IETF Chair &lt;chair@ietf.org&g <dt>Description:</dt><dd>Generic Network Virtualization Encapsulation (Geneve)</
t; respectively:</t> dd>
<dt>Reference:</dt><dd>[RFC8926]</dd>
<figure><artwork><![CDATA[ <dt>Port Number:</dt><dd>6081</dd>
Service Name: geneve </dl>
Transport Protocol(s): UDP <t>
Assignee: IESG <iesg@ietf.org> In addition, IANA has created a new subregistry titled "Geneve Option Class"
Contact: IETF Chair <chair@ietf.org> for option classes. This registry has been placed under
Description: Generic Network Virtualization Encapsulation (Geneve) a new "Network Virtualization Overlay (NVO3)" heading in the IANA protocol re
Reference: [RFC-to-be] gistries <xref target="IANA-PR" format="default"/>.
Port Number: 6081 The "Geneve Option Class" registry consists of
]]></artwork> 16-bit hexadecimal values along with descriptive strings, assignee/contact in
</figure> formation, and references.
The registration rules for the new registry are (as defined by <xref target="
<t> RFC8126" format="default"/>):</t>
In addition, IANA is requested to create a new "Geneve Option Class" <table align="center"> <name>Geneve Option Class Registry Ranges</name>
registry to allocate Option Classes. This registry is to be placed under <thead>
a new Network Virtualization Overlay (NVO3) protocols page (to be created) in <tr>
IANA protocol registries <xref target="IANA-PR"/>. <th align="left"> Range</th>
The Geneve Option Class registry shall consist of <th align="left"> Registration Procedures</th>
16-bit hexadecimal values along with descriptive strings, assignee/contact in </tr>
formation and references. </thead>
The registration rules for the new registry are (as defined by <xref target=" <tbody>
RFC8126"/>):</t> <tr>
<td align="left">0x0000-0x00FF</td>
<texttable style="full"><ttcol> Range</ttcol> <td align="left">IETF Review</td>
<ttcol> Registration Procedures</ttcol> </tr>
<c>0x0000..0x00FF</c> <tr>
<c>IETF Review</c> <td align="left">0x0100-0xFEFF</td>
<c>0x0100..0xFEFF</c> <td align="left">First Come First Served</td>
<c>First Come First Served</c> </tr>
<c>0xFF00..0xFFFF</c> <tr>
<c>Experimental Use</c> <td align="left">0xFF00-0xFFFF</td>
</texttable> <td align="left">Experimental Use</td>
</tr>
<t> </tbody>
Initial registrations in the new registry are as follows:</t> </table>
</section>
<texttable style="full"><ttcol> Option Class</ttcol>
<ttcol> Description</ttcol> <ttcol> Assignee/Contact </ttcol> <tt
col> References</ttcol>
<c>0x0100</c>
<c>Linux</c>
<c></c>
<c></c>
<c>0x0101</c>
<c>Open vSwitch (OVS)</c>
<c></c>
<c></c>
<c>0x0102</c>
<c>Open Virtual Networking (OVN)</c>
<c></c>
<c></c>
<c>0x0103</c>
<c>In-band Network Telemetry (INT)</c>
<c></c>
<c></c>
<c>0x0104</c>
<c>VMware, Inc.</c>
<c></c>
<c></c>
<c>0x0105</c>
<c>Amazon.com, Inc.</c>
<c></c>
<c></c>
<c>0x0106</c>
<c>Cisco Systems, Inc.</c>
<c></c>
<c></c>
<c>0x0107</c>
<c>Oracle Corporation</c>
<c></c>
<c></c>
<c>0x0108..0x0110</c>
<c>Amazon.com, Inc.</c>
<c></c>
<c></c>
</texttable>
</section>
<section title="Contributors" anchor="section-8"><t>
The following individuals were authors of an earlier version of this
document and made significant contributions:</t>
<figure><artwork><![CDATA[
Pankaj Garg
Microsoft Corporation
1 Microsoft Way
Redmond, WA 98052
USA
Email: pankajg@microsoft.com
Chris Wright
Red Hat Inc.
1801 Varsity Drive
Raleigh, NC 27606
USA
Email: chrisw@redhat.com
Kenneth Duda
Arista Networks
5453 Great America Parkway
Santa Clara, CA 95054
USA
Email: kduda@arista.com
Dinesh G. Dutt
Independent
Email: didutt@gmail.com
Jon Hudson
Independent
Email: jon.hudson@gmail.com
Ariel Hendel
Facebook, Inc.
1 Hacker Way
Menlo Park, CA 94025
USA
Email: ahendel@fb.com
]]></artwork>
</figure>
</section>
<section title="Acknowledgements" anchor="section-9"> </middle>
<t> <back>
The authors wish to acknowledge Puneet Agarwal, David Black, Sami Boutros
, Scott Bradner,
Martin Casado, Alissa Cooper, Roman Danyliw, Bruce Davie, Anoop Ghanwani,
Benjamin Kaduk,
Suresh Krishnan, Mirja Kuhlewind, Barry Leiba, Daniel Migault, Greg Mirks
y, Tal Mizrahi,
Kathleen Moriarty, Magnus Nystrom, Adam Roach, Sabrina Tanamal, Dave Thal
er, Eric Vyncke,
Magnus Westerlund and many other members of the NVO3 WG for their reviews
, comments and suggestions.</t>
<t> <displayreference target="I-D.ietf-nvo3-encap" to="NVO3-ENCAP"/>
The authors would like to thank Sam Aldrin, Alia Atlas, Matthew Bocci, Be <displayreference target="I-D.ietf-nvo3-dataplane-requirements" to="NVO3-DATAPLA
nson Schliesser, and Martin Vigoureux NE"/>
for their guidance throughout the process.</t> <displayreference target="I-D.ietf-intarea-tunnels" to="INTAREA-TUNNELS"/>
</section> <references>
<name>References</name>
<references>
<name>Normative References</name>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.0768.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.0792.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.1122.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.1191.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.2003.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.2119.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.4443.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.6040.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.6936.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.7365.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8085.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8126.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8174.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8200.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8201.xml"/>
</references>
<references>
<name>Informative References</name>
</middle> <reference anchor="ETYPES" target="https://www.iana.org/assignments/ieee
-802-numbers">
<front>
<title>IEEE 802 Numbers</title>
<author>
<organization>IANA</organization>
</author>
</front>
</reference>
<back> <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere
<references title="Normative References"> nce.I-D.ietf-nvo3-encap.xml"/>
&RFC0768;
&RFC0792;
&RFC1112;
&RFC1191;
&RFC2003;
&RFC2119;
&RFC4443;
&RFC6040;
&RFC6936;
&RFC7365;
&RFC8085;
&RFC8126;
&RFC8174;
&RFC8200;
&RFC8201;
</references>
<references title="Informative References">
<reference anchor="ETYPES" <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere
target="https://www.iana.org/assignments/ieee-802-numbers"> nce.I-D.ietf-nvo3-dataplane-requirements.xml"/>
<front>
<title>IEEE 802 Numbers</title>
<author>
<organization>The IEEE Registration Authority</organization>
</author>
<date/>
</front>
</reference>
&I-D.ietf-nvo3-encap; <xi:include href="https://datatracker.ietf.org/public/rfc/bibxml3/refere
&I-D.ietf-nvo3-dataplane-requirements; nce.I-D.ietf-intarea-tunnels.xml"/>
&I-D.ietf-intarea-tunnels;
<reference anchor="IANA-PR" <reference anchor="IANA-PR" target="https://www.iana.org/protocols">
target="https://www.iana.org/protocols"> <front>
<front>
<title>Protocol Registries</title> <title>Protocol Registries</title>
<author> <author>
<organization>IANA</organization> <organization>IANA</organization>
</author> </author>
<date/> </front>
</front> </reference>
</reference>
<reference anchor="IANA-SN" <reference anchor="IANA-SN" target="https://www.iana.org/assignments/ser
target="https://www.iana.org/assignments/service-names-port-numbers"> vice-names-port-numbers">
<front> <front>
<title>Service Name and Transport Protocol Port Number Registry</title> <title>Service Name and Transport Protocol Port Number Registry</title>
<author> <author>
<organization>IANA</organization> <organization>IANA</organization>
</author> </author>
<date/> </front>
</front> </reference>
</reference>
<!--&IEEE.802.1Q_2014;-->
<reference anchor='IEEE.802.1Q_2018' target='http://ieeexplore.ieee.org/servlet/
opac?punumber=8403925'>
<front>
<title>IEEE Standard for Local and Metropolitan Area Networks--Bridges and Bri
dged Networks</title>
<author>
<organization>IEEE</organization>
</author>
<date day='06' month='July' year='2018' />
<abstract><t>This standard specifies how the Media Access Control (MAC) Service is supported by Bridged Networks,
the principles of operation of those networks, and the operation of MAC Bridges and VLAN Bridges,
including management, protocols, and algorithms.</t>
</abstract>
</front>
<seriesInfo name='IEEE' value='802.1Q-2018' />
<seriesInfo name='DOI' value='10.1109/ieeestd.2018.8403927' />
</reference>
&RFC2983; <reference anchor="IEEE.802.1Q_2018" target="http://ieeexplore.ieee.org/servlet/
&RFC3031; opac?punumber=8403925">
&RFC3552; <front>
&RFC3985; <title>IEEE Standard for Local and Metropolitan Area Networks--Bridg
&RFC4301; es and Bridged Networks</title>
&RFC5374; <seriesInfo name="DOI" value="10.1109/IEEESTD.2018.8403927"/>
&RFC6335; <seriesInfo name="IEEE" value="802.1Q-2018"/>
&RFC6438; <author>
&RFC7348; <organization>IEEE</organization>
&RFC7637; </author>
&RFC8014; <date month="July" year="2018"/>
&RFC8086; </front>
&RFC8293; </reference>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.2983.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.3031.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.3552.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.3985.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.4301.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.5374.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.6438.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.7348.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.7637.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8014.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8086.xml"/>
<xi:include href="https://xml2rfc.ietf.org/public/rfc/bibxml/reference.R
FC.8293.xml"/>
<reference anchor="VL2" <reference anchor="VL2" target="https://dl.acm.org/doi/10.1145/1594977.1
target="https://www.sigcomm.org/sites/default/files/ccr/papers/2009/Octo 592576">
ber/1594977-1592576.pdf"> <front>
<front>
<title>VL2: A Scalable and Flexible Data Center Network</title> <title>VL2: A Scalable and Flexible Data Center Network</title>
<seriesInfo name="DOI" value="10.1145/1594977.1592576"/>
<author surname="Greenberg, A., et al."> <author surname="Greenberg, A., et al.">
<organization></organization> <organization/>
</author> </author>
<date month="August" year="2009"/>
</front>
<refcontent>ACM SIGCOMM Computer Communication Review</refcontent>
</reference>
</references>
</references>
<section anchor="sec-9" numbered="false" toc="default">
<name>Acknowledgements</name>
<t>
The authors wish to acknowledge <contact fullname="Puneet Agarwal"/>,
<contact fullname="David Black"/>, <contact fullname="Sami Boutros"/>,
<contact fullname="Scott Bradner"/>,
<contact fullname="Martín Casado"/>, <contact fullname="Alissa Cooper"/>,
<contact fullname="Roman Danyliw"/>, <contact fullname="Bruce Davie"/>,
<contact fullname="Anoop Ghanwani"/>, <contact fullname="Benjamin
Kaduk"/>, <contact fullname="Suresh Krishnan"/>, <contact
fullname="Mirja Kühlewind"/>, <contact fullname="Barry Leiba"/>,
<contact fullname="Daniel Migault"/>, <contact fullname="Greg
Mirsky"/>, <contact fullname="Tal Mizrahi"/>,
<contact fullname="Kathleen Moriarty"/>, <contact fullname="Magnus
Nyström"/>, <contact fullname="Adam Roach"/>, <contact fullname="Sabrina
Tanamal"/>, <contact fullname="Dave Thaler"/>, <contact fullname="Éric
Vyncke"/>,
<contact fullname="Magnus Westerlund"/>, and many other members of the
NVO3 Working Group for their reviews, comments, and suggestions.</t>
<t>
The authors would like to thank <contact fullname="Sam Aldrin"/>,
<contact fullname="Alia Atlas"/>, <contact fullname="Matthew Bocci"/>,
<contact fullname="Benson Schliesser"/>, and <contact fullname="Martin
Vigoureux"/>
for their guidance throughout the process.</t>
</section>
<date year="2009" /> <section anchor="sec-8" numbered="false" toc="default">
</front> <name>Contributors</name>
<seriesInfo name="ACM SIGCOMM" value="Computer Communication <t>
Review"/> The following individuals were authors of an earlier version of this
<seriesInfo name="DOI" value="10.1145/1594977.1592576"/> document and made significant contributions:</t>
</reference>
</references> <contact fullname="Pankaj Garg" >
</back> <organization>Microsoft Corporation</organization>
<address>
<postal>
<street>1 Microsoft Way</street>
<city>Redmond</city>
<region>WA</region><code>98052</code>
<country>United States of America</country>
</postal>
<email>pankajg@microsoft.com</email>
</address>
</contact>
</rfc> <contact fullname="Chris Wright" >
<organization>Red Hat Inc.</organization>
<address>
<postal>
<street>1801 Varsity Drive</street>
<city>Raleigh</city>
<region>NC</region><code>27606</code>
<country>United States of America</country>
</postal>
<email>chrisw@redhat.com</email>
</address>
</contact>
<contact fullname="Kenneth Duda" >
<organization>Arista Networks</organization>
<address>
<postal>
<street>5453 Great America Parkway</street>
<city>Santa Clara</city>
<region>CA</region><code>95054</code>
<country>United States of America</country>
</postal>
<email>kduda@arista.com</email>
</address>
</contact>
<contact fullname="Dinesh G. Dutt" >
<organization>Independent</organization>
<address>
<postal>
<street></street>
<city></city>
<region></region><code></code>
<country></country>
</postal>
<email>didutt@gmail.com</email>
</address>
</contact>
<contact fullname="Jon Hudson" >
<organization>Independent</organization>
<address>
<postal>
<street></street>
<city></city>
<region></region><code></code>
<country></country>
</postal>
<email>jon.hudson@gmail.com</email>
</address>
</contact>
<contact fullname="Ariel Hendel" >
<organization>Facebook, Inc.</organization>
<address>
<postal>
<street>1 Hacker Way</street>
<city>Menlo Park</city>
<region>CA</region><code>94025</code>
<country>United States of America</country>
</postal>
<email>ahendel@fb.com</email>
</address>
</contact>
</section>
</back>
</rfc>
 End of changes. 253 change blocks. 
1306 lines changed or deleted 1195 lines changed or added

This html diff was produced by rfcdiff 1.48. The latest version is available from http://tools.ietf.org/tools/rfcdiff/