| rfc9406xml2.original.xml | rfc9406.xml | |||
|---|---|---|---|---|
| <?xml version="1.0"?> | <?xml version="1.0" encoding="UTF-8"?> | |||
| <!DOCTYPE rfc SYSTEM "rfc2629.dtd"[ | ||||
| <!ENTITY rfc2119 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!-- [CS] updated by Chris 03/15/23 --> | |||
| ence.RFC.2119.xml"> | ||||
| <!ENTITY rfc5681 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!DOCTYPE rfc [ | |||
| ence.RFC.5681.xml"> | <!ENTITY nbsp "&#160;"> | |||
| <!ENTITY rfc8312 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!ENTITY zwsp "&#8203;"> | |||
| ence.RFC.8312.xml"> | <!ENTITY nbhy "&#8209;"> | |||
| <!ENTITY rfc9002 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | <!ENTITY wj "&#8288;"> | |||
| ence.RFC.9002.xml"> | ||||
| <!ENTITY rfc9260 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.9260.xml"> | ||||
| <!ENTITY rfc8174 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.8174.xml"> | ||||
| <!ENTITY rfc1191 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.1191.xml"> | ||||
| <!ENTITY rfc1122 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.1122.xml"> | ||||
| <!ENTITY rfc4821 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/refer | ||||
| ence.RFC.4821.xml"> | ||||
| ]> | ]> | |||
| <?rfc toc='yes' ?> | ||||
| <?rfc symrefs='yes' ?> | ||||
| <?rfc sortrefs='yes'?> | ||||
| <?rfc compact='yes'?> | ||||
| <?rfc comments="yes"?> | ||||
| <?rfc inline="yes" ?> | ||||
| <!-- <?rfc-ext parse-xml-in-artwork='yes' ?> --> | ||||
| <!-- <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> --> | ||||
| <rfc docName="draft-ietf-tcpm-hystartplusplus-14" category="std" ipr="trust20090 | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" docName="draft-ietf-tcpm-hystart | |||
| 2"> | plusplus-14" number="9406" submissionType="IETF" category="std" consensus="true" | |||
| ipr="trust200902" obsoletes="" updates="" xml:lang="en" tocInclude="true" symRe | ||||
| fs="true" sortRefs="true" version="3"> | ||||
| <!-- xml2rfc v2v3 conversion 3.16.0 --> | ||||
| <front> | <front> | |||
| <title abbrev='HyStart++'>HyStart++: Modified Slow Start for& | <title abbrev="HyStart++">HyStart++: Modified Slow Start for TCP</title> | |||
| nbsp;TCP</title> | <seriesInfo name="RFC" value="9406"/> | |||
| <author initials='P.' surname='Balasubramanian' fullname='Praveen Balasubram | <author initials="P." surname="Balasubramanian" fullname="Praveen Balasubram | |||
| anian'> | anian"> | |||
| <organization>Confluent</organization> | <organization>Confluent</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>899 West Evelyn Ave</street> | <street>899 West Evelyn Ave</street> | |||
| <city>Mountain View</city> | <city>Mountain View</city> | |||
| <region>CA</region> | <region>CA</region> | |||
| <code>94041</code> | <code>94041</code> | |||
| <country>USA</country> | <country>United States of America</country> | |||
| </postal> | </postal> | |||
| <email>pravb.ietf@gmail.com</email> | <email>pravb.ietf@gmail.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials='Y.' surname='Huang' fullname='Yi Huang'> | <author initials="Y." surname="Huang" fullname="Yi Huang"> | |||
| <organization>Microsoft</organization> | <organization>Microsoft</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>One Microsoft Way</street> | <street>One Microsoft Way</street> | |||
| <city>Redmond</city> | <city>Redmond</city> | |||
| <region>WA</region> | <region>WA</region> | |||
| <code>94052</code> | <code>98052</code> | |||
| <country>USA</country> | <country>United States of America</country> | |||
| </postal> | </postal> | |||
| <phone>+1 425 703 0447</phone> | <phone>+1 425 703 0447</phone> | |||
| <email>huanyi@microsoft.com</email> | <email>huanyi@microsoft.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials='M.' surname='Olson' fullname='Matt Olson'> | <author initials="M." surname="Olson" fullname="Matt Olson"> | |||
| <organization>Microsoft</organization> | <organization>Microsoft</organization> | |||
| <address> | <address> | |||
| <postal> | ||||
| <street>One Microsoft Way</street> | ||||
| <city>Redmond</city> | ||||
| <region>WA</region> | ||||
| <code>98052</code> | ||||
| <country>United States of America</country> | ||||
| </postal> | ||||
| <phone>+1 425 538 8598</phone> | <phone>+1 425 538 8598</phone> | |||
| <email>maolson@microsoft.com</email> | <email>maolson@microsoft.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <date/> | <date year="2023" month="May" /> | |||
| <area>Transport</area> | <area>tsv</area> | |||
| <workgroup>tcpm</workgroup> | ||||
| <keyword>TCP</keyword> | <keyword>TCP</keyword> | |||
| <keyword>congestion control</keyword> | <keyword>congestion control</keyword> | |||
| <abstract> | <abstract> | |||
| <t> This document describes HyStart++, a simple modification to | <t> This document describes HyStart++, a simple modification to | |||
| the slow start phase of congestion control algorithms. | the slow start phase of congestion control algorithms. | |||
| Slow start can overshoot the ideal send rate | Slow start can overshoot the ideal send rate | |||
| in many cases, causing high packet loss and poor performance. | in many cases, causing high packet loss and poor performance. | |||
| HyStart++ uses increase in round-trip delay as a heuristic to | HyStart++ uses increase in round-trip delay as a heuristic to | |||
| find an exit point before possible overshoot. | find an exit point before possible overshoot. | |||
| It also adds a mitigation to prevent jitter from causing | It also adds a mitigation to prevent jitter from causing | |||
| skipping to change at line 75 ¶ | skipping to change at line 76 ¶ | |||
| the slow start phase of congestion control algorithms. | the slow start phase of congestion control algorithms. | |||
| Slow start can overshoot the ideal send rate | Slow start can overshoot the ideal send rate | |||
| in many cases, causing high packet loss and poor performance. | in many cases, causing high packet loss and poor performance. | |||
| HyStart++ uses increase in round-trip delay as a heuristic to | HyStart++ uses increase in round-trip delay as a heuristic to | |||
| find an exit point before possible overshoot. | find an exit point before possible overshoot. | |||
| It also adds a mitigation to prevent jitter from causing | It also adds a mitigation to prevent jitter from causing | |||
| premature slow start exit. | premature slow start exit. | |||
| </t> | </t> | |||
| </abstract> | </abstract> | |||
| </front> | </front> | |||
| <middle> | <middle> | |||
| <section title='Introduction'> | <section numbered="true" toc="default"> | |||
| <t> <xref target="RFC5681"/> describes the slow start | <name>Introduction</name> | |||
| <t> <xref target="RFC5681" format="default"/> describes the slow start | ||||
| congestion control algorithm for TCP. The slow start | congestion control algorithm for TCP. The slow start | |||
| algorithm is used when the congestion window (cwnd) | algorithm is used when the congestion window (cwnd) | |||
| is less than the slow start threshold (ssthresh). | is less than the slow start threshold (ssthresh). | |||
| During slow start, in absence of packet loss signals, | During slow start, in the absence of packet loss signals, | |||
| TCP increases cwnd exponentially to probe the network capacity. | TCP increases the cwnd exponentially to probe the network capacity. | |||
| This fast growth can overshoot the ideal sending rate | This fast growth can overshoot the ideal sending rate | |||
| and cause significant packet loss which cannot always | and cause significant packet loss that cannot always | |||
| be recovered efficiently. | be recovered efficiently. | |||
| </t> | </t> | |||
| <t> HyStart++ uses increase in round-trip delay as a signal to exit | <t>HyStart++ builds upon Hybrid Start (HyStart), originally described in | |||
| <xref target="HyStart" format="default"/>. HyStart++ uses increase in | ||||
| round-trip delay as a signal to exit | ||||
| slow start before potential packet loss occurs as a result | slow start before potential packet loss occurs as a result | |||
| of overshoot. This is one of two algorithms specified in | of overshoot. This is one of two algorithms specified in | |||
| <xref target="HyStart"/>. | <xref target="HyStart" format="default"/> for finding a safe exit point fo | |||
| After the slow start exit, a new | r | |||
| slow start. After the slow start exit, a new | ||||
| Conservative Slow Start (CSS) phase is used to determine | Conservative Slow Start (CSS) phase is used to determine | |||
| whether the slow start exit was premature and to resume | whether the slow start exit was premature and to resume | |||
| slow start. This mitigation improves performance in | slow start. This mitigation improves performance in the | |||
| presence of jitter. | presence of jitter. | |||
| HyStart++ reduces packet loss and retransmissions, and | HyStart++ reduces packet loss and retransmissions, and | |||
| improves goodput in lab measurements and real world | improves goodput in lab measurements and real-world | |||
| deployments. | deployments. | |||
| </t> | </t> | |||
| <t> While this document describes Hystart++ for TCP, it can | <t> While this document describes HyStart++ for TCP, it can | |||
| also be used for other transport protocols which use slow start | also be used for other transport protocols that use slow start, | |||
| such as QUIC <xref target="RFC9002"/> | such as QUIC <xref target="RFC9002" format="default"/> | |||
| or SCTP <xref target="RFC9260"/>. | or the Stream Control Transmission Protocol (SCTP) <xref target="RFC9260" | |||
| </t> | format="default"/>. | |||
| </section> | ||||
| <section title="Terminology" anchor="term"> | ||||
| <t> The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", | ||||
| "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", | ||||
| "NOT RECOMMENDED", "MAY", | ||||
| and "OPTIONAL" in this document are to be interpreted | ||||
| as described in BCP 14 | ||||
| <xref target="RFC2119"/> <xref target="RFC8174"/> when, | ||||
| and only when, they appear in all capitals, as shown here. | ||||
| </t> | </t> | |||
| </section> | </section> | |||
| <section anchor="term" numbered="true" toc="default"> | ||||
| <section title='Definitions'> | <name>Terminology</name> | |||
| <t> We repeat here some definition from | <t>The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
| <xref target="RFC5681"/> to aid the reader. | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", | |||
| "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", | ||||
| "<bcp14>SHOULD NOT</bcp14>", | ||||
| "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | ||||
| "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document | ||||
| are to be interpreted as described in BCP 14 | ||||
| <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only | ||||
| when, they appear in all capitals, as shown here.</t> | ||||
| </section> | ||||
| <section numbered="true" toc="default"> | ||||
| <name>Definitions</name> | ||||
| <t> To aid the reader, we repeat some definitions from | ||||
| <xref target="RFC5681" format="default"/>: | ||||
| </t> | </t> | |||
| <t> SENDER MAXIMUM SEGMENT SIZE (SMSS): | <dl spacing="normal" newline="false"> | |||
| The SMSS is the size of the | <dt> SENDER MAXIMUM SEGMENT SIZE (SMSS):</dt> | |||
| <dd>The size of the | ||||
| largest segment that the sender can transmit. This value can be | largest segment that the sender can transmit. This value can be | |||
| based on the maximum transmission unit of the network, the path | based on the maximum transmission unit of the network, the Path | |||
| MTU discovery <xref target="RFC1191"/>, | MTU Discovery algorithm <xref target="RFC1191" format="default"/> | |||
| <xref target="RFC4821"/> algorithm, RMSS (see next item), | <xref target="RFC4821" format="default"/>, RMSS (see next item), | |||
| or other factors. The size does not include the TCP/IP headers | or other factors. The size does not include the TCP/IP headers | |||
| and options. | and options.</dd> | |||
| </t> | ||||
| <t> RECEIVER MAXIMUM SEGMENT SIZE (RMSS): The RMSS is the | <dt> RECEIVER MAXIMUM SEGMENT SIZE (RMSS):</dt><dd>The | |||
| size of the largest segment the receiver is willing to accept. | size of the largest segment that the receiver is willing to accept. | |||
| This is the value specified in the MSS option sent by the | This is the value specified in the MSS option sent by the | |||
| receiver during connection startup. Or, if the MSS option | receiver during connection startup. Or, if the MSS option | |||
| is not used, it is 536 bytes <xref target="RFC1122"/>. | is not used, it is 536 bytes <xref target="RFC1122" format="default"/>. | |||
| The size does not include the TCP/IP headers and | The size does not include the TCP/IP headers and | |||
| options. | options.</dd> | |||
| </t> | ||||
| <t> RECEIVER WINDOW (rwnd): The most recently advertised | <dt> RECEIVER WINDOW (rwnd):</dt><dd>The most recently advertised | |||
| receiver window. | receiver window.</dd> | |||
| </t> | ||||
| <t> CONGESTION WINDOW (cwnd): A TCP state variable that | <dt> CONGESTION WINDOW (cwnd):</dt><dd>A TCP state variable that | |||
| limits the amount of data a TCP can send. | limits the amount of data a TCP can send. | |||
| At any given time, a TCP MUST NOT send | At any given time, a TCP <bcp14>MUST NOT</bcp14> send | |||
| data with a sequence number higher than the sum of the highest | data with a sequence number higher than the sum of the highest | |||
| acknowledged sequence number and the minimum of cwnd and rwnd. | acknowledged sequence number and the minimum of the cwnd and rwnd.</dd> | |||
| </t> | </dl> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='HyStart++ Algorithm'> | <name>HyStart++ Algorithm</name> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='Summary'> | <name>Summary</name> | |||
| <t> <xref target="HyStart"/> specifies two algorithms | <t> <xref target="HyStart" format="default"/> specifies two algorithms | |||
| (a "Delay Increase" algorithm and an "Inter-Packet Arrival" | (a "Delay Increase" algorithm and an "Inter-Packet Arrival" | |||
| algorithm) to be run in parallel to detect that the sending | algorithm) to be run in parallel to detect that the sending | |||
| rate has reached capacity. In practice, the Inter-Packet | rate has reached capacity. In practice, the Inter-Packet | |||
| Arrival algorithm does not perform well and is not able | Arrival algorithm does not perform well and is not able | |||
| to detect congestion early, primarily due to ACK compression. | to detect congestion early, primarily due to ACK compression. | |||
| The idea of the Delay Increase algorithm is to look for | The idea of the Delay Increase algorithm is to look for | |||
| spikes in RTT (round-trip time), which suggest that the | spikes in RTT (round-trip time), which suggest that the | |||
| bottleneck buffer is filling up. | bottleneck buffer is filling up. | |||
| </t> | </t> | |||
| <t> In HyStart++, a TCP sender uses traditional slow start | <t> In HyStart++, a TCP sender uses standard slow start | |||
| and then uses the "Delay Increase" algorithm to trigger an | and then uses the Delay Increase algorithm to trigger an | |||
| exit from slow start. But instead of going straight from | exit from slow start. But instead of going straight from | |||
| slow start to congestion avoidance, the sender spends a | slow start to congestion avoidance, the sender spends a | |||
| number of RTTs in a Conservative Slow Start (CSS) phase | number of RTTs in a Conservative Slow Start (CSS) phase | |||
| to determine whether the exit from slow start was premature. | to determine whether the exit from slow start was premature. | |||
| During CSS, the congestion window is grown exponentially like | During CSS, the congestion window is grown exponentially in a | |||
| in regular slow start, but with a smaller exponential base, | fashion similar to regular slow start, but with a smaller exponential ba | |||
| se, | ||||
| resulting in less aggressive growth. | resulting in less aggressive growth. | |||
| If the RTT reduces during CSS, it's concluded that the RTT | If the RTT reduces during CSS, it's concluded that the RTT | |||
| spike was not related to congestion caused by the connection | spike was not related to congestion caused by the connection | |||
| sending at a rate greater than the ideal send rate, and the | sending at a rate greater than the ideal send rate, and the | |||
| connection resumes slow start. If the RTT inflation | connection resumes slow start. If the RTT inflation | |||
| persists throughout CSS, the connection enters congestion | persists throughout CSS, the connection enters congestion | |||
| avoidance. | avoidance. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='Algorithm Details'> | <name>Algorithm Details</name> | |||
| <t> The following pseudocode uses a limit, L, to control the | <t> The following pseudocode uses a limit, L, to control the | |||
| aggressiveness of the cwnd increase during both standard slow | aggressiveness of the cwnd increase during both standard slow | |||
| start and CSS. While an arriving ACK may newly acknowledge an | start and CSS. While an arriving ACK may newly acknowledge an | |||
| arbitrary number of bytes, the Hystart++ algorithm limits the | arbitrary number of bytes, the HyStart++ algorithm limits the | |||
| number of those bytes applied to increase the cwnd to L*SMSS bytes. </t> | number of those bytes applied to increase the cwnd to L*SMSS bytes. </t> | |||
| <t> lastRoundMinRTT and currentRoundMinRTT are initialized | <t> lastRoundMinRTT and currentRoundMinRTT are initialized | |||
| to infinity at the initialization time. currRTT is the RTT | to infinity at the initialization time. currRTT is the RTT | |||
| sampled from the latest incoming ACK and initialized to | sampled from the latest incoming ACK and initialized to | |||
| infinity. </t> | infinity. </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| lastRoundMinRTT = infinity | lastRoundMinRTT = infinity | |||
| currentRoundMinRTT = infinity | currentRoundMinRTT = infinity | |||
| currRTT = infinity | currRTT = infinity | |||
| </sourcecode> | </sourcecode> | |||
| <t>HyStart++ measures rounds using sequence numbers, as | ||||
| <t>Hystart++ measures rounds using sequence numbers, as | follows:</t> | |||
| follows: | <ul spacing="normal"> | |||
| Define windowEnd as a sequence number initialized to SND.NXT. | <li>Define windowEnd as a sequence number initialized to SND.NXT.</li> | |||
| When windowEnd is ACKed, the current round ends and windowEnd | <li>When windowEnd is ACKed, the current round ends and windowEnd | |||
| is set to SND.NXT.</t> | is set to SND.NXT.</li> | |||
| </ul> | ||||
| <t> At the start of each round during standard slow start | <t> At the start of each round during standard slow start | |||
| (<xref target="RFC5681"/>) and CSS, initialize the variables | <xref target="RFC5681" format="default"/> and CSS, initialize the variab | |||
| used to compute last round and current round's minimum RTT: | les | |||
| <sourcecode type="pseudocode"> | used to compute the last round's and current round's minimum RTT: | |||
| </t> | ||||
| <sourcecode> | ||||
| lastRoundMinRTT = currentRoundMinRTT | lastRoundMinRTT = currentRoundMinRTT | |||
| currentRoundMinRTT = infinity | currentRoundMinRTT = infinity | |||
| rttSampleCount = 0 | rttSampleCount = 0 | |||
| </sourcecode> | </sourcecode> | |||
| </t> | ||||
| <t> For each arriving ACK in slow start, where N is the | <t> For each arriving ACK in slow start, where N is the | |||
| number of previously unacknowledged bytes acknowledged | number of previously unacknowledged bytes acknowledged | |||
| in the arriving ACK: </t> | in the arriving ACK: </t> | |||
| <t>Update the cwnd: </t> | <t>Update the cwnd: </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| cwnd = cwnd + min(N, L * SMSS) | cwnd = cwnd + min(N, L * SMSS) | |||
| </sourcecode> | </sourcecode> | |||
| <t> Keep track of minimum observed RTT: </t> | <t> Keep track of the minimum observed RTT: </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | |||
| rttSampleCount += 1 | rttSampleCount += 1 | |||
| </sourcecode> | </sourcecode> | |||
| <t>For rounds where at least N_RTT_SAMPLE RTT samples have been | <t>For rounds where at least N_RTT_SAMPLE RTT samples have been | |||
| obtained and currentRoundMinRTT and lastRoundMinRTT are valid, | obtained and currentRoundMinRTT and lastRoundMinRTT are valid, | |||
| check if delay increase triggers slow start exit:</t> | check to see if delay increase triggers slow start exit:</t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| if ((rttSampleCount >= N_RTT_SAMPLE) AND | if ((rttSampleCount >= N_RTT_SAMPLE) AND | |||
| (currentRoundMinRTT != infinity) AND | (currentRoundMinRTT != infinity) AND | |||
| (lastRoundMinRTT != infinity)) | (lastRoundMinRTT != infinity)) | |||
| Compute a RTT Threshold clamped between MIN_RTT_THRESH and MAX_RTT_THRESH | RttThresh = max(MIN_RTT_THRESH, | |||
| RttThresh = max(MIN_RTT_THRESH, min(lastRoundMinRTT / MIN_RTT_DIVISOR, MAX_RTT | min(lastRoundMinRTT / MIN_RTT_DIVISOR, MAX_RTT_THRESH)) | |||
| _THRESH)) | ||||
| if (currentRoundMinRTT >= (lastRoundMinRTT + RttThresh)) | if (currentRoundMinRTT >= (lastRoundMinRTT + RttThresh)) | |||
| cssBaselineMinRtt = currentRoundMinRTT | cssBaselineMinRtt = currentRoundMinRTT | |||
| exit slow start and enter CSS | exit slow start and enter CSS | |||
| </sourcecode> | </sourcecode> | |||
| <t> For each arriving ACK in CSS, where N is the number | <t> For each arriving ACK in CSS, where N is the number | |||
| of previously unacknowledged bytes acknowledged in | of previously unacknowledged bytes acknowledged in | |||
| the arriving ACK:</t> | the arriving ACK:</t> | |||
| <t> Update the cwnd: </t> | <t> Update the cwnd: </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| cwnd = cwnd + (min(N, L * SMSS) / CSS_GROWTH_DIVISOR) | cwnd = cwnd + (min(N, L * SMSS) / CSS_GROWTH_DIVISOR) | |||
| </sourcecode> | </sourcecode> | |||
| <t> Keep track of minimum observed RTT: </t> | <t> Keep track of the minimum observed RTT: </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | currentRoundMinRTT = min(currentRoundMinRTT, currRTT) | |||
| rttSampleCount += 1 | rttSampleCount += 1 | |||
| </sourcecode> | </sourcecode> | |||
| <t> For CSS rounds where at least N_RTT_SAMPLE RTT | <t> For CSS rounds where at least N_RTT_SAMPLE RTT | |||
| samples have been obtained, check if current round's | samples have been obtained, check to see if the current round's | |||
| minRTT drops below baseline indicating that HyStart | minRTT drops below baseline (cssBaselineMinRtt) indicating that | |||
| exit was spurious: | slow start exit was spurious: | |||
| </t> | </t> | |||
| <sourcecode type="pseudocode"> | <sourcecode> | |||
| if (currentRoundMinRTT &lt; cssBaselineMinRtt) | if (currentRoundMinRTT &lt; cssBaselineMinRtt) | |||
| cssBaselineMinRtt = infinity | cssBaselineMinRtt = infinity | |||
| resume slow start including HyStart++ | resume slow start including HyStart++ | |||
| </sourcecode> | </sourcecode> | |||
| <t> CSS lasts at most CSS_ROUNDS rounds. If the transition | <t> CSS lasts at most CSS_ROUNDS rounds. If the transition | |||
| into CSS happens in the middle of a round, that partial | into CSS happens in the middle of a round, that partial | |||
| round counts towards the limit. </t> | round counts towards the limit. </t> | |||
| <t> If CSS_ROUNDS rounds are complete, | <t> If CSS_ROUNDS rounds are complete, | |||
| enter congestion avoidance by setting ssthresh to current cwnd. </t> | enter congestion avoidance by setting the ssthresh to the current cwnd. | |||
| <sourcecode type="pseudocode"> | </t> | |||
| <sourcecode> | ||||
| ssthresh = cwnd | ssthresh = cwnd | |||
| </sourcecode> | </sourcecode> | |||
| <t> If loss or Explicit Congestion Notification (ECN) marking is observe | ||||
| <t> If loss or ECN-marking is observed anytime during | d at any time during | |||
| standard slow start or CSS, enter congestion avoidance | standard slow start or CSS, enter congestion avoidance | |||
| by setting ssthresh to current cwnd. </t> | by setting the ssthresh to the current cwnd. | |||
| <sourcecode type="pseudocode"> | </t> | |||
| <sourcecode> | ||||
| ssthresh = cwnd | ssthresh = cwnd | |||
| </sourcecode> | </sourcecode> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='Tuning constants and other considerations'> | <name>Tuning Constants and Other Considerations</name> | |||
| <t> It is RECOMMENDED that a HyStart++ implementation use | <t> It is <bcp14>RECOMMENDED</bcp14> that a HyStart++ implementation use | |||
| the following constants: | the following constants: | |||
| <sourcecode type="pseudocode"> | </t> | |||
| <sourcecode> | ||||
| MIN_RTT_THRESH = 4 msec | MIN_RTT_THRESH = 4 msec | |||
| MAX_RTT_THRESH = 16 msec | MAX_RTT_THRESH = 16 msec | |||
| MIN_RTT_DIVISOR = 8 | MIN_RTT_DIVISOR = 8 | |||
| N_RTT_SAMPLE = 8 | N_RTT_SAMPLE = 8 | |||
| CSS_GROWTH_DIVISOR = 4 | CSS_GROWTH_DIVISOR = 4 | |||
| CSS_ROUNDS = 5 | CSS_ROUNDS = 5 | |||
| L = infinity if paced, L = 8 if non-paced | L = infinity if paced, L = 8 if non-paced | |||
| </sourcecode> | </sourcecode> | |||
| </t> | <t> These constants have been determined with lab measurements | |||
| <t> These constants have been determined with lab measurements | and real-world deployments. An implementation <bcp14>MAY</bcp14> tune them | |||
| and real world deployments. An implementation MAY tune them for | for | |||
| different network characteristics. | different network characteristics. | |||
| </t> | </t> | |||
| <t> The delay increase sensitivity is determined | <t> The delay increase sensitivity is determined | |||
| by MIN_RTT_THRESH and MAX_RTT_THRESH. Smaller values of | by MIN_RTT_THRESH and MAX_RTT_THRESH. Smaller values of | |||
| MIN_RTT_THRESH may cause spurious exits from slow start. Larger | MIN_RTT_THRESH may cause spurious exits from slow start. Larger | |||
| values of MAX_RTT_THRESH may result in slow start not exiting | values of MAX_RTT_THRESH may result in slow start not exiting | |||
| until loss is encountered for connections on large RTT paths. | until loss is encountered for connections on large RTT paths. | |||
| </t> | </t> | |||
| <t>MIN_RTT_DIVISOR is a fraction of RTT to compute delay threshold. | <t>MIN_RTT_DIVISOR is a fraction of RTT to compute the delay threshold. | |||
| A smaller value would mean a bigger threshold and thus less sensitive to | A smaller value would mean a larger threshold and thus less sensitivity to | |||
| delay increase, and vice versa. | delay increase, and vice versa. | |||
| </t> | </t> | |||
| <t> While all TCP implementations are REQUIRED to take at least one RTT | <t> While all TCP implementations are <bcp14>REQUIRED</bcp14> to take at | |||
| sample each round, implementations of HyStart++ are RECOMMENDED to take | least one RTT | |||
| sample each round, implementations of HyStart++ are <bcp14>RECOMMENDED</bc | ||||
| p14> to take | ||||
| at least N_RTT_SAMPLE RTT samples. Using lower values of N_RTT_SAMPLE will | at least N_RTT_SAMPLE RTT samples. Using lower values of N_RTT_SAMPLE will | |||
| lower the accuracy of the measured RTT for the round; | lower the accuracy of the measured RTT for the round; | |||
| higher values will improve accuracy at the cost of more | higher values will improve accuracy at the cost of more | |||
| processing. | processing. | |||
| </t> | </t> | |||
| <t> The minimum value of CSS_GROWTH_DIVISOR MUST be at least 2. | <t> The minimum value of CSS_GROWTH_DIVISOR <bcp14>MUST</bcp14> be at le | |||
| ast 2. | ||||
| A value of 1 results in the same aggressive behavior as regular | A value of 1 results in the same aggressive behavior as regular | |||
| slow start. Values larger than 4 | slow start. Values larger than 4 | |||
| will cause the algorithm to be less aggressive and may be less | will cause the algorithm to be less aggressive and may be less | |||
| performant. | performant. | |||
| </t> | </t> | |||
| <t> Smaller values of CSS_ROUNDS may miss detecting jitter | <t> Smaller values of CSS_ROUNDS may miss detecting jitter, | |||
| and larger values may limit performance. | and larger values may limit performance. | |||
| </t> | </t> | |||
| <t> Packet pacing <xref target="ASA00"/> is a possible mechanism to | <t> Packet pacing <xref target="ASA00" format="default"/> is a possible | |||
| avoid large bursts and their associated harm. A paced TCP implementation S | mechanism to | |||
| HOULD | avoid large bursts and their associated harm. A paced TCP implementation < | |||
| use L = infinity. Burst concerns are mitigated by pacing and this | bcp14>SHOULD</bcp14> | |||
| use L = infinity. Burst concerns are mitigated by pacing, and this | ||||
| setting allows for optimal cwnd growth on modern networks. | setting allows for optimal cwnd growth on modern networks. | |||
| </t> | </t> | |||
| <t> For TCP implementations that pace to mitigate burst concerns, L | <t> For TCP implementations that pace to mitigate burst concerns, L | |||
| values smaller than INFINITY may suffer performance problems due to slow | values smaller than infinity may suffer performance problems due to slow | |||
| cwnd growth in high speed networks. For non-paced TCP implementations, L v | cwnd growth in high-speed networks. For non-paced TCP implementations, L v | |||
| alues | alues | |||
| smaller than 8 may suffer performance problems due to slow cwnd growth in | smaller than 8 may suffer performance problems due to slow cwnd growth in | |||
| high | high-speed networks; L values larger than 8 may cause an increase in burstiness | |||
| speed networks; L values larger than 8 may cause an increase in burstiness | ||||
| and thereby loss rates, and result in poor performance. | and thereby loss rates, and result in poor performance. | |||
| </t> | </t> | |||
| <t> An implementation SHOULD use HyStart++ only for the | <t> An implementation <bcp14>SHOULD</bcp14> use HyStart++ only for the | |||
| initial slow start (when ssthresh is at its initial value | initial slow start (when the ssthresh is at its initial value | |||
| of arbitrarily high per <xref target="RFC5681"/>) and fall | of arbitrarily high per <xref target="RFC5681" format="default"/>) and fal | |||
| back to using traditional slow start for the remainder of | l | |||
| back to using standard slow start for the remainder of | ||||
| the connection lifetime. This is acceptable because subsequent | the connection lifetime. This is acceptable because subsequent | |||
| slow starts will use the discovered ssthresh value to exit slow | slow starts will use the discovered ssthresh value to exit slow | |||
| start and avoid the overshoot problem. An implementation MAY | start and avoid the overshoot problem. An implementation <bcp14>MAY</bcp14> | |||
| use HyStart++ to grow the restart window | use HyStart++ to grow the restart window | |||
| (<xref target="RFC5681"/>) after a long idle period. | <xref target="RFC5681" format="default"/> after a long idle period. | |||
| </t> | </t> | |||
| <t> | <t> | |||
| In application limited scenarios, the amount of data in | In application-limited scenarios, the amount of data in | |||
| flight could fall below the bandwidth-delay product (BDP) and | flight could fall below the bandwidth-delay product (BDP) and | |||
| result in smaller RTT samples which can trigger an exit back to | result in smaller RTT samples, which can trigger an exit back to | |||
| slow start. It is expected that a connection might oscillate | slow start. It is expected that a connection might oscillate | |||
| between CSS and slow start in such scenarios. But this behavior | between CSS and slow start in such scenarios. But this behavior | |||
| will neither result in a connection prematurely entering | will neither result in a connection prematurely entering | |||
| congestion avoidance nor cause overshooting compared to | congestion avoidance nor cause overshooting compared to | |||
| slow start. | slow start. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='Deployments and Performance Evaluations'> | <name>Deployments and Performance Evaluations</name> | |||
| <t> At the time of this writing, HyStart++ as described | ||||
| <t> As of February 2023, HyStart++ as described | ||||
| in this document has been default enabled for all TCP | in this document has been default enabled for all TCP | |||
| connections in the Windows operating system for over | connections in the Windows operating system for over | |||
| two years with pacing disabled and an actual L = 8. | two years with pacing disabled and an actual L = 8. | |||
| </t> | </t> | |||
| <t> In lab measurements with Windows TCP, HyStart++ shows | <t> In lab measurements with Windows TCP, HyStart++ shows | |||
| both goodput improvements as well as reductions in packet | goodput improvements as well as reductions in packet | |||
| loss and retransmissions compared to traditional slow start. | loss and retransmissions compared to standard slow start. | |||
| For example, across a variety of tests on a 100 Mbps link | For example, across a variety of tests on a 100 Mbps link | |||
| with a bottleneck buffer size of bandwidth-delay product, | with a bottleneck buffer size of bandwidth-delay product, | |||
| HyStart++ reduces bytes retransmitted by 50% and | HyStart++ reduces bytes retransmitted by 50% and | |||
| retransmission timeouts (RTOs) by 36%. | retransmission timeouts (RTOs) by 36%. | |||
| </t> | </t> | |||
| <t> In an A/B test where we compare HyStart++ draft 01 to | <t> In an A/B test where we compared an implementation of HyStart++ | |||
| traditional slow start across a large Windows device | (based on an earlier draft version of this document) to | |||
| standard slow start across a large Windows device | ||||
| population, out of 52 billion TCP connections, 0.7% of | population, out of 52 billion TCP connections, 0.7% of | |||
| connections move from 1 RTO to 0 RTOs and another 0.7% | connections move from 1 RTO to 0 RTOs and another 0.7% of | |||
| connections move from 2 RTOs to 1 RTO with HyStart++. | connections move from 2 RTOs to 1 RTO with HyStart++. | |||
| This test did not focus on send-heavy connections and | This test did not focus on send-heavy connections, and | |||
| the impact on send-heavy connections is likely much | the impact on send-heavy connections is likely much | |||
| higher. We plan to conduct more such production | higher. We plan to conduct more such production | |||
| experiments to gather more data in the future. | experiments to gather more data in the future. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='Security Considerations'> | <name>Security Considerations</name> | |||
| <t> HyStart++ enhances slow start and inherits the general | <t> HyStart++ enhances slow start and inherits the general | |||
| security considerations discussed in <xref target="RFC5681"/>. | security considerations discussed in <xref target="RFC5681" format="default"/>. | |||
| </t> | </t> | |||
| <t>An attacker can cause HyStart++ to exit slow start prematurely | ||||
| <t>An attacker can cause Hystart++ to exit slow start prematurely | ||||
| and impair the performance of a TCP connection by, for example, | and impair the performance of a TCP connection by, for example, | |||
| dropping data packets or their acknowledgements.</t> | dropping data packets or their acknowledgments.</t> | |||
| <t>The ACK division attack outlined in <xref target="SCWA99" format="defau | ||||
| <t>The ACK division attack outlined in <xref target="SCWA99"/> does not af | lt"/> does not affect | |||
| fect | HyStart++ because the congestion window increase in HyStart++ is based | |||
| Hystart++ because the congestion window increase in Hystart++ is based | ||||
| on the number of bytes newly acknowledged in each arriving ACK rather than by | on the number of bytes newly acknowledged in each arriving ACK rather than by | |||
| a particular constant on each arriving ACK. | a particular constant on each arriving ACK. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section numbered="true" toc="default"> | ||||
| <section title='IANA Considerations'> | <name>IANA Considerations</name> | |||
| <t> This document has no actions for IANA. | <t>This document has no IANA actions.</t> | |||
| </t> | ||||
| </section> | ||||
| <section title='Acknowledgements'> | ||||
| <t> During the discussions of this work on the TCPM mailing list, in worki | ||||
| ng group meetings, | ||||
| helpful comments, critiques, and reviews were received from (listed alph | ||||
| abetically by last name): | ||||
| Mark Allman, Bob Briscoe, Neal Cardwell, Yuchung Cheng, Junho Choi, Mart | ||||
| in Duke, Reese Enghardt, | ||||
| Christian Huitema, Ilpo Järvinen, Yoshifumi Nishida, Randall Stewart, an | ||||
| d Michael Tuexen. | ||||
| </t> | ||||
| </section> | </section> | |||
| </middle> | </middle> | |||
| <back> | <back> | |||
| <references title='Normative References'> | <references> | |||
| &rfc2119; | <name>References</name> | |||
| &rfc5681; | <references> | |||
| &rfc8174; | <name>Normative References</name> | |||
| </references> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2 | |||
| 119.xml"/> | ||||
| <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5 | ||||
| 681.xml"/> | ||||
| <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | ||||
| 174.xml"/> | ||||
| </references> | ||||
| <references> | ||||
| <name>Informative References</name> | ||||
| <references title='Informative References'> | <reference anchor="HyStart" target="https://doi.org/10.1016/j.comnet.201 | |||
| <reference anchor='HyStart' target='https://doi.org/10.1016/j.comnet.2011. | 1.01.014"> | |||
| 01.014'> | <front> | |||
| <front> | <title>Taming the elephants: New TCP slow start</title> | |||
| <title>Taming the elephants: New TCP slow start</title> | <author initials="S." surname="Ha"> | |||
| <author initials="S." surname="Ha"> | ||||
| </author> | </author> | |||
| <author initials="I." surname="Ree"> | <author initials="I." surname="Rhee"> | |||
| </author> | </author> | |||
| <date year="2011"/> | <date month="June" year="2011"/> | |||
| </front> | </front> | |||
| <seriesInfo name="" value="Computer Networks vol. 55, no. 9, pp. 2092-21 | <refcontent>Computer Networks vol. 55, no. 9, pp. 2092-2110</refconten | |||
| 10"/> | t> | |||
| <seriesInfo name="DOI" value="10.1016/j.comnet.2011.01.014"/> | <seriesInfo name="DOI" value="10.1016/j.comnet.2011.01.014"/> | |||
| </reference> | </reference> | |||
| &rfc9002; | ||||
| &rfc9260; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
| &rfc1191; | 002.xml"/> | |||
| &rfc4821; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
| &rfc1122; | 260.xml"/> | |||
| <reference anchor='SCWA99' target='https://doi.org/10.1145/505696.505704'> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1 | |||
| <front> | 191.xml"/> | |||
| <title>TCP congestion control with a misbehaving receiver</title> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.4 | |||
| <author initials="S." surname="Savage"> | 821.xml"/> | |||
| <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.1 | ||||
| 122.xml"/> | ||||
| <reference anchor="SCWA99" target="https://doi.org/10.1145/505696.505704 | ||||
| "> | ||||
| <front> | ||||
| <title>TCP congestion control with a misbehaving receiver</title> | ||||
| <author initials="S." surname="Savage"> | ||||
| </author> | </author> | |||
| <author initials="N." surname="Cardwell"> | <author initials="N." surname="Cardwell"> | |||
| </author> | </author> | |||
| <author initials="D." surname="Wetherall"> | <author initials="D." surname="Wetherall"> | |||
| </author> | </author> | |||
| <author initials="T." surname="Anderson"> | <author initials="T." surname="Anderson"> | |||
| </author> | </author> | |||
| <date year="1999"/> | <date month="October" year="1999"/> | |||
| </front> | </front> | |||
| <seriesInfo name="" value="ACM Computer Communication Review, 29(5)"/> | <refcontent>ACM SIGCOMM Computer Communication Review, vol. 29, issue | |||
| <seriesInfo name="DOI" value="10.1145/505696.505704"/> | 5, pp. 71-78</refcontent> | |||
| </reference> | <seriesInfo name="DOI" value="10.1145/505696.505704"/> | |||
| <reference anchor='ASA00' target='https://doi.org/10.1109/INFCOM.2000.8324 | </reference> | |||
| 83'> | ||||
| <front> | <reference anchor="ASA00" target="https://doi.org/10.1109/INFCOM.2000.83 | |||
| <title>Understanding the Performance of TCP Pacing</title> | 2483"> | |||
| <author initials="A." surname="Aggarwal"> | <front> | |||
| <title>Understanding the performance of TCP pacing</title> | ||||
| <author initials="A." surname="Aggarwal"> | ||||
| </author> | </author> | |||
| <author initials="S." surname="Savage"> | <author initials="S." surname="Savage"> | |||
| </author> | </author> | |||
| <author initials="T." surname="Anderson"> | <author initials="T." surname="Anderson"> | |||
| </author> | </author> | |||
| <date year="2000"/> | <date month="March" year="2000"/> | |||
| </front> | </front> | |||
| <seriesInfo name="" value="Proceedings IEEE INFOCOM 2000"/> | <refcontent>Proceedings IEEE INFOCOM 2000</refcontent> | |||
| <seriesInfo name="DOI" value="10.1109/INFCOM.2000.832483"/> | <seriesInfo name="DOI" value="10.1109/INFCOM.2000.832483"/> | |||
| </reference> | </reference> | |||
| </references> | ||||
| </references> | </references> | |||
| <section numbered="false" toc="default"> | ||||
| <name>Acknowledgments</name> | ||||
| <t> During the discussions of this work on the TCPM mailing list and in wo | ||||
| rking group meetings, | ||||
| helpful comments, critiques, and reviews were received from (listed alph | ||||
| abetically by last name) | ||||
| <contact fullname="Mark Allman"/>, <contact fullname="Bob Briscoe"/>, <c | ||||
| ontact fullname="Neal Cardwell"/>, <contact fullname="Yuchung Cheng"/>, <contact | ||||
| fullname="Junho Choi"/>, <contact fullname="Martin Duke"/>, <contact fullname=" | ||||
| Reese Enghardt"/>, | ||||
| <contact fullname="Christian Huitema"/>, <contact fullname="Ilpo Järvine | ||||
| n"/>, <contact fullname="Yoshifumi Nishida"/>, <contact fullname="Randall Stewar | ||||
| t"/>, and <contact fullname="Michael Tüxen"/>. | ||||
| </t> | ||||
| </section> | ||||
| </back> | </back> | |||
| </rfc> | </rfc> | |||
| End of changes. 92 change blocks. | ||||
| 281 lines changed or deleted | 297 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. | ||||