rfc8684xml2.original.xml | rfc8684.xml | |||
---|---|---|---|---|
<?xml version="1.0" encoding="US-ASCII"?> | <?xml version='1.0' encoding='utf-8'?> | |||
<!-- Convert to HTML and Text with xml2rfc: http://xml2rfc.ietf.org. --> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" version="3" category="std" conse | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ | nsus="true" docName="draft-ietf-mptcp-rfc6824bis-18" indexInclude="true" ipr="tr | |||
<!ENTITY RFC5533 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ust200902" number="8684" obsoletes="6824" prepTime="2020-03-30T17:51:35" scripts | |||
RFC.5533.xml"> | ="Common,Latin" sortRefs="true" submissionType="IETF" symRefs="true" tocDepth="3 | |||
<!ENTITY RFC5062 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | " tocInclude="true" xml:lang="en"> | |||
RFC.5062.xml"> | <link href="https://datatracker.ietf.org/doc/draft-ietf-mptcp-rfc6824bis-18" r | |||
<!ENTITY RFC5061 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | el="prev"/> | |||
RFC.5061.xml"> | <link href="https://dx.doi.org/10.17487/rfc8684" rel="alternate"/> | |||
<!ENTITY RFC4960 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | <link href="urn:issn:2070-1721" rel="alternate"/> | |||
RFC.4960.xml"> | ||||
<!ENTITY RFC4987 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.4987.xml"> | ||||
<!ENTITY RFC6234 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6234.xml"> | ||||
<!ENTITY RFC4086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.4086.xml"> | ||||
<!ENTITY RFC5681 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.5681.xml"> | ||||
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.2119.xml"> | ||||
<!ENTITY RFC2992 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.2992.xml"> | ||||
<!ENTITY RFC2979 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.2979.xml"> | ||||
<!ENTITY RFC2104 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.2104.xml"> | ||||
<!ENTITY RFC2018 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.2018.xml"> | ||||
<!ENTITY RFC1918 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.1918.xml"> | ||||
<!ENTITY RFC0793 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.0793.xml"> | ||||
<!ENTITY RFC7323 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.7323.xml"> | ||||
<!ENTITY RFC1122 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.1122.xml"> | ||||
<!ENTITY RFC3135 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.3135.xml"> | ||||
<!ENTITY RFC3022 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.3022.xml"> | ||||
<!ENTITY RFC6181 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6181.xml"> | ||||
<!ENTITY RFC6182 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6182.xml"> | ||||
<!ENTITY RFC6356 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6356.xml"> | ||||
<!ENTITY RFC6555 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6555.xml"> | ||||
<!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.8126.xml"> | ||||
<!ENTITY RFC6897 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6897.xml"> | ||||
<!ENTITY RFC6528 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.6528.xml"> | ||||
<!ENTITY RFC5961 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.5961.xml"> | ||||
<!ENTITY RFC7413 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.7413.xml"> | ||||
<!ENTITY RFC7430 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.7430.xml"> | ||||
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.8174.xml"> | ||||
<!ENTITY RFC8041 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. | ||||
RFC.8041.xml"> | ||||
]> | ||||
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> | ||||
<?rfc strict="no" ?> | ||||
<?rfc toc="yes"?> | ||||
<?rfc tocdepth="4"?> | ||||
<?rfc symrefs="yes"?> | ||||
<?rfc sortrefs="yes" ?> | ||||
<?rfc compact="yes" ?> | ||||
<?rfc subcompact="no" ?> | ||||
<?rfc rfcedstyle="yes"?> | ||||
<rfc category="std" docName="draft-ietf-mptcp-rfc6824bis-18" ipr="trust200902" o | ||||
bsoletes="6824"> | ||||
<front> | <front> | |||
<title abbrev="Multipath TCP">TCP Extensions for Multipath Operation with Multiple Addresses</title> | <title abbrev="Multipath TCP">TCP Extensions for Multipath Operation with Multiple Addresses</title> | |||
<seriesInfo name="RFC" value="8684" stream="IETF"/> | ||||
<author fullname="Alan Ford" initials="A." surname="Ford"> | <author fullname="Alan Ford" initials="A." surname="Ford"> | |||
<organization>Pexip</organization> | <organization showOnFrontPage="true">Pexip</organization> | |||
<address> | <address> | |||
<!-- <postal> | ||||
<street>Beech Court</street> | ||||
<city>Hurst</city> | ||||
<region>Berkshire</region> | ||||
<code>RG10 0RQ</code> | ||||
<country>UK</country> | ||||
</postal> --> | ||||
<email>alan.ford@gmail.com</email> | <email>alan.ford@gmail.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Costin Raiciu" initials="C." surname="Raiciu"> | <author fullname="Costin Raiciu" initials="C." surname="Raiciu"> | |||
<organization abbrev="U. Politechnica of Bucharest">University Politehnica of Bucharest</organization> | <organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true">University Politehnica of Bucharest</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Splaiul Independentei 313</street> | <street>Splaiul Independentei 313</street> | |||
<city>Bucharest</city> | <city>Bucharest</city> | |||
<country>Romania</country> | <country>Romania</country> | |||
</postal> | </postal> | |||
<email>costin.raiciu@cs.pub.ro</email> | <email>costin.raiciu@cs.pub.ro</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Mark Handley" initials="M." surname="Handley"> | <author fullname="Mark Handley" initials="M." surname="Handley"> | |||
<organization abbrev="U. College London">University College London</organization> | <organization abbrev="U. College London" showOnFrontPage="true">University College London</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Gower Street</street> | <street>Gower Street</street> | |||
<city>London</city> | <city>London</city> | |||
<code>WC1E 6BT</code> | <code>WC1E 6BT</code> | |||
<country>UK</country> | <country>United Kingdom</country> | |||
</postal> | </postal> | |||
<email>m.handley@cs.ucl.ac.uk</email> | <email>m.handley@cs.ucl.ac.uk</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"> | <author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"> | |||
<organization abbrev="U. catholique de Louvain">Université catholique de Louvain</organization> | <organization abbrev="U. catholique de Louvain" ascii="Universite catholique de Louvain" showOnFrontPage="true">Université catholique de Louvain</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Pl. Ste Barbe, 2</street> | <street>Pl. Ste Barbe, 2</street> | |||
<code>1348</code> | <code>1348</code> | |||
<city>Louvain-la-Neuve</city> | <city>Louvain-la-Neuve</city> | |||
<country>Belgium</country> | <country>Belgium</country> | |||
</postal> | </postal> | |||
<email>olivier.bonaventure@uclouvain.be</email> | <email>olivier.bonaventure@uclouvain.be</email> | |||
</address> | </address> | |||
</author> | </author> | |||
skipping to change at line 101 ¶ | skipping to change at line 50 ¶ | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street>Pl. Ste Barbe, 2</street> | <street>Pl. Ste Barbe, 2</street> | |||
<code>1348</code> | <code>1348</code> | |||
<city>Louvain-la-Neuve</city> | <city>Louvain-la-Neuve</city> | |||
<country>Belgium</country> | <country>Belgium</country> | |||
</postal> | </postal> | |||
<email>olivier.bonaventure@uclouvain.be</email> | <email>olivier.bonaventure@uclouvain.be</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<author fullname="Christoph Paasch" initials="C." surname="Paasch"> | <author fullname="Christoph Paasch" initials="C." surname="Paasch"> | |||
<organization abbrev="Apple, Inc.">Apple, Inc.</organization> | <organization abbrev="Apple, Inc." showOnFrontPage="true">Apple, Inc.</organization> | |||
<address> | <address> | |||
<postal> | <postal> | |||
<street></street> | <street/> | |||
<city>Cupertino</city> | <city>Cupertino</city> | |||
<country>US</country> | <region>CA</region> | |||
<country>United States of America</country> | ||||
</postal> | </postal> | |||
<email>cpaasch@apple.com</email> | <email>cpaasch@apple.com</email> | |||
</address> | </address> | |||
</author> | </author> | |||
<date month="03" year="2020"/> | ||||
<date year="2019" /> | <keyword>tcp</keyword> | |||
<keyword>extensions</keyword> | ||||
<area>General</area> | <keyword>multipath</keyword> | |||
<workgroup>Internet Engineering Task Force</workgroup> | <keyword>multihomed</keyword> | |||
<keyword>tcp extensions multipath multihomed subflow</keyword> | <keyword>subflow</keyword> | |||
<abstract pn="section-abstract"> | ||||
<abstract> | <t pn="section-abstract-1">TCP/IP communication is currently restricted to | |||
<t>TCP/IP communication is currently restricted to a single path per conne | a single path per connection, yet multiple paths often exist between peers. The | |||
ction, yet multiple paths often exist between peers. The simultaneous use of the | simultaneous use of these multiple paths for a TCP/IP session would improve res | |||
se multiple paths for a TCP/IP session would improve resource usage within the n | ource usage within the network and thus improve user experience through higher t | |||
etwork and, thus, improve user experience through higher throughput and improved | hroughput and improved resilience to network failure.</t> | |||
resilience to network failure.</t> | <t pn="section-abstract-2">Multipath TCP provides the ability to simultane | |||
ously use multiple | ||||
<t>Multipath TCP provides the ability to simultaneously use multiple paths | paths between peers. This document presents a set of extensions to | |||
between peers. This document presents a set of extensions to traditional TCP to | traditional TCP to support multipath operation. The protocol offers the | |||
support multipath operation. The protocol offers the same type of service to ap | same type of service to applications as TCP (i.e., a reliable bytestream), | |||
plications as TCP (i.e., reliable bytestream), and it provides the components ne | and it provides the components necessary to establish and use multiple TCP flow | |||
cessary to establish and use multiple TCP flows across potentially disjoint path | s across potentially disjoint paths.</t> | |||
s.</t> | <t pn="section-abstract-3">This document specifies v1 of Multipath TCP, ob | |||
soleting v0 as | ||||
<t>This document specifies v1 of Multipath TCP, obsoleting v0 as specified | specified in RFC 6824, through clarifications and modifications primarily | |||
in RFC6824, through clarifications and modifications primarily driven by deploy | driven by deployment experience.</t> | |||
ment experience.</t> | ||||
</abstract> | </abstract> | |||
<boilerplate> | ||||
<section anchor="status-of-memo" numbered="false" removeInRFC="false" toc= | ||||
"exclude" pn="section-boilerplate.1"> | ||||
<name slugifiedName="name-status-of-this-memo">Status of This Memo</name | ||||
> | ||||
<t pn="section-boilerplate.1-1"> | ||||
This is an Internet Standards Track document. | ||||
</t> | ||||
<t pn="section-boilerplate.1-2"> | ||||
This document is a product of the Internet Engineering Task Force | ||||
(IETF). It represents the consensus of the IETF community. It has | ||||
received public review and has been approved for publication by | ||||
the Internet Engineering Steering Group (IESG). Further | ||||
information on Internet Standards is available in Section 2 of | ||||
RFC 7841. | ||||
</t> | ||||
<t pn="section-boilerplate.1-3"> | ||||
Information about the current status of this document, any | ||||
errata, and how to provide feedback on it may be obtained at | ||||
<eref target="https://www.rfc-editor.org/info/rfc8684" brackets="non | ||||
e"/>. | ||||
</t> | ||||
</section> | ||||
<section anchor="copyright" numbered="false" removeInRFC="false" toc="excl | ||||
ude" pn="section-boilerplate.2"> | ||||
<name slugifiedName="name-copyright-notice">Copyright Notice</name> | ||||
<t pn="section-boilerplate.2-1"> | ||||
Copyright (c) 2020 IETF Trust and the persons identified as the | ||||
document authors. All rights reserved. | ||||
</t> | ||||
<t pn="section-boilerplate.2-2"> | ||||
This document is subject to BCP 78 and the IETF Trust's Legal | ||||
Provisions Relating to IETF Documents | ||||
(<eref target="https://trustee.ietf.org/license-info" brackets="none | ||||
"/>) in effect on the date of | ||||
publication of this document. Please review these documents | ||||
carefully, as they describe your rights and restrictions with | ||||
respect to this document. Code Components extracted from this | ||||
document must include Simplified BSD License text as described in | ||||
Section 4.e of the Trust Legal Provisions and are provided without | ||||
warranty as described in the Simplified BSD License. | ||||
</t> | ||||
</section> | ||||
</boilerplate> | ||||
<toc> | ||||
<section anchor="toc" numbered="false" removeInRFC="false" toc="exclude" p | ||||
n="section-toc.1"> | ||||
<name slugifiedName="name-table-of-contents">Table of Contents</name> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="section-to | ||||
c.1-1"> | ||||
<li pn="section-toc.1-1.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.1"><xref derivedContent | ||||
="1" format="counter" sectionFormat="of" target="section-1"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-introduction">Introductio | ||||
n</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.1.2"> | ||||
<li pn="section-toc.1-1.1.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.2.1.1"><xref derive | ||||
dContent="1.1" format="counter" sectionFormat="of" target="section-1.1"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-design-assump | ||||
tions">Design Assumptions</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.1.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.2.2.1"><xref derive | ||||
dContent="1.2" format="counter" sectionFormat="of" target="section-1.2"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-multipath-tcp | ||||
-in-the-networ">Multipath TCP in the Networking Stack</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.1.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.2.3.1"><xref derive | ||||
dContent="1.3" format="counter" sectionFormat="of" target="section-1.3"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-terminology"> | ||||
Terminology</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.1.2.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.2.4.1"><xref derive | ||||
dContent="1.4" format="counter" sectionFormat="of" target="section-1.4"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-concept | ||||
">MPTCP Concept</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.1.2.5"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.1.2.5.1"><xref derive | ||||
dContent="1.5" format="counter" sectionFormat="of" target="section-1.5"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-requirements- | ||||
language">Requirements Language</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.1"><xref derivedContent | ||||
="2" format="counter" sectionFormat="of" target="section-2"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-operation-overview">Opera | ||||
tion Overview</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.2.2"> | ||||
<li pn="section-toc.1-1.2.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.1.1"><xref derive | ||||
dContent="2.1" format="counter" sectionFormat="of" target="section-2.1"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-initiating-an | ||||
-mptcp-connect">Initiating an MPTCP Connection</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.2.1"><xref derive | ||||
dContent="2.2" format="counter" sectionFormat="of" target="section-2.2"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-associating-a | ||||
-new-subflow-w">Associating a New Subflow with an Existing MPTCP Connection</xre | ||||
f></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.3.1"><xref derive | ||||
dContent="2.3" format="counter" sectionFormat="of" target="section-2.3"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-informing-the | ||||
-other-host-ab">Informing the Other Host about Another Potential Address</xref>< | ||||
/t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.4.1"><xref derive | ||||
dContent="2.4" format="counter" sectionFormat="of" target="section-2.4"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-data-transfer | ||||
-using-mptcp">Data Transfer Using MPTCP</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.5"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.5.1"><xref derive | ||||
dContent="2.5" format="counter" sectionFormat="of" target="section-2.5"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-requesting-a- | ||||
change-in-a-pa">Requesting a Change in a Path's Priority</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.6"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.6.1"><xref derive | ||||
dContent="2.6" format="counter" sectionFormat="of" target="section-2.6"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-closing-an-mp | ||||
tcp-connection">Closing an MPTCP Connection</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.2.2.7"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.2.2.7.1"><xref derive | ||||
dContent="2.7" format="counter" sectionFormat="of" target="section-2.7"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-notable-featu | ||||
res">Notable Features</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.1"><xref derivedContent | ||||
="3" format="counter" sectionFormat="of" target="section-3"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-mptcp-operations-an-overv | ||||
ie">MPTCP Operations: An Overview</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.3.2"> | ||||
<li pn="section-toc.1-1.3.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.1.1"><xref derive | ||||
dContent="3.1" format="counter" sectionFormat="of" target="section-3.1"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-connection-in | ||||
itiation">Connection Initiation</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.2.1"><xref derive | ||||
dContent="3.2" format="counter" sectionFormat="of" target="section-3.2"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-starting-a-ne | ||||
w-subflow">Starting a New Subflow</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.1"><xref derive | ||||
dContent="3.3" format="counter" sectionFormat="of" target="section-3.3"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-operati | ||||
on-and-data-tr">MPTCP Operation and Data Transfer</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
ction-toc.1-1.3.2.3.2"> | ||||
<li pn="section-toc.1-1.3.2.3.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.1.1"><xre | ||||
f derivedContent="3.3.1" format="counter" sectionFormat="of" target="section-3.3 | ||||
.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-d | ||||
ata-sequence-mapping">Data Sequence Mapping</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.2.1"><xre | ||||
f derivedContent="3.3.2" format="counter" sectionFormat="of" target="section-3.3 | ||||
.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-d | ||||
ata-acknowledgments">Data Acknowledgments</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.3.1"><xre | ||||
f derivedContent="3.3.3" format="counter" sectionFormat="of" target="section-3.3 | ||||
.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-c | ||||
losing-a-connection">Closing a Connection</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.4.1"><xre | ||||
f derivedContent="3.3.4" format="counter" sectionFormat="of" target="section-3.3 | ||||
.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-r | ||||
eceiver-considerations">Receiver Considerations</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.5"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.5.1"><xre | ||||
f derivedContent="3.3.5" format="counter" sectionFormat="of" target="section-3.3 | ||||
.5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-s | ||||
ender-considerations">Sender Considerations</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.6"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.6.1"><xre | ||||
f derivedContent="3.3.6" format="counter" sectionFormat="of" target="section-3.3 | ||||
.6"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-r | ||||
eliability-and-retransmiss">Reliability and Retransmissions</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.7"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.7.1"><xre | ||||
f derivedContent="3.3.7" format="counter" sectionFormat="of" target="section-3.3 | ||||
.7"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-c | ||||
ongestion-control-consider">Congestion Control Considerations</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.3.2.8"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.8.1"><xre | ||||
f derivedContent="3.3.8" format="counter" sectionFormat="of" target="section-3.3 | ||||
.8"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-s | ||||
ubflow-policy">Subflow Policy</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.1"><xref derive | ||||
dContent="3.4" format="counter" sectionFormat="of" target="section-3.4"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-address-knowl | ||||
edge-exchange-">Address Knowledge Exchange (Path Management)</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
ction-toc.1-1.3.2.4.2"> | ||||
<li pn="section-toc.1-1.3.2.4.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.1.1"><xre | ||||
f derivedContent="3.4.1" format="counter" sectionFormat="of" target="section-3.4 | ||||
.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-a | ||||
ddress-advertisement">Address Advertisement</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.4.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.2.1"><xre | ||||
f derivedContent="3.4.2" format="counter" sectionFormat="of" target="section-3.4 | ||||
.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-r | ||||
emove-address">Remove Address</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.5"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.5.1"><xref derive | ||||
dContent="3.5" format="counter" sectionFormat="of" target="section-3.5"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-fast-close">F | ||||
ast Close</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.6"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.6.1"><xref derive | ||||
dContent="3.6" format="counter" sectionFormat="of" target="section-3.6"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-subflow-reset | ||||
">Subflow Reset</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.7"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.7.1"><xref derive | ||||
dContent="3.7" format="counter" sectionFormat="of" target="section-3.7"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-fallback">Fal | ||||
lback</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.8"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.8.1"><xref derive | ||||
dContent="3.8" format="counter" sectionFormat="of" target="section-3.8"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-error-handlin | ||||
g">Error Handling</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.9"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.1"><xref derive | ||||
dContent="3.9" format="counter" sectionFormat="of" target="section-3.9"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-heuristics">H | ||||
euristics</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
ction-toc.1-1.3.2.9.2"> | ||||
<li pn="section-toc.1-1.3.2.9.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.1.1"><xre | ||||
f derivedContent="3.9.1" format="counter" sectionFormat="of" target="section-3.9 | ||||
.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-p | ||||
ort-usage">Port Usage</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.9.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.2.1"><xre | ||||
f derivedContent="3.9.2" format="counter" sectionFormat="of" target="section-3.9 | ||||
.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-d | ||||
elayed-subflow-start-and-s">Delayed Subflow Start and Subflow Symmetry</xref></t | ||||
> | ||||
</li> | ||||
<li pn="section-toc.1-1.3.2.9.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.3.1"><xre | ||||
f derivedContent="3.9.3" format="counter" sectionFormat="of" target="section-3.9 | ||||
.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-f | ||||
ailure-handling">Failure Handling</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.4.1"><xref derivedContent | ||||
="4" format="counter" sectionFormat="of" target="section-4"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-semantic-issues">Semantic | ||||
Issues</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.5"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.5.1"><xref derivedContent | ||||
="5" format="counter" sectionFormat="of" target="section-5"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-security-considerations"> | ||||
Security Considerations</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.6"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.6.1"><xref derivedContent | ||||
="6" format="counter" sectionFormat="of" target="section-6"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-interactions-with-middleb | ||||
ox">Interactions with Middleboxes</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.7"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.7.1"><xref derivedContent | ||||
="7" format="counter" sectionFormat="of" target="section-7"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-iana-considerations">IANA | ||||
Considerations</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.7.2"> | ||||
<li pn="section-toc.1-1.7.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.7.2.1.1"><xref derive | ||||
dContent="7.1" format="counter" sectionFormat="of" target="section-7.1"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-tcp-option-ki | ||||
nd-numbers">TCP Option Kind Numbers</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.7.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.7.2.2.1"><xref derive | ||||
dContent="7.2" format="counter" sectionFormat="of" target="section-7.2"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-option- | ||||
subtypes">MPTCP Option Subtypes</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.7.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.7.2.3.1"><xref derive | ||||
dContent="7.3" format="counter" sectionFormat="of" target="section-7.3"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-handsha | ||||
ke-algorithms">MPTCP Handshake Algorithms</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.7.2.4"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.7.2.4.1"><xref derive | ||||
dContent="7.4" format="counter" sectionFormat="of" target="section-7.4"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-mp_tcprst-rea | ||||
son-codes">MP_TCPRST Reason Codes</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.8"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.8.1"><xref derivedContent | ||||
="8" format="counter" sectionFormat="of" target="section-8"/>. <xref derivedCon | ||||
tent="" format="title" sectionFormat="of" target="name-references">References</x | ||||
ref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.8.2"> | ||||
<li pn="section-toc.1-1.8.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.8.2.1.1"><xref derive | ||||
dContent="8.1" format="counter" sectionFormat="of" target="section-8.1"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-normative-ref | ||||
erences">Normative References</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.8.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.8.2.2.1"><xref derive | ||||
dContent="8.2" format="counter" sectionFormat="of" target="section-8.2"/>. <xre | ||||
f derivedContent="" format="title" sectionFormat="of" target="name-informative-r | ||||
eferences">Informative References</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.9"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.9.1"><xref derivedContent | ||||
="Appendix A" format="default" sectionFormat="of" target="section-appendix.a"/>. | ||||
<xref derivedContent="" format="title" sectionFormat="of" target="name-notes-o | ||||
n-use-of-tcp-options">Notes on Use of TCP Options</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.10"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.10.1"><xref derivedConten | ||||
t="Appendix B" format="default" sectionFormat="of" target="section-appendix.b"/> | ||||
. <xref derivedContent="" format="title" sectionFormat="of" target="name-tcp-fa | ||||
st-open-and-mptcp">TCP Fast Open and MPTCP</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.10.2"> | ||||
<li pn="section-toc.1-1.10.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.10.2.1.1"><xref deriv | ||||
edContent="B.1" format="counter" sectionFormat="of" target="section-b.1"/>. <xr | ||||
ef derivedContent="" format="title" sectionFormat="of" target="name-tfo-cookie-r | ||||
equest-with-mpt">TFO Cookie Request with MPTCP</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.10.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.10.2.2.1"><xref deriv | ||||
edContent="B.2" format="counter" sectionFormat="of" target="section-b.2"/>. <xr | ||||
ef derivedContent="" format="title" sectionFormat="of" target="name-data-sequenc | ||||
e-mapping-under">Data Sequence Mapping under TFO</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.10.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.10.2.3.1"><xref deriv | ||||
edContent="B.3" format="counter" sectionFormat="of" target="section-b.3"/>. <xr | ||||
ef derivedContent="" format="title" sectionFormat="of" target="name-connection-e | ||||
stablishment-ex">Connection Establishment Examples</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.11"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.1"><xref derivedConten | ||||
t="Appendix C" format="default" sectionFormat="of" target="section-appendix.c"/> | ||||
. <xref derivedContent="" format="title" sectionFormat="of" target="name-contro | ||||
l-blocks">Control Blocks</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
n-toc.1-1.11.2"> | ||||
<li pn="section-toc.1-1.11.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.1"><xref deriv | ||||
edContent="C.1" format="counter" sectionFormat="of" target="section-c.1"/>. <xr | ||||
ef derivedContent="" format="title" sectionFormat="of" target="name-mptcp-contro | ||||
l-block">MPTCP Control Block</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
ction-toc.1-1.11.2.1.2"> | ||||
<li pn="section-toc.1-1.11.2.1.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.1.1"><xr | ||||
ef derivedContent="C.1.1" format="counter" sectionFormat="of" target="section-c. | ||||
1.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name- | ||||
authentication-and-metadata">Authentication and Metadata</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.11.2.1.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.2.1"><xr | ||||
ef derivedContent="C.1.2" format="counter" sectionFormat="of" target="section-c. | ||||
1.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name- | ||||
sending-side">Sending Side</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.11.2.1.2.3"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.3.1"><xr | ||||
ef derivedContent="C.1.3" format="counter" sectionFormat="of" target="section-c. | ||||
1.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name- | ||||
receiving-side">Receiving Side</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.11.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.1"><xref deriv | ||||
edContent="C.2" format="counter" sectionFormat="of" target="section-c.2"/>. <xr | ||||
ef derivedContent="" format="title" sectionFormat="of" target="name-tcp-control- | ||||
blocks">TCP Control Blocks</xref></t> | ||||
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
ction-toc.1-1.11.2.2.2"> | ||||
<li pn="section-toc.1-1.11.2.2.2.1"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.1.1"><xr | ||||
ef derivedContent="C.2.1" format="counter" sectionFormat="of" target="section-c. | ||||
2.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name- | ||||
sending-side-2">Sending Side</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.11.2.2.2.2"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.2.1"><xr | ||||
ef derivedContent="C.2.2" format="counter" sectionFormat="of" target="section-c. | ||||
2.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name- | ||||
receiving-side-2">Receiving Side</xref></t> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
</ul> | ||||
</li> | ||||
<li pn="section-toc.1-1.12"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.12.1"><xref derivedConten | ||||
t="Appendix D" format="default" sectionFormat="of" target="section-appendix.d"/> | ||||
. <xref derivedContent="" format="title" sectionFormat="of" target="name-finite | ||||
-state-machine">Finite State Machine</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.13"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.13.1"><xref derivedConten | ||||
t="Appendix E" format="default" sectionFormat="of" target="section-appendix.e"/> | ||||
. <xref derivedContent="" format="title" sectionFormat="of" target="name-change | ||||
s-from-rfc-6824">Changes from RFC 6824</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.14"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.14.1"><xref derivedConten | ||||
t="" format="none" sectionFormat="of" target="section-appendix.f"/><xref derived | ||||
Content="" format="title" sectionFormat="of" target="name-acknowledgments">Ackno | ||||
wledgments</xref></t> | ||||
</li> | ||||
<li pn="section-toc.1-1.15"> | ||||
<t keepWithNext="true" pn="section-toc.1-1.15.1"><xref derivedConten | ||||
t="" format="none" sectionFormat="of" target="section-appendix.g"/><xref derived | ||||
Content="" format="title" sectionFormat="of" target="name-authors-addresses">Aut | ||||
hors' Addresses</xref></t> | ||||
</li> | ||||
</ul> | ||||
</section> | ||||
</toc> | ||||
</front> | </front> | |||
<middle> | <middle> | |||
<section title="Introduction" anchor="sec_intro"> | <section anchor="sec_intro" numbered="true" toc="include" removeInRFC="false | |||
<t>Multipath TCP (MPTCP) is a set of extensions to regular TCP <xref targe | " pn="section-1"> | |||
t="RFC0793"/> to provide a Multipath TCP <xref target="RFC6182"/> service, which | <name slugifiedName="name-introduction">Introduction</name> | |||
enables a transport connection to operate across multiple paths | <t pn="section-1-1">Multipath TCP (MPTCP) is a set of extensions to regula | |||
simultaneously. This document presents the protocol changes required to add mult | r TCP <xref target="RFC0793" format="default" sectionFormat="of" derivedContent= | |||
ipath capability to TCP; specifically, those for signaling and setting up multip | "RFC0793"/> to provide a Multipath TCP service <xref target="RFC6182" format="de | |||
le paths ("subflows"), managing these subflows, reassembly of data, and terminat | fault" sectionFormat="of" derivedContent="RFC6182"/>, which enables a transport | |||
ion of sessions. | connection to operate across multiple paths | |||
This is not the only information required to create a Multipath TCP implem | simultaneously. This document presents the protocol changes required to add | |||
entation, however. This document is complemented by three others: | multipath capability to TCP -- specifically, those for signaling and setting | |||
<list style="symbols"> | up multiple paths ("subflows"), managing these subflows, reassembly of data, | |||
<t>Architecture <xref target="RFC6182"/>, which explains the motivatio | and termination of sessions. This is not the only information required to create | |||
ns behind Multipath TCP, contains a discussion of high-level design decisions on | a Multipath TCP implementation, however. This document is complemented by three | |||
which this design is based, and an explanation of a functional separation throu | others: | |||
gh which an extensible MPTCP implementation can be developed.</t> | ||||
<t>Congestion control <xref target="RFC6356"/> presents a safe congest | ||||
ion control algorithm for coupling the behavior of the multiple paths in order t | ||||
o "do no harm" to other network users.</t> | ||||
<t>Application considerations <xref target="RFC6897"/> discusses what | ||||
impact MPTCP will have on applications, what applications will want to do with M | ||||
PTCP, and as a consequence of these factors, what API extensions an MPTCP implem | ||||
entation should present.</t> | ||||
</list> | ||||
This document is an update to, and obsoletes, the v0 specification of Mult | ||||
ipath TCP (RFC6824). This document specifies MPTCP v1, which is not backward com | ||||
patible with MPTCP v0. This document additionally defines version negotiation pr | ||||
ocedures for implementations that support both versions. | ||||
</t> | </t> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-1-2"> | ||||
<section title="Design Assumptions" anchor="sec_assum"> | <li pn="section-1-2.1"> | |||
<t>In order to limit the potentially huge design space, the mptcp workin | <xref target="RFC6182" format="default" sectionFormat="of" derivedCont | |||
g group imposed two key constraints on the Multipath TCP design presented in thi | ent="RFC6182"/> (MPTCP architecture), which | |||
s document: | explains the motivations behind Multipath TCP, contains a discussion | |||
<list style="symbols"> | of high-level design decisions on which this design is based, and provid | |||
<t>It must be backwards-compatible with current, regular TCP, to inc | es an explanation of a functional separation through which an extensible MPTCP i | |||
rease its chances of deployment.</t> | mplementation can be developed.</li> | |||
<t>It can be assumed that one or both hosts are multihomed and multi | <li pn="section-1-2.2"> | |||
addressed.</t> | <xref target="RFC6356" format="default" sectionFormat="of" derivedCont | |||
</list> | ent="RFC6356"/> (congestion control), which presents a safe congestion control a | |||
lgorithm for coupling the behavior of the multiple paths in order to "do no harm | ||||
" to other network users.</li> | ||||
<li pn="section-1-2.3"> | ||||
<xref target="RFC6897" format="default" sectionFormat="of" derivedCont | ||||
ent="RFC6897"/> (application considerations), which discusses what impact MPTCP | ||||
will have on applications, what applications will want to do with MPTCP, and as | ||||
a consequence of these factors, what API extensions an MPTCP implementation shou | ||||
ld present.</li> | ||||
</ul> | ||||
<t pn="section-1-3"> | ||||
This document obsoletes the v0 specification of | ||||
Multipath TCP <xref target="RFC6824" format="default" sectionFormat="of" d | ||||
erivedContent="RFC6824"/>. This document specifies MPTCP v1, which is not backwa | ||||
rd compatible with MPTCP v0. This document additionally defines version negotiat | ||||
ion procedures for implementations that support both versions. | ||||
</t> | ||||
<section anchor="sec_assum" numbered="true" toc="include" removeInRFC="fal | ||||
se" pn="section-1.1"> | ||||
<name slugifiedName="name-design-assumptions">Design Assumptions</name> | ||||
<t pn="section-1.1-1">In order to limit the potentially huge design spac | ||||
e, the | ||||
MPTCP Working Group imposed two key constraints on the Multipath TCP des | ||||
ign presented in this document: | ||||
</t> | </t> | |||
<t>To simplify the design, we assume that the presence of multiple addre | <ul spacing="normal" bare="false" empty="false" pn="section-1.1-2"> | |||
sses at a host is sufficient to indicate the existence of multiple paths. These | <li pn="section-1.1-2.1">It must be backward compatible with current, | |||
paths need not be entirely disjoint: they may share one or many routers between | regular TCP, to increase its chances of deployment.</li> | |||
them. Even in such a situation, making use of multiple paths is beneficial, impr | <li pn="section-1.1-2.2">It can be assumed that one or both hosts are | |||
oving resource utilization and resilience to a subset of node failures. The cong | multihomed and multiaddressed.</li> | |||
estion control algorithms defined in <xref target="RFC6356"/> ensure this does n | </ul> | |||
ot act detrimentally. Furthermore, there may be some scenarios where different T | <t pn="section-1.1-3">To simplify the design, we assume that the presenc | |||
CP ports on a single host can provide disjoint paths (such as through certain Eq | e of multiple | |||
ual-Cost Multipath (ECMP) implementations <xref target="RFC2992"/>), and so the | addresses at a host is sufficient to indicate the existence of | |||
MPTCP design also supports the use of ports in path identifiers.</t> | multiple paths. These paths need not be entirely disjoint: they may | |||
<t>There are three aspects to the backwards-compatibility listed above ( | share one or many routers between them. Even in such a situation, | |||
discussed in more detail in <xref target="RFC6182"/>): | making use of multiple paths is beneficial, improving resource | |||
<list style="hanging"> | utilization and resilience to a subset of node failures. The | |||
<t hangText="External Constraints:"> The protocol must function thro | congestion control algorithm defined in <xref target="RFC6356" format="d | |||
ugh the vast majority of existing | efault" sectionFormat="of" derivedContent="RFC6356"/> ensures that the use of mu | |||
middleboxes such as NATs, firewalls, and proxies, and as such must resemble exis | ltiple paths does not act detrimentally. | |||
ting TCP as far as possible on the | Furthermore, there may be some scenarios where different TCP ports on a | |||
wire. Furthermore, the protocol must not assume the segments it sends on the wir | single host can provide disjoint paths (such as through certain | |||
e arrive unmodified at the destination: | Equal-Cost Multipath (ECMP) implementations <xref target="RFC2992" format="defau | |||
they may be split or coalesced; TCP options may be removed or duplicated. </t> | lt" sectionFormat="of" derivedContent="RFC2992"/>), and so the MPTCP design also | |||
<t hangText="Application Constraints:"> The protocol must be usable | supports the use of | |||
with no change to existing applications that use the common TCP API (although it | ports in path identifiers.</t> | |||
is reasonable that not all features would be available to such legacy applicati | <t pn="section-1.1-4">There are three aspects to the backward compatibil | |||
ons). Furthermore, the protocol must provide the same service model as regular T | ity listed above (discussed in more detail in <xref target="RFC6182" format="def | |||
CP to the application.</t> | ault" sectionFormat="of" derivedContent="RFC6182"/>): | |||
<t hangText="Fallback:"> The protocol should be able to fall back to | ||||
standard TCP with no interference from the user, to be able to communicate with | ||||
legacy hosts.</t> | ||||
</list> | ||||
</t> | </t> | |||
<t>The complementary application considerations document <xref target="R | <dl newline="false" spacing="normal" indent="3" pn="section-1.1-5"> | |||
FC6897"/> discusses the necessary features of an API to provide backwards-compat | <dt pn="section-1.1-5.1">External Constraints:</dt> | |||
ibility, as well as API extensions to convey the behavior of MPTCP at a level of | <dd pn="section-1.1-5.2"> The protocol must function through the vast | |||
control and information equivalent to that available with regular, single-path | majority of existing | |||
TCP.</t> | middleboxes such as NATs, firewalls, and proxies, and as such must resemble exis | |||
<t>Further discussion of the design constraints and associated design de | ting TCP as far as possible on the | |||
cisions are given in the MPTCP Architecture document <xref target="RFC6182"/> an | wire. Furthermore, the protocol must not assume that the segments it sends on th | |||
d in <xref target="howhard"/>.</t> | e wire arrive unmodified at the destination: | |||
they may be split or coalesced; TCP options may be removed or duplicated. </dd> | ||||
<dt pn="section-1.1-5.3">Application Constraints:</dt> | ||||
<dd pn="section-1.1-5.4"> The protocol must be usable with no change t | ||||
o existing applications that use the common TCP API (although it is reasonable t | ||||
hat not all features would be available to such legacy applications). Furthermor | ||||
e, the protocol must provide the same service model as regular TCP to the applic | ||||
ation.</dd> | ||||
<dt pn="section-1.1-5.5">Fallback:</dt> | ||||
<dd pn="section-1.1-5.6"> The protocol should be able to fall back to | ||||
standard TCP with no interference from the user, to be able to communicate with | ||||
legacy hosts.</dd> | ||||
</dl> | ||||
<t pn="section-1.1-6">The complementary application considerations docum | ||||
ent <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="R | ||||
FC6897"/> discusses the necessary features | ||||
of an API to provide backward compatibility, as well as API extensions t | ||||
o convey the behavior of MPTCP at a level of control and information equivalent | ||||
to that available with regular, single-path TCP.</t> | ||||
<t pn="section-1.1-7">Further discussion of the design constraints and a | ||||
ssociated design decisions is given in the MPTCP architecture document <xref tar | ||||
get="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> and | ||||
in <xref target="howhard" format="default" sectionFormat="of" derivedContent="h | ||||
owhard"/>.</t> | ||||
</section> | </section> | |||
<section anchor="sec_layers" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="Multipath TCP in the Networking Stack" anchor="sec_layers" | lse" pn="section-1.2"> | |||
> | <name slugifiedName="name-multipath-tcp-in-the-networ">Multipath TCP in | |||
<t>MPTCP operates at the transport layer and aims to be transparent to b | the Networking Stack</name> | |||
oth higher and lower | <t pn="section-1.2-1">MPTCP operates at the transport layer and aims to | |||
layers. It is a set of additional features on top of standard TCP; <xref target= | be transparent to both higher and lower | |||
"fig_arch" /> illustrates | layers. It is a set of additional features on top of standard TCP; <xref target= | |||
"fig_arch" format="default" sectionFormat="of" derivedContent="Figure 1"/> illus | ||||
trates | ||||
this layering. MPTCP is designed to be usable by legacy applications with no changes; detailed discussion | this layering. MPTCP is designed to be usable by legacy applications with no changes; detailed discussion | |||
of its interactions with applications is given in <xref target="RFC6897"/>.</t> | of its interactions with applications is given in <xref target="RFC6897" format= | |||
"default" sectionFormat="of" derivedContent="RFC6897"/>.</t> | ||||
<figure align="center" anchor="fig_arch" title="Comparison of Standard T | <figure anchor="fig_arch" align="left" suppress-title="false" pn="figure | |||
CP and MPTCP Protocol Stacks"> | -1"> | |||
<artwork align="left"><![CDATA[ | <name slugifiedName="name-comparison-of-standard-tcp-">Comparison of S | |||
tandard TCP and MPTCP Protocol Stacks</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-1.2-2.1"> | ||||
+-------------------------------+ | +-------------------------------+ | |||
| Application | | | Application | | |||
+---------------+ +-------------------------------+ | +---------------+ +-------------------------------+ | |||
| Application | | MPTCP | | | Application | | MPTCP | | |||
+---------------+ + - - - - - - - + - - - - - - - + | +---------------+ + - - - - - - - + - - - - - - - + | |||
| TCP | | Subflow (TCP) | Subflow (TCP) | | | TCP | | Subflow (TCP) | Subflow (TCP) | | |||
+---------------+ +-------------------------------+ | +---------------+ +-------------------------------+ | |||
| IP | | IP | IP | | | IP | | IP | IP | | |||
+---------------+ +-------------------------------+ | +---------------+ +-------------------------------+ </artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
</section> | </section> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-1.3 | ||||
<section title="Terminology"> | "> | |||
<t>This document makes use of a number of terms that are either MPTCP-sp | <name slugifiedName="name-terminology">Terminology</name> | |||
ecific or have defined meaning in the context of MPTCP, as follows: | <t pn="section-1.3-1">This document makes use of a number of terms that | |||
<list style="hanging"> | are either MPTCP specific or have defined meaning in the context of MPTCP, as fo | |||
<t hangText="Path:"> A sequence of links between a sender and a receiv | llows: | |||
er, defined in this context by a 4-tuple of source and destination address/port | </t> | |||
pairs.</t> | <dl newline="false" spacing="normal" indent="3" pn="section-1.3-2"> | |||
<t hangText="Subflow:"> A flow of TCP segments operating over an indiv | <dt pn="section-1.3-2.1">Path:</dt> | |||
idual path, which forms part of a larger MPTCP connection. A subflow is started | <dd pn="section-1.3-2.2"> A sequence of links between a sender and a r | |||
and terminated similar to a regular TCP connection.</t> | eceiver, defined in this context by a 4-tuple of source and destination address | |||
<t hangText="(MPTCP) Connection:"> A set of one or more subflows, over | /port pairs.</dd> | |||
which an application can communicate between two hosts. There is a one-to-one m | <dt pn="section-1.3-2.3">Subflow:</dt> | |||
apping between a connection and an application socket.</t> | <dd pn="section-1.3-2.4"> A flow of TCP segments operating over an ind | |||
<t hangText="Data-level:"> The payload data is nominally transferred o | ividual path, which forms part of a larger MPTCP connection. A subflow is starte | |||
ver a connection, which in turn is transported over subflows. Thus, the term "d | d and terminated similarly to a regular TCP connection.</dd> | |||
ata-level" is synonymous with "connection level", in contrast to "subflow-level" | <dt pn="section-1.3-2.5">(MPTCP) Connection:</dt> | |||
, which refers to properties of an individual subflow.</t> | <dd pn="section-1.3-2.6"> A set of one or more subflows, over which an | |||
<t hangText="Token:"> A locally unique identifier given to a multipath | application can communicate between two hosts. There is a one‑to‑one mapping be | |||
connection by a host. May also be referred to as a "Connection ID".</t> | tween a connection and an application socket.</dd> | |||
<t hangText="Host:"> An end host operating an MPTCP implementation, an | <dt pn="section-1.3-2.7">Data-level:</dt> | |||
d either initiating or accepting an MPTCP connection.</t> | <dd pn="section-1.3-2.8"> The payload data is nominally transferred ov | |||
</list> | er a connection, which in turn is transported over subflows. Thus, the term "da | |||
In addition to these terms, note that MPTCP's interpretation of, and eff | ta-level" is synonymous with "connection-level", in contrast to "subflow-level", | |||
ect on, regular single-path TCP semantics are discussed in <xref target="sec_sem | which refers to properties of an individual subflow.</dd> | |||
antics"/>.</t> | <dt pn="section-1.3-2.9">Token:</dt> | |||
<dd pn="section-1.3-2.10"> A locally unique identifier given to a mult | ||||
ipath connection by a host. May also be referred to as a "Connection ID".</dd> | ||||
<dt pn="section-1.3-2.11">Host:</dt> | ||||
<dd pn="section-1.3-2.12"> An end host operating an MPTCP implementati | ||||
on, and either initiating or accepting an MPTCP connection.</dd> | ||||
</dl> | ||||
<t pn="section-1.3-3"> | ||||
In addition to these terms, note that MPTCP's interpretation of, and eff | ||||
ect on, regular single-path TCP semantics are discussed in <xref target="sec_sem | ||||
antics" format="default" sectionFormat="of" derivedContent="Section 4"/>.</t> | ||||
</section> | </section> | |||
<section anchor="sec_operation" numbered="true" toc="include" removeInRFC= | ||||
<section title="MPTCP Concept" anchor="sec_operation"> | "false" pn="section-1.4"> | |||
<t>This section provides a high-level summary of normal | <name slugifiedName="name-mptcp-concept">MPTCP Concept</name> | |||
operation of MPTCP, and is illustrated by the scenario shown in | <t pn="section-1.4-1">This section provides a high-level summary of norm | |||
<xref target="fig_scenario"/>. A detailed description of operation is given in < | al | |||
xref target="sec_protocol"/>. | operation of MPTCP; this type of scenario is illustrated in | |||
<list style="symbols"> | <xref target="fig_scenario" format="default" sectionFormat="of" derivedContent=" | |||
<t>To a non-MPTCP-aware application, MPTCP will behave the same as n | Figure 2"/>. A detailed description of how | |||
ormal TCP. Extended APIs could provide | MPTCP operates is given in <xref target="sec_protocol" format="default" sectionF | |||
additional control to MPTCP-aware applications <xref target="RFC6897"/>. | ormat="of" derivedContent="Section 3"/>. | |||
An application begins by opening a TCP socket in the normal way. | ||||
MPTCP signaling and operation are handled by the MPTCP implementation. | ||||
</t> | ||||
<t>An MPTCP connection begins similarly to a regular TCP connection. | ||||
This is | ||||
illustrated in <xref target="fig_scenario"/> where an MPTCP connection is establ | ||||
ished between | ||||
addresses A1 and B1 on Hosts A and B, respectively.</t> | ||||
<t>If extra paths are available, additional TCP sessions (termed MPT | ||||
CP "subflows") | ||||
are created on these paths, and are combined with the existing session, which co | ||||
ntinues | ||||
to appear as a single connection to the applications at both ends. The creation | ||||
of the | ||||
additional TCP session is illustrated between Address A2 on Host A and Address B | ||||
1 on | ||||
Host B.</t> | ||||
<t>MPTCP identifies multiple paths by the presence of multiple addre | ||||
sses | ||||
at hosts. Combinations of these multiple addresses equate to the additional path | ||||
s. | ||||
In the example, other potential paths that could be set up are A1&lt;-&gt;B2 and | ||||
A2&lt;-&gt;B2. | ||||
Although this additional session is shown as being initiated from A2, it could e | ||||
qually have | ||||
been initiated from B1 or B2.</t> | ||||
<t>The discovery and setup of additional subflows | ||||
will be achieved through a path management method; this document describes a mec | ||||
hanism | ||||
by which a host can initiate new subflows by using its own additional addresses, | ||||
or by | ||||
signaling its available addresses to the other host.</t> | ||||
<t>MPTCP adds connection-level sequence numbers to allow the reassem | ||||
bly of | ||||
segments arriving on multiple subflows with differing network delays. </t> | ||||
<t>Subflows are terminated as regular TCP connections, with a four-w | ||||
ay FIN | ||||
handshake. The MPTCP connection is terminated by a connection-level FIN.</t> | ||||
</list> | ||||
</t> | </t> | |||
<?rfc needLines='17'?> | <figure anchor="fig_scenario" align="left" suppress-title="false" pn="fi | |||
<figure align="center" anchor="fig_scenario" title="Example MPTCP Usag | gure-2"> | |||
e Scenario"> | <name slugifiedName="name-example-mptcp-usage-scenari">Example MPTCP U | |||
<artwork align="left"><![CDATA[ | sage Scenario</name> | |||
<artwork align="left" name="" type="" alt="" pn="section-1.4-2.1"> | ||||
Host A Host B | Host A Host B | |||
------------------------ ------------------------ | ------------------------ ------------------------ | |||
Address A1 Address A2 Address B1 Address B2 | Address A1 Address A2 Address B1 Address B2 | |||
---------- ---------- ---------- ---------- | ---------- ---------- ---------- ---------- | |||
| | | | | | | | | | |||
| (initial connection setup) | | | | (initial connection setup) | | | |||
|----------------------------------->| | | |----------------------------------->| | | |||
|<-----------------------------------| | | |<-----------------------------------| | | |||
| | | | | | | | | | |||
| (additional subflow setup) | | | (additional subflow setup) | | |||
| |--------------------->| | | | |--------------------->| | | |||
| |<---------------------| | | | |<---------------------| | | |||
| | | | | ||||
| | | | | | | | | | |||
]]></artwork> | | | | | </artwork> | |||
</figure> | </figure> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-1.4-3"> | ||||
<li pn="section-1.4-3.1">To a non-MPTCP-aware application, MPTCP will | ||||
behave the same as normal TCP. Extended APIs could provide | ||||
additional control to MPTCP-aware applications <xref target="RFC6897" format="de | ||||
fault" sectionFormat="of" derivedContent="RFC6897"/>. | ||||
An application begins by opening a TCP socket in the normal way. | ||||
MPTCP signaling and operation are handled by the MPTCP implementation. | ||||
</li> | ||||
<li pn="section-1.4-3.2">An MPTCP connection begins similarly to a reg | ||||
ular TCP connection. This is | ||||
illustrated in <xref target="fig_scenario" format="default" sectionFormat="of" d | ||||
erivedContent="Figure 2"/>, where an MPTCP connection is established between | ||||
addresses A1 and B1 on Hosts A and B, respectively.</li> | ||||
<li pn="section-1.4-3.3">If extra paths are available, additional TCP | ||||
sessions (termed MPTCP "subflows") | ||||
are created on these paths and are combined with the existing session, which con | ||||
tinues | ||||
to appear as a single connection to the applications at both ends. The creation | ||||
of the | ||||
additional TCP session is illustrated between Address A2 on Host A and Address B | ||||
1 on | ||||
Host B.</li> | ||||
<li pn="section-1.4-3.4">MPTCP identifies multiple paths by the presen | ||||
ce of multiple addresses | ||||
at hosts. Combinations of these multiple addresses equate to the additional path | ||||
s. | ||||
In the example, other potential paths that could be set up are A1&lt;-&gt;B2 and | ||||
A2&lt;-&gt;B2. | ||||
Although this additional session is shown as being initiated from A2, it could e | ||||
qually have | ||||
been initiated from B1 or B2.</li> | ||||
<li pn="section-1.4-3.5">The discovery and setup of additional subflow | ||||
s | ||||
will be achieved through a path management method; this document describes a mec | ||||
hanism | ||||
by which a host can initiate new subflows by using its own additional addresses | ||||
or by | ||||
signaling its available addresses to the other host.</li> | ||||
<li pn="section-1.4-3.6">MPTCP adds connection-level sequence numbers | ||||
to allow the reassembly of | ||||
segments arriving on multiple subflows with differing network delays. </li> | ||||
<li pn="section-1.4-3.7">Subflows are terminated as regular TCP connec | ||||
tions, with a four‑way FIN | ||||
handshake. The MPTCP connection is terminated by a connection-level FIN.</li> | ||||
</ul> | ||||
</section> | </section> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-1.5 | ||||
<section title="Requirements Language"> | "> | |||
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL | <name slugifiedName="name-requirements-language">Requirements Language</ | |||
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", | name> | |||
"MAY", and "OPTIONAL" in this document are to be interpreted as | <t pn="section-1.5-1"> | |||
described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174" | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
/> | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14> | |||
when, and only when, they appear in all capitals, as shown here.</t> | ", | |||
"<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", | ||||
"<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | ||||
"<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to | ||||
be | ||||
interpreted as described in BCP 14 <xref target="RFC2119" format="default" | ||||
sectionFormat="of" derivedContent="RFC2119"/> <xref target="RFC8174" | ||||
format="default" sectionFormat="of" derivedContent="RFC8174"/> when, and only when, they | ||||
appear in all capitals, as | ||||
shown here. | ||||
</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sec_overview" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="Operation Overview" anchor="sec_overview"> | lse" pn="section-2"> | |||
<t>This section presents a single description of common MPTCP operation, w | <name slugifiedName="name-operation-overview">Operation Overview</name> | |||
ith reference to the protocol operation. This is a high-level overview of the ke | <t pn="section-2-1">This section presents a single description of common M | |||
y functions; the full specification follows in <xref target="sec_protocol"/>. Ex | PTCP operation, with reference to the protocol operation. This is a high-level o | |||
tensibility and negotiated features are not discussed here. Considerable referen | verview of the key functions; the full specification follows in <xref target="se | |||
ce is made to symbolic names of MPTCP options throughout this section -- these a | c_protocol" format="default" sectionFormat="of" derivedContent="Section 3"/>. Ex | |||
re subtypes of the IANA-assigned MPTCP option (see <xref target="IANA"/>), and t | tensibility and negotiated features are not discussed here. Considerable referen | |||
heir formats are defined in the detailed protocol specification that follows in | ce is made to symbolic names of MPTCP options throughout this section -- these a | |||
<xref target="sec_protocol"/>.</t> | re subtypes of the IANA‑assigned MPTCP option (see <xref target="IANA" format="d | |||
efault" sectionFormat="of" derivedContent="Section 7"/>), and their formats are | ||||
<t>A Multipath TCP connection provides a bidirectional bytestream between two ho | defined in the detailed protocol specification provided in <xref target="sec_pro | |||
sts communicating like normal TCP and, thus, does not require any change to the | tocol" format="default" sectionFormat="of" derivedContent="Section 3"/>.</t> | |||
applications. However, Multipath TCP enables the hosts to use different paths wi | <t pn="section-2-2">A Multipath TCP connection provides a bidirectional by | |||
th different IP addresses to exchange packets belonging to the MPTCP connection. | testream between two hosts communicating like normal TCP and thus does not requi | |||
A Multipath TCP connection appears like a normal TCP connection to an applicati | re any change to the applications. However, Multipath TCP enables the hosts to u | |||
on. However, to the network layer, each MPTCP subflow looks like a regular TCP f | se different paths with different IP addresses to exchange packets belonging to | |||
low whose segments carry a new TCP option type. Multipath TCP manages the creati | the MPTCP connection. A Multipath TCP connection appears like a normal TCP conne | |||
on, removal, and utilization of these subflows to send data. The number of subfl | ction to an application. However, to the network layer, each MPTCP subflow looks | |||
ows that are managed within a Multipath TCP connection is not fixed and it can f | like a regular TCP flow whose segments carry a new TCP option type. Multipath T | |||
luctuate during the lifetime of the Multipath TCP connection.</t> | CP manages the creation, removal, and utilization of these subflows to send data | |||
. The number of subflows that are managed within a Multipath TCP connection is n | ||||
<t>All MPTCP operations are signaled with a TCP option -- a single numerical typ | ot fixed, and it can fluctuate during the lifetime of the Multipath TCP connecti | |||
e for MPTCP, with "sub-types" for each MPTCP message. What follows is a summary | on.</t> | |||
of the purpose and rationale of these messages.</t> | <t pn="section-2-3">All MPTCP operations are signaled with a TCP option -- | |||
<section title="Initiating an MPTCP Connection"> | a single numerical type for MPTCP, with "subtypes" for each MPTCP message. What | |||
<t>This is the same signaling as for initiating a normal TCP connection, but the | follows is a summary of the purpose and rationale of these messages.</t> | |||
SYN, SYN/ACK, and initial ACK (and data) packets also carry the MP_CAPABLE opti | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.1 | |||
on. This option has a variable length and serves multiple purposes. Firstly, it | "> | |||
verifies whether the remote host supports Multipath TCP; secondly, this option a | <name slugifiedName="name-initiating-an-mptcp-connect">Initiating an MPT | |||
llows the hosts to exchange some information to authenticate the establishment o | CP Connection</name> | |||
f additional subflows. Further details are given in <xref target="sec_init"/>.</ | <t pn="section-2.1-1">This is the same signaling as for initiating a nor | |||
t> | mal TCP connection, but the SYN, SYN/ACK, and initial ACK (and data) packets als | |||
o carry the MP_CAPABLE option. This option has a variable length and serves mult | ||||
<figure><artwork align="left"><![CDATA[ | iple purposes. Firstly, it verifies whether the remote host supports Multipath T | |||
CP; secondly, this option allows the hosts to exchange some information to authe | ||||
nticate the establishment of additional subflows. Further details are given in < | ||||
xref target="sec_init" format="default" sectionFormat="of" derivedContent="Secti | ||||
on 3.1"/>.</t> | ||||
<artwork align="left" name="" type="" alt="" pn="section-2.1-2"> | ||||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
MP_CAPABLE -> | MP_CAPABLE -> | |||
[flags] | [flags] | |||
<- MP_CAPABLE | <- MP_CAPABLE | |||
[B's key, flags] | [B's key, flags] | |||
ACK + MP_CAPABLE (+ data) -> | ACK + MP_CAPABLE (+ data) -> | |||
[A's key, B's key, flags, (data-level details)] | [A's key, B's key, flags, (data-level details)] </artwork> | |||
]]></artwork></figure> | <t pn="section-2.1-3">Retransmission of the ACK + MP_CAPABLE can occur i | |||
f it is not known if it has been received. The following diagrams show all possi | ||||
<t>Retransmission of the ACK + MP_CAPABLE can occur if it is not known if it has | ble exchanges for the initial subflow setup to ensure this reliability.</t> | |||
been received. The following diagrams show all possible exchanges for the initi | <artwork align="left" name="" type="" alt="" pn="section-2.1-4"> | |||
al subflow setup to ensure this reliability.</t> | ||||
<figure><artwork align="left"><![CDATA[ | ||||
Host A (with data to send immediately) Host B | Host A (with data to send immediately) Host B | |||
------ ------ | ------ ------ | |||
MP_CAPABLE -> | MP_CAPABLE -> | |||
[flags] | [flags] | |||
<- MP_CAPABLE | <- MP_CAPABLE | |||
[B's key, flags] | [B's key, flags] | |||
ACK + MP_CAPABLE + data -> | ACK + MP_CAPABLE + data -> | |||
[A's key, B's key, flags, data-level details] | [A's key, B's key, flags, data-level details] | |||
Host A (with data to send later) Host B | Host A (with data to send later) Host B | |||
------ ------ | ------ ------ | |||
MP_CAPABLE -> | MP_CAPABLE -> | |||
[flags] | [flags] | |||
<- MP_CAPABLE | <- MP_CAPABLE | |||
[B's key, flags] | [B's key, flags] | |||
ACK + MP_CAPABLE -> | ACK + MP_CAPABLE -> | |||
[A's key, B's key, flags] | [A's key, B's key, flags] | |||
ACK + MP_CAPABLE + data -> | ACK + MP_CAPABLE + data -> | |||
[A's key, B's key, flags, data-level details] | [A's key, B's key, flags, data-level details] | |||
Host A Host B (sending first) | Host A Host B (sending first) | |||
------ ------ | ------ ------ | |||
MP_CAPABLE -> | MP_CAPABLE -> | |||
[flags] | [flags] | |||
<- MP_CAPABLE | <- MP_CAPABLE | |||
[B's key, flags] | [B's key, flags] | |||
ACK + MP_CAPABLE -> | ACK + MP_CAPABLE -> | |||
[A's key, B's key, flags] | [A's key, B's key, flags] | |||
<- ACK + DSS + data | <- ACK + DSS + data | |||
[data-level details] | [data-level details] </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.2 | |||
"> | ||||
<section title="Associating a New Subflow with an Existing MPTCP Connection"> | <name slugifiedName="name-associating-a-new-subflow-w">Associating a New | |||
<t>The exchange of keys in the MP_CAPABLE handshake provides material that can b | Subflow with an Existing MPTCP Connection</name> | |||
e used to authenticate the endpoints when new subflows will be set up. | <t pn="section-2.2-1">The exchange of keys in the MP_CAPABLE handshake p | |||
rovides material that can be used to authenticate the endpoints when new subflow | ||||
s will be set up. | ||||
Additional subflows begin in the same way as initiating a normal TCP connection, but the SYN, SYN/ACK, and ACK packets also carry the MP_JOIN option. </t> | Additional subflows begin in the same way as initiating a normal TCP connection, but the SYN, SYN/ACK, and ACK packets also carry the MP_JOIN option. </t> | |||
<t pn="section-2.2-2">Host A initiates a new subflow between one of its | ||||
<t>Host A initiates a new subflow between one of its addresses and one of Host B | addresses and one | |||
's addresses. The token -- generated from the key -- is used to identify which M | of Host B's addresses. The token -- generated from the key -- is used | |||
PTCP connection it is joining, and the HMAC is used for authentication. The Hash | to identify which MPTCP connection it is joining, and the Hash‑based | |||
-based Message Authentication Code (HMAC) uses the keys exchanged in the MP_CAPA | Message Authentication Code (HMAC) is used for authentication. The HMAC | |||
BLE handshake, and the random numbers (nonces) exchanged in these MP_JOIN option | uses the keys exchanged in the MP_CAPABLE handshake and the random numbers (nonc | |||
s. MP_JOIN also contains flags and an Address ID that can be used to refer to th | es) exchanged in these MP_JOIN options. MP_JOIN also contains flags and an Addre | |||
e source address without the sender needing to know if it has been changed by a | ss ID that can be used to refer to the source address without the sender needing | |||
NAT. Further details are in <xref target="sec_join"/>.</t> | to know if it has been changed by a NAT. Further details are given in <xref tar | |||
get="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/ | ||||
<figure><artwork align="left"><![CDATA[ | >.</t> | |||
<artwork align="left" name="" type="" alt="" pn="section-2.2-3"> | ||||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
MP_JOIN -> | MP_JOIN -> | |||
[B's token, A's nonce, | [B's token, A's nonce, | |||
A's Address ID, flags] | A's Address ID, flags] | |||
<- MP_JOIN | <- MP_JOIN | |||
[B's HMAC, B's nonce, | [B's HMAC, B's nonce, | |||
B's Address ID, flags] | B's Address ID, flags] | |||
ACK + MP_JOIN -> | ACK + MP_JOIN -> | |||
[A's HMAC] | [A's HMAC] | |||
<- ACK | <- ACK </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.3 | |||
"> | ||||
<section title="Informing the Other Host about Another Potential Address"> | <name slugifiedName="name-informing-the-other-host-ab">Informing the Oth | |||
<t>The set of IP addresses associated to a multihomed host may change during the | er Host about Another Potential Address</name> | |||
lifetime of an MPTCP connection. MPTCP supports the addition and removal of add | <t pn="section-2.3-1">The set of IP addresses associated to a multihomed | |||
resses on a host both implicitly and explicitly. If Host A has established a sub | host may change during the lifetime of an MPTCP connection. MPTCP supports the | |||
flow starting at address/port pair IP#-A1 and wants to open a second subflow sta | addition and removal of addresses on a host both implicitly and explicitly. If H | |||
rting at address/port pair IP#-A2, it simply initiates the establishment of the | ost A has established a subflow starting at address/port pair IP#-A1 and wants | |||
subflow as explained above. The remote host will then be implicitly informed abo | to open a second subflow starting at address/port pair IP#-A2, it simply initia | |||
ut the new address.</t> | tes the establishment of the subflow as explained above. The remote host will th | |||
en be implicitly informed about the new address.</t> | ||||
<t>In some circumstances, a host may want to advertise to the remote host the av | <t pn="section-2.3-2">In some circumstances, a host may want to advertis | |||
ailability of an address without establishing a new subflow, for example, when a | e to the remote | |||
NAT prevents setup in one direction. In the example below, Host A informs Host | host the availability of an address without establishing a new subflow | |||
B about its alternative IP address/port pair (IP#-A2). Host B may later send an | -- for example, when a NAT prevents setup in one direction. In the example bel | |||
MP_JOIN to this new address. The ADD_ADDR option contains a HMAC to authenticat | ow, Host A informs Host B about its alternative IP address/port pair (IP#-A2). | |||
e the address as having been sent from the originator of the connection. The rec | Host B may later send an MP_JOIN to this new address. The ADD_ADDR option contai | |||
eiver of this option echoes it back to the client to indicate successful receipt | ns an HMAC to authenticate the address as having been sent from the originator o | |||
. Further details are in <xref target="sec_add_address"/>.</t> | f the connection. The receiver of this option echoes it back to the client to in | |||
dicate successful receipt. Further details are given in <xref target="sec_add_ad | ||||
<figure><artwork align="left"><![CDATA[ | dress" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>.</t> | |||
<artwork align="left" name="" type="" alt="" pn="section-2.3-3"> | ||||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
ADD_ADDR -> | ADD_ADDR -> | |||
[Echo-flag=0, | [Echo-flag=0, | |||
IP#-A2, | IP#-A2, | |||
IP#-A2's Address ID, | IP#-A2's Address ID, | |||
HMAC of IP#-A2] | HMAC of IP#-A2] | |||
<- ADD_ADDR | <- ADD_ADDR | |||
[Echo-flag=1, | [Echo-flag=1, | |||
IP#-A2, | IP#-A2, | |||
IP#-A2's Address ID, | IP#-A2's Address ID, | |||
HMAC of IP#-A2] | HMAC of IP#-A2] </artwork> | |||
]]></artwork></figure> | <t pn="section-2.3-4">There is a corresponding signal for address remova | |||
l, making use of | ||||
<t>There is a corresponding signal for address removal, making use of the Addres | the Address ID that is signaled in the ADD_ADDR handshake. | |||
s ID that is signaled in the add address handshake. Further details in <xref tar | ||||
get="sec_remove_addr"/>.</t> | ||||
<figure><artwork align="left"><![CDATA[ | Further details are given in <xref target="sec_remove_addr" format="default" se | |||
ctionFormat="of" derivedContent="Section 3.4.2"/>.</t> | ||||
<artwork align="left" name="" type="" alt="" pn="section-2.3-5"> | ||||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
REMOVE_ADDR -> | REMOVE_ADDR -> | |||
[IP#-A2's Address ID] | [IP#-A2's Address ID] </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.4 | |||
"> | ||||
<section title="Data Transfer Using MPTCP"> | <name slugifiedName="name-data-transfer-using-mptcp">Data Transfer Using | |||
<t>To ensure reliable, in-order delivery of data over subflows that may appear a | MPTCP</name> | |||
nd disappear at any time, MPTCP uses a 64-bit data sequence number (DSN) to numb | <t pn="section-2.4-1">To ensure reliable, in-order delivery of data over | |||
er all data sent over the MPTCP connection. Each subflow has its own 32-bit sequ | subflows that may appear and disappear at any time, MPTCP uses a 64-bit Data Se | |||
ence number space, utilising the regular TCP sequence number header, and an MPTC | quence Number (DSN) to number all data sent over the MPTCP connection. Each subf | |||
P option maps the subflow sequence space to the data sequence space. In this way | low has its own 32-bit sequence number space, utilizing the regular TCP sequence | |||
, data can be retransmitted on different subflows (mapped to the same DSN) in th | number header, and an MPTCP option maps the subflow sequence space to the data | |||
e event of failure.</t> | sequence space. In this way, data can be retransmitted on different subflows (ma | |||
pped to the same DSN) in the event of failure.</t> | ||||
<t>The Data Sequence Signal (DSS) carries the Data Sequence Mapping. The Data Se | <t pn="section-2.4-2">The Data Sequence Signal (DSS) carries the Data Se | |||
quence Mapping consists of the subflow sequence number, data sequence number, an | quence Mapping. The Data Sequence Mapping consists of the subflow sequence numbe | |||
d length for which this mapping is valid. This option can also carry a connectio | r, data sequence number, and length for which this mapping is valid. This option | |||
n-level acknowledgment (the "Data ACK") for the received DSN.</t> | can also carry a connection-level acknowledgment (the "Data ACK") for the recei | |||
ved DSN.</t> | ||||
<t>With MPTCP, all subflows share the same receive buffer and advertise the same | <t pn="section-2.4-3">With MPTCP, all subflows share the same receive bu | |||
receive window. There are two levels of acknowledgment in MPTCP. Regular TCP ac | ffer and advertise the same receive window. There are two levels of acknowledgme | |||
knowledgments are used on each subflow to acknowledge the reception of the segme | nt in MPTCP. Regular TCP acknowledgments are used on each subflow to acknowledge | |||
nts sent over the subflow independently of their DSN. In addition, there are con | the reception of the segments sent over the subflow independently of their DSN. | |||
nection-level acknowledgments for the data sequence space. These acknowledgments | In addition, there are connection-level acknowledgments for the data sequence s | |||
track the advancement of the bytestream and slide the receiving window.</t> | pace. These acknowledgments track the advancement of the bytestream and slide th | |||
e receive window.</t> | ||||
<t>Further details are in <xref target="sec_generalop"/>.</t> | <t pn="section-2.4-4">Further details are given in <xref target="sec_gen | |||
eralop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>.</t> | ||||
<figure><artwork align="left"><![CDATA[ | <artwork align="left" name="" type="" alt="" pn="section-2.4-5"> | |||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
DSS -> | DSS -> | |||
[Data Sequence Mapping] | [Data Sequence Mapping] | |||
[Data ACK] | [Data ACK] | |||
[Checksum] | [Checksum] </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.5 | |||
"> | ||||
<section title="Requesting a Change in a Path's Priority"> | <name slugifiedName="name-requesting-a-change-in-a-pa">Requesting a Chan | |||
<t>Hosts can indicate at initial subflow setup whether they wish the subflow to | ge in a Path's Priority</name> | |||
be used as a regular or backup path -- a backup path only being used if there ar | <t pn="section-2.5-1">Hosts can indicate at initial subflow setup whethe | |||
e no regular paths available. During a connection, Host A can request a change i | r they wish the subflow to be used as a regular or backup path -- a backup path | |||
n the priority of a subflow through the MP_PRIO signal to Host B. Further detail | only being used if there are no regular paths available. During a connection, Ho | |||
s are in <xref target="sec_policy"/>.</t> | st A can request a change in the priority of a subflow through the MP_PRIO signa | |||
l to Host B. Further details are given in <xref target="sec_policy" format="defa | ||||
<figure><artwork align="left"><![CDATA[ | ult" sectionFormat="of" derivedContent="Section 3.3.8"/>.</t> | |||
<artwork align="left" name="" type="" alt="" pn="section-2.5-2"> | ||||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
MP_PRIO -> | MP_PRIO -> </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.6 | |||
"> | ||||
<section title="Closing an MPTCP Connection"> | <name slugifiedName="name-closing-an-mptcp-connection">Closing an MPTCP | |||
<t>When a host wants to close an existing subflow, but not the whole connection, | Connection</name> | |||
it can initiate a regular TCP FIN/ACK exchange.</t> | <t pn="section-2.6-1">When a host wants to close an existing subflow but | |||
not the whole connection, it can initiate a regular TCP FIN/ACK exchange.</t> | ||||
<t>When Host A wants to inform Host B that it has no more data to send, it signa | <t pn="section-2.6-2">When Host A wants to inform Host B that it has no | |||
ls this "Data FIN" as part of the Data Sequence Signal (see above). It has the s | more data to send, it signals this "Data FIN" as part of the DSS (see above). It | |||
ame semantics and behavior as a regular TCP FIN, but at the connection level. On | has the same semantics and behavior as a regular TCP FIN, but at the connection | |||
ce all the data on the MPTCP connection has been successfully received, then thi | level. Once all the data on the MPTCP connection has been successfully received | |||
s message is acknowledged at the connection level with a Data ACK. Further detai | , this message is acknowledged at the connection level with a Data ACK. Further | |||
ls are in <xref target="sec_close"/>.</t> | details are given in <xref target="sec_close" format="default" sectionFormat="of | |||
" derivedContent="Section 3.3.3"/>.</t> | ||||
<figure><artwork align="left"><![CDATA[ | <artwork align="left" name="" type="" alt="" pn="section-2.6-3"> | |||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
DSS -> | DSS -> | |||
[Data FIN] | [Data FIN] | |||
<- DSS | <- DSS | |||
[Data ACK] | [Data ACK] </artwork> | |||
]]></artwork></figure> | <t pn="section-2.6-4">There is an additional method of connection closur | |||
e, referred to as | ||||
<t>There is an additional method of connection closure, referred to as "Fast Clo | "Fast Close", which is analogous to closing a single-path TCP | |||
se", which is analogous to closing a single-path TCP connection with a RST signa | connection with a RST signal. The MP_FASTCLOSE signal is used to | |||
l. The MP_FASTCLOSE signal is used to indicate to the peer that the connection w | indicate to the peer that the connection will be abruptly closed and | |||
ill be abruptly closed and no data will be accepted anymore. This can be used on | no data will be accepted anymore. This can be used on an ACK (which | |||
an ACK (ensuring reliability of the signal), or a RST (which is not). Both exam | ensures reliability of the signal) or a RST (which does not). | |||
ples are shown in the following diagrams. Further details are in <xref target="s | Both examples are shown in the following diagrams. Further details are given in | |||
ec_fastclose"/>.</t> | <xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent | |||
="Section 3.5"/>.</t> | ||||
<figure><artwork align="left"><![CDATA[ | <artwork align="left" name="" type="" alt="" pn="section-2.6-5"> | |||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
ACK + MP_FASTCLOSE -> | ACK + MP_FASTCLOSE -> | |||
[B's key] | [B's key] | |||
[RST on all other subflows] -> | [RST on all other subflows] -> | |||
<- [RST on all subflows] | <- [RST on all subflows] | |||
Host A Host B | Host A Host B | |||
------ ------ | ------ ------ | |||
RST + MP_FASTCLOSE -> | RST + MP_FASTCLOSE -> | |||
[B's key] [on all subflows] | [B's key] [on all subflows] | |||
<- [RST on all subflows] | <- [RST on all subflows] </artwork> | |||
]]></artwork></figure> | </section> | |||
</section> | <section numbered="true" toc="include" removeInRFC="false" pn="section-2.7 | |||
"> | ||||
<section title="Notable Features"> | <name slugifiedName="name-notable-features">Notable Features</name> | |||
<t>It is worth highlighting that MPTCP's signaling has been designed with severa | <t pn="section-2.7-1">It is worth highlighting that MPTCP's signaling ha | |||
l key requirements in mind: | s been designed with several key requirements in mind: | |||
<list style="symbols"> | </t> | |||
<t>To cope with NATs on the path, addresses are referred to by Address IDs, in c | <ul spacing="normal" bare="false" empty="false" pn="section-2.7-2"> | |||
ase the IP packet's source | <li pn="section-2.7-2.1">To cope with NATs on the path, addresses are | |||
referred to by Address IDs, in case the IP packet's source | ||||
address gets changed by a NAT. Setting up a new TCP flow is not possible if the receiver of the SYN is behind a NAT; | address gets changed by a NAT. Setting up a new TCP flow is not possible if the receiver of the SYN is behind a NAT; | |||
to allow subflows to be created when either end is behind a NAT, MPTCP uses the | to allow subflows to be created when either end is behind a NAT, MPTCP uses the | |||
ADD_ADDR message. </t> | ADD_ADDR message. </li> | |||
<li pn="section-2.7-2.2">MPTCP falls back to ordinary TCP if MPTCP ope | ||||
<t>MPTCP falls back to ordinary TCP if MPTCP operation is not possible, for exam | ration is not | |||
ple, if one host is not MPTCP capable or if a middlebox alters the payload. This | possible -- for example, if one host is not MPTCP capable or if a middlebox alt | |||
is discussed in <xref target="sec_fallback"/>.</t> | ers the payload. This is discussed in <xref target="sec_fallback" format="defaul | |||
t" sectionFormat="of" derivedContent="Section 3.7"/>.</li> | ||||
<t>To address the threats identified in <xref target="RFC6181"/>, the following | <li pn="section-2.7-2.3">To address the threats identified in <xref ta | |||
steps are taken: keys are sent in the clear in the MP_CAPABLE messages; MP_JOIN | rget="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>, t | |||
messages are secured with HMAC-SHA256 (<xref target="RFC2104"/>, <xref target="R | he following steps are taken: keys are sent in | |||
FC6234"/>) using those keys; and standard TCP validity checks are made on the ot | the clear in the MP_CAPABLE messages; MP_JOIN messages are secured | |||
her messages (ensuring sequence numbers are in-window <xref target="RFC5961"/>). | with HMAC-SHA256 (<xref target="RFC2104" format="default" sectionForma | |||
Residual threats to MPTCP v0 were identified in <xref target="RFC7430"/>, and t | t="of" derivedContent="RFC2104"/> using | |||
hose affecting the protocol (i.e. modification to ADD_ADDR) have been incorporat | the algorithm in <xref target="RFC6234" format="default" sectionFormat | |||
ed in this document. Further discussion of security can be found in <xref target | ="of" derivedContent="RFC6234"/>) using those keys; and standard | |||
="sec_security"/>.</t> | TCP validity checks are made on the other messages (ensuring that | |||
</list></t> | sequence numbers are in‑window <xref target="RFC5961" format="default" | |||
</section> | sectionFormat="of" derivedContent="RFC5961"/>). | |||
Residual threats to MPTCP v0 were identified in <xref target="RFC7430" | ||||
format="default" sectionFormat="of" derivedContent="RFC7430"/>, and those affecting the | ||||
protocol (i.e., modifications to | ||||
ADD_ADDR) have been incorporated in this document. | ||||
Further discussion of security can be found in <xref target="sec_security" | ||||
format="default" sectionFormat="of" derivedContent="Section 5"/>.</li> | ||||
</ul> | ||||
</section> | ||||
</section> | </section> | |||
<section anchor="sec_protocol" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="MPTCP Protocol" anchor="sec_protocol"> | lse" pn="section-3"> | |||
<t>This section describes the operation of the MPTCP protocol, and is subd | <name slugifiedName="name-mptcp-operations-an-overvie">MPTCP Operations: A | |||
ivided into sections for each key part of the protocol operation.</t> | n Overview</name> | |||
<t>All MPTCP operations are signaled using optional TCP header fields. A s | <t pn="section-3-1">This section describes the operation of MPTCP. The | |||
ingle TCP option number ("Kind") has been assigned by IANA for MPTCP (see <xref | subsections below discuss each key part of the protocol operation.</t> | |||
target="IANA"/>), and then individual messages will be determined by a "subtype" | <t pn="section-3-2">All MPTCP operations are signaled using optional TCP h | |||
, the values of which are also stored in an IANA registry (and are also listed i | eader fields. A single TCP option number ("Kind") has been assigned by IANA for | |||
n <xref target="IANA"/>). As with all TCP options, the Length field is specified | MPTCP (see <xref target="IANA" format="default" sectionFormat="of" derivedConten | |||
in bytes, and includes the 2 bytes of Kind and Length.</t> | t="Section 7"/>), and then individual messages will be determined by a "subtype" | |||
<t>Throughout this document, when reference is made to an MPTCP option by | , the values of which are also stored in an IANA registry (and are also listed i | |||
symbolic name, such as "MP_CAPABLE", this refers to a TCP option with the single | n <xref target="IANA" format="default" sectionFormat="of" derivedContent="Sectio | |||
MPTCP option type, and with the subtype value of the symbolic name as defined i | n 7"/>). As with all TCP options, the Length field is specified in bytes and inc | |||
n <xref target="IANA"/>. This subtype is a 4-bit field -- the first 4 bits of th | ludes the 2 bytes of Kind and Length.</t> | |||
e option payload, as shown in <xref target="fig_option"/>. The MPTCP messages ar | <t pn="section-3-3">Throughout this document, when reference is made to an | |||
e defined in the following sections.</t> | MPTCP option by symbolic name, such as "MP_CAPABLE", this refers to a TCP optio | |||
n with the single MPTCP option type, and with the subtype value of the symbolic | ||||
<?rfc needLines='8'?> | name as defined in <xref target="IANA" format="default" sectionFormat="of" deriv | |||
<figure align="center" anchor="fig_option" title="MPTCP Option Format"> | edContent="Section 7"/>. This subtype is a 4-bit field -- the first 4 bits of th | |||
<artwork align="left"><![CDATA[ | e option payload, as shown in <xref target="fig_option" format="default" section | |||
1 2 3 | Format="of" derivedContent="Figure 3"/>. The MPTCP messages are defined in the f | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ollowing sections.</t> | |||
+---------------+---------------+-------+-----------------------+ | <figure anchor="fig_option" align="left" suppress-title="false" pn="figure | |||
| Kind | Length |Subtype| | | -3"> | |||
+---------------+---------------+-------+ | | <name slugifiedName="name-mptcp-option-format">MPTCP Option Format</name | |||
| Subtype-specific data | | > | |||
| (variable length) | | <artwork align="left" name="" type="" alt="" pn="section-3-4.1"> | |||
+---------------------------------------------------------------+ | 1 2 3 | |||
]]></artwork> | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+---------------+---------------+-------+-----------------------+ | ||||
| Kind | Length |Subtype| | | ||||
+---------------+---------------+-------+ | | ||||
| Subtype-specific data | | ||||
| (variable length) | | ||||
+---------------------------------------------------------------+ </artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3-5">Those MPTCP options associated with subflow initiation | ||||
<t>Those MPTCP options associated with subflow initiation are used on pack | are used on | |||
ets with the SYN flag set. Additionally, there is one MPTCP option for signaling | packets with the SYN flag set. Additionally, there is one MPTCP option | |||
metadata to ensure segmented data can be recombined for delivery to the applica | for signaling metadata to ensure that segmented data can be recombined for | |||
tion.</t> | delivery to the application.</t> | |||
<t>The remaining options, however, are signals that do not need to be on a | <t pn="section-3-6">The remaining options, however, are signals that do no | |||
specific packet, such as those for signaling additional addresses. Whilst an im | t need to be on | |||
plementation may desire to send MPTCP options as soon as possible, it may not be | a specific packet, such as those for signaling additional | |||
possible to combine all desired options (both those for MPTCP and for regular T | addresses. While an implementation may desire to send MPTCP options as | |||
CP, such as SACK (selective acknowledgment) <xref target="RFC2018"/>) on a singl | soon as possible, it may not be possible to combine all desired options | |||
e packet. Therefore, an implementation may choose to send duplicate ACKs contain | (both those for MPTCP and for regular TCP, such as SACK (selective | |||
ing the additional signaling information. This changes the semantics of a duplic | acknowledgment) <xref target="RFC2018" format="default" sectionFormat="of" | |||
ate ACK; these are usually only sent as a signal of a lost segment <xref target= | derivedContent="RFC2018"/>) on a single | |||
"RFC5681"/> in regular TCP. Therefore, an MPTCP implementation receiving a dupli | packet. Therefore, an implementation may choose to send duplicate ACKs | |||
cate ACK that contains an MPTCP option MUST NOT treat it as a signal of congesti | containing the additional signaling information. This changes the | |||
on. Additionally, an MPTCP implementation SHOULD NOT send more than two duplicat | semantics of a duplicate ACK; these are usually only sent as a signal of | |||
e ACKs in a row for the purposes of sending MPTCP options alone, in order to ens | a lost segment <xref target="RFC5681" format="default" sectionFormat="of" | |||
ure no middleboxes misinterpret this as a sign of congestion.</t> | derivedContent="RFC5681"/> in regular | |||
<t>Furthermore, standard TCP validity checks (such as ensuring the sequenc | TCP. Therefore, an MPTCP implementation receiving a duplicate ACK that | |||
e number and acknowledgment number are within window) MUST be undertaken before | contains an MPTCP option <bcp14>MUST NOT</bcp14> treat it as a signal of | |||
processing any MPTCP signals, as described in <xref target="RFC5961"/>, and init | congestion. Additionally, an MPTCP implementation <bcp14>SHOULD NOT</bcp14 | |||
ial subflow sequence numbers SHOULD be generated according to the recommendation | > send more than two duplicate ACKs in a row for the purposes | |||
s in <xref target="RFC6528"/>.</t> | of sending MPTCP options alone, in order to ensure that no middleboxes mis | |||
interpret this as a sign of congestion.</t> | ||||
<section title="Connection Initiation" anchor="sec_init"> | <t pn="section-3-7">Furthermore, standard TCP validity checks (such as ens | |||
<t>Connection initiation begins with a SYN, SYN/ACK, ACK exchange | uring that the | |||
sequence number and acknowledgment number are within the window) <bcp14>MU | ||||
ST</bcp14> be undertaken before processing any MPTCP signals, as described in <x | ||||
ref target="RFC5961" format="default" sectionFormat="of" derivedContent="RFC5961 | ||||
"/>, and initial subflow sequence numbers <bcp14>SHOULD</bcp14> be generated acc | ||||
ording to the recommendations in <xref target="RFC6528" format="default" section | ||||
Format="of" derivedContent="RFC6528"/>.</t> | ||||
<section anchor="sec_init" numbered="true" toc="include" removeInRFC="fals | ||||
e" pn="section-3.1"> | ||||
<name slugifiedName="name-connection-initiation">Connection Initiation</ | ||||
name> | ||||
<t pn="section-3.1-1">Connection initiation begins with a SYN, SYN/ACK, | ||||
ACK exchange | ||||
on a single path. Each packet | on a single path. Each packet | |||
contains the Multipath Capable (MP_CAPABLE) MPTCP option | contains the Multipath Capable (MP_CAPABLE) MPTCP option | |||
(<xref target="tcpm_capable"/>). This option declares its | (<xref target="tcpm_capable" format="default" sectionFormat="of" derived | |||
sender is capable of performing Multipath TCP and wishes to do | Content="Figure 4"/>). This option declares its | |||
sender is capable of performing Multipath TCP and wishes to do | |||
so on this particular connection.</t> | so on this particular connection.</t> | |||
<figure anchor="tcpm_capable" align="left" suppress-title="false" pn="fi | ||||
<t>The MP_CAPABLE exchange in this specification (v1) is different to | gure-4"> | |||
<name slugifiedName="name-multipath-capable-mp_capabl">Multipath Capab | ||||
le (MP_CAPABLE) Option</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.1-2.1"> | ||||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-------+---------------+ | ||||
| Kind | Length |Subtype|Version|A|B|C|D|E|F|G|H| | ||||
+---------------+---------------+-------+-------+---------------+ | ||||
| Option Sender's Key (64 bits) | | ||||
| (if option Length > 4) | | ||||
| | | ||||
+---------------------------------------------------------------+ | ||||
| Option Receiver's Key (64 bits) | | ||||
| (if option Length > 12) | | ||||
| | | ||||
+-------------------------------+-------------------------------+ | ||||
| Data-Level Length (16 bits) | Checksum (16 bits, optional) | | ||||
+-------------------------------+-------------------------------+ </artwork> | ||||
</figure> | ||||
<t pn="section-3.1-3">The MP_CAPABLE exchange in this specification (v1) | ||||
is different than | ||||
that specified in v0. If a host supports multiple versions | that specified in v0. If a host supports multiple versions | |||
of MPTCP, the sender of the MP_CAPABLE option SHOULD signal the | of MPTCP, the sender of the MP_CAPABLE option <bcp14>SHOULD</bcp14> sign al the | |||
highest version number it supports. In return, in its MP_CAPABLE option , | highest version number it supports. In return, in its MP_CAPABLE option , | |||
the receiver will signal the version number it wishes to use, which MUST | the receiver will signal the version number it wishes to use, which <bcp 14>MUST</bcp14> | |||
be equal to or lower than the version number indicated in the initial | be equal to or lower than the version number indicated in the initial | |||
MP_CAPABLE. | MP_CAPABLE. | |||
There is a caveat though with respect to this version negotiation with | There is a caveat, though, with respect to this version negotiation with | |||
old listeners that only support v0. A listener that supports v0 expects that | old listeners that only support v0. A listener that supports v0 expects that | |||
the MP_CAPABLE option in the SYN-segment includes the initiator's key. I | the MP_CAPABLE option in the SYN segment will include the initiator's | |||
f | key. If, however, | |||
the initiator however already upgraded to v1, it won't include the key i | the initiator already upgraded to v1, it won't include the key in the | |||
n the | SYN segment. Thus, the listener will ignore the MP_CAPABLE of this SYN s | |||
SYN-segment. Thus, the listener will ignore the MP_CAPABLE of this SYN-s | egment | |||
egment | and reply with a SYN/ACK that does not include an MP_CAPABLE. The initia | |||
and reply with a SYN/ACK that does not include an MP_CAPABLE. The initia | tor <bcp14>MAY</bcp14> | |||
tor MAY | choose to immediately fall back to TCP or <bcp14>MAY</bcp14> choose to a | |||
choose to immediately fall back to TCP or MAY choose to attempt a connec | ttempt a connection | |||
tion | ||||
using MPTCP v0 (if the initiator supports v0), in order to discover whet her the | using MPTCP v0 (if the initiator supports v0), in order to discover whet her the | |||
listener supports the earlier version of MPTCP. In general a MPTCP v0 co | listener supports the earlier version of MPTCP. In general, an MPTCP v0 | |||
nnection | connection | |||
is likely to be preferred to a TCP one, however in a particular deployme | will likely be preferred over a TCP connection; however, in a particular | |||
nt scenario | deployment scenario, | |||
it may be known that the listener is unlikely to support MPTCPv0 and so | it may be known that the listener is unlikely to support MPTCP v0 and so | |||
the | the | |||
initiator may prefer not to attempt a v0 connection. An initiator MAY ca | initiator may prefer not to attempt a v0 connection. An initiator <bcp14 | |||
che | >MAY</bcp14> cache | |||
information for a peer about what version of MPTCP it supports if any, a | information for a peer about what version of MPTCP it supports, if any, | |||
nd use | and use | |||
this information for future connection attempts.</t> | this information for future connection attempts.</t> | |||
<t pn="section-3.1-4">The MP_CAPABLE option is of variable length, with | ||||
<t>The MP_CAPABLE option is variable-length, with different fields | different fields | |||
included depending on which packet the option is used on. The full | included, depending on which packet the option is used on. The full | |||
MP_CAPABLE option is shown in <xref target="tcpm_capable"/>.</t> | MP_CAPABLE option is shown in <xref target="tcpm_capable" format="defaul | |||
t" sectionFormat="of" derivedContent="Figure 4"/>.</t> | ||||
<?rfc needLines='10'?> | <t pn="section-3.1-5">The MP_CAPABLE option is carried on the SYN, SYN/A | |||
<figure align="center" anchor="tcpm_capable" title="Multipath Capable (M | CK, and ACK packets that start the first subflow of an MPTCP connection, as well | |||
P_CAPABLE) Option"> | as the first packet that carries data, if the initiator wishes to send first. T | |||
<artwork align="left"><![CDATA[ | he data carried by each option is as follows, where A = initiator and B = listen | |||
1 2 3 | er. | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | </t> | |||
+---------------+---------------+-------+-------+---------------+ | <ul spacing="normal" bare="false" empty="false" pn="section-3.1-6"> | |||
| Kind | Length |Subtype|Version|A|B|C|D|E|F|G|H| | <li pn="section-3.1-6.1">SYN (A->B): only the first 4 octets (Lengt | |||
+---------------+---------------+-------+-------+---------------+ | h = 4).</li> | |||
| Option Sender's Key (64 bits) | | <li pn="section-3.1-6.2">SYN/ACK (B->A): B's key for this connectio | |||
| (if option Length > 4) | | n (Length = 12).</li> | |||
| | | <li pn="section-3.1-6.3">ACK (no data) (A->B): A's key followed by | |||
+---------------------------------------------------------------+ | B's key (Length = 20).</li> | |||
| Option Receiver's Key (64 bits) | | <li pn="section-3.1-6.4">ACK (with first data) (A->B): A's key foll | |||
| (if option Length > 12) | | owed by B's key followed by Data-Level Length, and optional Checksum (Length = 2 | |||
| | | 2 or 24).</li> | |||
+-------------------------------+-------------------------------+ | </ul> | |||
| Data-Level Length (16 bits) | Checksum (16 bits, optional) | | <t pn="section-3.1-7"> | |||
+-------------------------------+-------------------------------+ | The contents of the option are determined by the SYN and ACK flags of th | |||
]]></artwork> | e packet, along with the option's Length field. In <xref target="tcpm_capable" f | |||
</figure> | ormat="default" sectionFormat="of" derivedContent="Figure 4"/>, "Sender" and "Re | |||
ceiver" refer to the sender or receiver of the TCP packet (which can be either h | ||||
<t>The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK packets | ost).</t> | |||
that start the first subflow of an MPTCP connection, as well as the first packe | <t pn="section-3.1-8">The initial SYN, containing just the MP_CAPABLE he | |||
t that carries data, if the initiator wishes to send first. The data carried by | ader, is used | |||
each option is as follows, where A = initiator and B = listener. | to define the version of MPTCP being requested and also to exchange | |||
<list style="symbols"> | flags to negotiate connection features, as described later.</t> | |||
<t>SYN (A->B): only the first four octets (Length = 4).</t> | <t pn="section-3.1-9">This option is used to declare the 64-bit keys tha | |||
<t>SYN/ACK (B->A): B's Key for this connection (Length = 12).</t> | t the end hosts | |||
<t>ACK (no data) (A->B): A's Key followed by B's Key (Length = 20 | have generated for this MPTCP connection. These keys are used to | |||
).</t> | authenticate the addition of future subflows to this connection. This | |||
<t>ACK (with first data) (A->B): A's Key followed by B's Key foll | is the only time the key will be sent in the clear on the wire (unless " | |||
owed by Data-Level Length, and optional Checksum (Length = 22 or 24).</t> | Fast Close" (<xref target="sec_fastclose" format="default" sectionFormat="of" de | |||
</list> | rivedContent="Section 3.5"/>) is used); all future subflows will identify the co | |||
The contents of the option is determined by the SYN and ACK flags of the | nnection using a 32-bit "token". This token is a cryptographic hash of this key. | |||
packet, along with the option's length field. For the diagram shown in <xref ta | The algorithm for this process is dependent on the authentication algorithm sel | |||
rget="tcpm_capable"/>, "sender" and "receiver" refer to the sender or receiver o | ected; the method of selection is defined later in this section.</t> | |||
f the TCP packet (which can be either host).</t> | <t pn="section-3.1-10">Upon reception of the initial SYN segment, a stat | |||
eful server generates a random key and replies with a SYN/ACK. The key's method | ||||
<t>The initial SYN, containing just the MP_CAPABLE header, is used | of generation is implementation specific. The key <bcp14>MUST</bcp14> be hard to | |||
to define the version of MPTCP being requested, as well as exchanging | guess, and it <bcp14>MUST</bcp14> be unique for the sending host across all its | |||
flags to negotiate connection features, described later.</t> | current MPTCP connections. Recommendations for generating random numbers for us | |||
e in keys are given in <xref target="RFC4086" format="default" sectionFormat="of | ||||
<t>This option is used to declare the 64-bit keys that the end hosts hav | " derivedContent="RFC4086"/>. Connections will be indexed at each host by the to | |||
e generated for this MPTCP connection. These keys are used to authenticate the a | ken (a one-way hash of the key). Therefore, an implementation will require a map | |||
ddition of future subflows to this connection. This is the only time the key wil | ping from each token to the corresponding connection, and in turn to the keys fo | |||
l be sent in clear on the wire (unless "fast close", <xref target="sec_fastclose | r the connection.</t> | |||
"/>, is used); all future subflows will identify the connection using a 32-bit " | <t pn="section-3.1-11">There is a risk that two different keys will hash | |||
token". This token is a cryptographic hash of this key. The algorithm for this p | to the same | |||
rocess is dependent on the authentication algorithm selected; the method of sele | token. The risk of hash collisions is usually small, unless the host | |||
ction is defined later in this section.</t> | is handling many tens of thousands of connections. Therefore, an | |||
implementation <bcp14>SHOULD</bcp14> check its list of connection | ||||
<t>Upon reception of the initial SYN-segment, a stateful server generate | tokens to ensure that there is no collision before sending its key, | |||
s a random key and replies with a SYN/ACK. The key's method of generation is imp | and if there is, then it should generate a new key. This would, | |||
lementation specific. The key MUST be hard to guess, and it MUST be unique for t | however, be costly for a server with thousands of connections. The | |||
he sending host across all its current MPTCP connections. Recommendations for ge | subflow handshake mechanism (<xref target="sec_join" format="default" se | |||
nerating random numbers for use in keys are given in <xref target="RFC4086"/>. C | ctionFormat="of" derivedContent="Section 3.2"/>) will ensure that new subflows o | |||
onnections will be indexed at each host by the token (a one-way hash of the key) | nly join the | |||
. Therefore, an implementation will require a mapping from each token to the cor | correct connection, however, through the cryptographic handshake, as | |||
responding connection, and in turn to the keys for the connection.</t> | well as checking the connection tokens in both directions, and | |||
ensuring that sequence numbers are in-window. So, in the worst case, if | ||||
<t>There is a risk that two different keys will hash to the same token. | there was a token collision, the new subflow would not succeed, but the MPTCP co | |||
The risk of hash collisions is usually small, unless the host is handling many t | nnection would continue to provide a regular TCP service.</t> | |||
ens of thousands of connections. Therefore, an implementation SHOULD check its l | <t pn="section-3.1-12">Since key generation is implementation specific, | |||
ist of connection tokens to ensure there is no collision before sending its key, | there is no | |||
and if there is, then it should generate a new key. This would, however, be cos | requirement that they simply be random numbers. An implementation is | |||
tly for a server with thousands of connections. The subflow handshake mechanism | free to exchange cryptographic material out of band and generate these | |||
(<xref target="sec_join"/>) will ensure that new subflows only join the correct | keys from this material, in order to provide additional mechanisms by wh | |||
connection, however, through the cryptographic handshake, as well as checking th | ich to verify the identity of the communicating entities. For example, an implem | |||
e connection tokens in both directions, and ensuring sequence numbers are in-win | entation could choose to link its MPTCP keys to those used in higher-layer TLS o | |||
dow. So in the worst case if there was a token collision, the new subflow would | r SSH connections.</t> | |||
not succeed, but the MPTCP connection would continue to provide a regular TCP se | <t pn="section-3.1-13">If the server behaves in a | |||
rvice.</t> | ||||
<t>Since key generation is implementation-specific, there is no r | ||||
equirement that they be simply random numbers. An implementation is free to exch | ||||
ange cryptographic material out-of-band and generate these keys from this, in or | ||||
der to provide additional mechanisms by which to verify the identity of the comm | ||||
unicating entities. For example, an implementation could choose to link its MPTC | ||||
P keys to those used in higher-layer TLS or SSH connections.</t> | ||||
<t>If the server behaves in a | ||||
stateless manner, it has to generate its own key in a verifiable | stateless manner, it has to generate its own key in a verifiable | |||
fashion. This verifiable way of generating the key can be done by | fashion. This verifiable way of generating the key can be done by | |||
using a hash of the 4-tuple, sequence number and a local secret | using a hash of the 4-tuple, sequence number, and a local secret | |||
(similar to what is done for the TCP-sequence number <xref target="RFC49 | (similar to what is done for the TCP sequence number <xref target="RFC49 | |||
87"/>). | 87" format="default" sectionFormat="of" derivedContent="RFC4987"/>). | |||
It will thus be able to verify whether it is indeed the originator of | It will thus be able to verify whether it is indeed the originator of | |||
the key echoed back in the later MP_CAPABLE option. | the key echoed back in the subsequent MP_CAPABLE option. | |||
As for a stateful server, the tokens SHOULD be checked for uniqueness, h | As for a stateful server, the tokens <bcp14>SHOULD</bcp14> be checked fo | |||
owever | r uniqueness; however, | |||
if uniqueness is not met, and there is no way to generate an alternative | if uniqueness is not met and there is no way to generate an alternative | |||
verifiable | verifiable | |||
key, then the connection MUST fall back to using regular TCP by not send | key, then the connection <bcp14>MUST</bcp14> fall back to using regular | |||
ing a | TCP by not sending an | |||
MP_CAPABLE in the SYN/ACK.</t> | MP_CAPABLE in the SYN/ACK.</t> | |||
<t pn="section-3.1-14">The ACK carries both A's key and B's key. This is | ||||
<t>The ACK carries both A's key and B's key. This is the first time that | the first time that A's key is seen on the wire, although it is expected that A | |||
A's key is seen on the wire, although it is expected that A will have generated | will have generated a key locally before the initial SYN. The echoing of B's ke | |||
a key locally before the initial SYN. The echoing of B's key allows B to operat | y allows B to operate statelessly, as described above. Therefore, A's key must b | |||
e statelessly, as described above. Therefore, A's key must be delivered reliably | e delivered reliably to B, and in order to do this, the transmission of this pac | |||
to B, and in order to do this, the transmission of this packet must be made rel | ket must be made reliable.</t> | |||
iable.</t> | <t pn="section-3.1-15">If B has data to send first, then the reliable de | |||
livery of the | ||||
<t>If B has data to send first, then the reliable delivery of the ACK+MP | ACK + MP_CAPABLE is ensured by the receipt of this data with an | |||
_CAPABLE can be inferred by the receipt of this data with a MPTCP Data Sequence | MPTCP Data Sequence Signal (DSS) option (<xref target="sec_generalop" fo | |||
Signal (DSS) option (<xref target="sec_generalop"/>). If, however, A wishes to s | rmat="default" sectionFormat="of" derivedContent="Section 3.3"/>) containing a D | |||
end data first, it has two options to ensure the reliable delivery of the ACK+MP | ATA_ACK for the MP_CAPABLE (which is | |||
_CAPABLE. If it immediately has data to send, then the third ACK (with data) wou | the first octet of the data sequence space). If, however, A wishes to sen | |||
ld also contain an MP_CAPABLE option with additional data parameters (the Data-L | d data first, it has | |||
evel Length and optional Checksum as shown in <xref target="tcpm_capable"/>). If | two options to ensure the reliable delivery of the ACK + MP_CAPABLE. If | |||
A does not immediately have data to send, it MUST include the MP_CAPABLE on the | it immediately has data to send, then the first ACK (with data) would | |||
third ACK, but without the additional data parameters. When A does have data to | also contain an MP_CAPABLE option with additional data parameters (the | |||
send, it must repeat the sending of the MP_CAPABLE option from the third ACK, w | Data-Level Length and optional Checksum as shown in <xref target="tcpm_c | |||
ith additional data parameters. This MP_CAPABLE option is in place of the DSS, a | apable" format="default" sectionFormat="of" derivedContent="Figure 4"/>). If A d | |||
nd simply specifies the data-level length of the payload, and the checksum (if t | oes not immediately | |||
he use of checksums is negotiated). This is the minimal data required to establi | have data to send, it <bcp14>MUST</bcp14> include the MP_CAPABLE on | |||
sh a MPTCP connection - it allows validation of the payload, and given it is the | the first ACK, but without the additional data parameters. When A does | |||
first data, the Initial Data Sequence Number (IDSN) is also known (as it is gen | have data to send, it must repeat the sending of the MP_CAPABLE option | |||
erated from the key, as described below). Conveying the keys on the first data p | from the first ACK, with additional data parameters. This MP_CAPABLE | |||
acket allows the TCP reliability mechanisms to ensure the packet is successfully | option is used in place of the DSS and simply specifies (1) the Data-Lev | |||
delivered. The receiver will acknowledge this data at the connection level with | el | |||
a Data ACK, as if a DSS option has been received.</t> | Length of the payload and (2) the checksum (if the use of checksums is | |||
negotiated). This is the minimal data required to establish an MPTCP | ||||
<t>There could be situations where both A and B attempt to transmit init | connection -- it allows validation of the payload, and given that it is | |||
ial data at the same time. For example, if A did not initially have data to send | the | |||
, but then needed to transmit data before it had received anything from B, it wo | first data, the Initial Data Sequence Number (IDSN) is also known (as | |||
uld use a MP_CAPABLE option with data parameters (since it would not know if the | it is generated from the key, as described below). Conveying the keys | |||
MP_CAPABLE on the ACK was received). In such a situation, B may also have trans | on the first data packet allows the TCP reliability mechanisms to | |||
mitted data with a DSS option, but it had not yet been received at A. Therefore, | ensure that the packet is successfully delivered. The receiver will ackn | |||
B has received data with a MP_CAPABLE mapping after it has sent data with a DSS | owledge this data at the connection level with a Data ACK, as if a DSS option ha | |||
option. To ensure these situations can be handled, it follows that the data par | s been received.</t> | |||
ameters in a MP_CAPABLE are semantically equivalent to those in a DSS option and | <t pn="section-3.1-16">There could be situations where both A and B atte | |||
can be used interchangeably. Similar situations could occur when the MP_CAPABLE | mpt to transmit | |||
with data is lost and retransmitted. Furthermore, in the case of TCP Segmentati | initial data at the same time. For example, if A did not initially | |||
on Offloading, the MP_CAPABLE with data parameters may be duplicated across mult | have data to send but then needed to transmit data before it had | |||
iple packets, and implementations must also be able to cope with duplicate MP_CA | received anything from B, it would use an MP_CAPABLE option with data | |||
PABLE mappings as well as duplicate DSS mappings.</t> | parameters (since it would not know if the MP_CAPABLE on the ACK was | |||
received). In such a situation, B may also have transmitted data with | ||||
<t>Additionally, the MP_CAPABLE exchange allows the safe passage of MPTC | a DSS option, but it had not yet been received at A. Therefore, B has | |||
P options on SYN packets to be determined. If any of these options are dropped, | received data with an MP_CAPABLE mapping after it has sent data with a | |||
MPTCP will gracefully fall back to regular single-path TCP, as documented in <xr | DSS option. To ensure that these situations can be handled, it follows t | |||
ef target="sec_fallback"/>. If at any point in the handshake either party think | hat the data parameters in an MP_CAPABLE are semantically equivalent to those in | |||
s the MPTCP negotiation is compromised, for example by a middlebox corrupting th | a DSS option and can be used interchangeably. Similar situations could occur wh | |||
e TCP options, or unexpected ACK numbers being present, the host MUST stop using | en the MP_CAPABLE with data is lost and retransmitted. Furthermore, in the case | |||
MPTCP and no longer include MPTCP options in future TCP packets. The other host | of TCP segmentation offloading, the MP_CAPABLE with data parameters may be dupli | |||
will then also fall back to regular TCP using the fall back mechanism. Note th | cated across multiple packets, and implementations must also be able to cope wit | |||
at new subflows MUST NOT be established (using the process documented in <xref t | h duplicate MP_CAPABLE mappings as well as duplicate DSS mappings.</t> | |||
arget="sec_join"/>) until a Data Sequence Signal (DSS) option has been successfu | <t pn="section-3.1-17">Additionally, the MP_CAPABLE exchange allows the | |||
lly received across the path (as documented in <xref target="sec_generalop"/>).< | safe passage of | |||
/t> | MPTCP options on SYN packets to be determined. If any of these options | |||
are dropped, MPTCP will gracefully fall back to regular single-path | ||||
<t>Like all MPTCP options, the MP_CAPABLE option starts with the Kind an | TCP, as documented in <xref target="sec_fallback" format="default" secti | |||
d Length to specify the TCP-option kind and its length. Followed by that is the | onFormat="of" derivedContent="Section 3.7"/>. | |||
MP_CAPABLE option. The first 4 bits of the first octet in the MP_CAPABLE option | If at any point in the handshake either party thinks the MPTCP | |||
(<xref target="tcpm_capable"/>) define the MPTCP option subtype (see <xref targe | negotiation is compromised -- for example, by a middlebox corrupting | |||
t="IANA"/>; for MP_CAPABLE, this is 0x0), and the remaining 4 bits of this octet | the TCP options or by unexpected ACK numbers being present -- the host < | |||
specify the MPTCP version in use (for this specification, this is 1).</t> | bcp14>MUST</bcp14> stop using MPTCP and no longer include MPTCP options in futur | |||
e TCP packets. The other host will then also fall back to regular TCP using the | ||||
<t>The second octet is reserved for flags, allocated as follows: | fallback mechanism. Note that new subflows <bcp14>MUST NOT</bcp14> be establish | |||
ed (using the process documented in <xref target="sec_join" format="default" sec | ||||
<list style="hanging"> | tionFormat="of" derivedContent="Section 3.2"/>) until a DSS option has been succ | |||
<t hangText="A:"> The leftmost bit, labeled "A", SHOULD be set to 1 to | essfully received across the path (as documented in <xref target="sec_generalop" | |||
indicate "Checksum Required", unless the system administrator has decided that | format="default" sectionFormat="of" derivedContent="Section 3.3"/>).</t> | |||
checksums are not required (for example, if the environment is controlled and no | <t pn="section-3.1-18">Like all MPTCP options, the MP_CAPABLE option sta | |||
middleboxes exist that might adjust the payload).</t> | rts with the Kind | |||
<t hangText="B:"> The second bit, labeled "B", is an extensibility fla | and Length to specify the TCP option's kind and length. This | |||
g, and MUST be set to 0 for current implementations. This will be used for an ex | information is followed by the MP_CAPABLE option. The first 4 bits of | |||
tensibility mechanism in a future specification, and the impact of this flag wil | the first octet in the MP_CAPABLE option (<xref target="tcpm_capable" fo | |||
l be defined at a later date. It is expected, but not mandated, that this flag w | rmat="default" sectionFormat="of" derivedContent="Figure 4"/>) define the MPTCP | |||
ould be used as part of an alternative security mechanism that does not require | Option Subtype (see <xref target="IANA" format="default" sectionFormat="of" deri | |||
a full version upgrade of the protocol, but does require redefining some element | vedContent="Section 7"/>; for MP_CAPABLE, this value is | |||
s of the handshake. If receiving a message with the 'B' flag set to 1, and this | 0x0), and the remaining 4 bits of this octet specify the MPTCP | |||
is not understood, then the MP_CAPABLE in this SYN MUST be silently ignored, whi | version in use (for this specification, this value is 1).</t> | |||
ch triggers a fallback to regular TCP; the sender is expected to retry with a fo | <t pn="section-3.1-19">The second octet is reserved for flags, allocated | |||
rmat compatible with this legacy specification. Note that the length of the MP_C | as follows: | |||
APABLE option, and the meanings of bits "D" through "H", may be altered by setti | ||||
ng B=1.</t> | ||||
<t hangText="C:"> The third bit, labeled "C", is set to "1" to indicat | ||||
e that the sender of this option will not accept additional MPTCP subflows to th | ||||
e source address and port, and therefore the receiver MUST NOT try to open any a | ||||
dditional subflows towards this address and port. This is an efficiency improvem | ||||
ent for situations where the sender knows a restriction is in place, for example | ||||
if the sender is behind a strict NAT, or operating behind a legacy Layer 4 load | ||||
balancer.</t> | ||||
<t hangText="D through H:"> The remaining bits, labeled "D" through "H | ||||
", are used for crypto algorithm negotiation. In this specification only the ri | ||||
ghtmost bit, labeled "H", is assigned. Bit "H" indicates the use of HMAC-SHA256 | ||||
(as defined in <xref target="sec_join"/>). An implementation that only support | ||||
s this method MUST set bit "H" to 1, and bits "D" through "G" to 0.</t> | ||||
</list> | ||||
A crypto algorithm MUST be specified. If flag bits D through H are all | ||||
0, the MP_CAPABLE option MUST be treated as invalid and ignored (that is, it mus | ||||
t be treated as a regular TCP handshake).</t> | ||||
<t>The selection of the authentication algorithm also impacts the algori | ||||
thm used to generate the token and the Initial Data Sequence Number (IDSN). In t | ||||
his specification, with only the SHA-256 algorithm (bit "H") specified and selec | ||||
ted, the token MUST be a truncated (most significant 32 bits) SHA-256 hash (<xre | ||||
f target="RFC6234"/>) of the key. A different, 64-bit truncation (the least sign | ||||
ificant 64 bits) of the SHA-256 hash of the key MUST be used as the IDSN. Note t | ||||
hat the key MUST be hashed in network byte order. Also note that the "least sign | ||||
ificant" bits MUST be the rightmost bits of the SHA-256 digest, as per <xref tar | ||||
get="RFC6234"/>. Future specifications of the use of the crypto bits may choose | ||||
to specify different algorithms for token and IDSN generation.</t> | ||||
<t>Both the crypto and checksum bits negotiate capabilities in similar w | ||||
ays. For the Checksum Required bit (labeled "A"), if either host requires the us | ||||
e of checksums, checksums MUST be used. In other words, the only way for checksu | ||||
ms not to be used is if both hosts in their SYNs set A=0. This decision is confi | ||||
rmed by the setting of the "A" bit in the third packet (the ACK) of the handshak | ||||
e. For example, if the initiator sets A=0 in the SYN, but the responder sets A=1 | ||||
in the SYN/ACK, checksums MUST be used in both directions, and the initiator wi | ||||
ll set A=1 in the ACK. The decision whether to use checksums will be stored by a | ||||
n implementation in a per-connection binary state variable. If A=1 is received b | ||||
y a host that does not want to use checksums, it MUST fall back to regular TCP b | ||||
y ignoring the MP_CAPABLE option as if it was invalid.</t> | ||||
<t>For crypto negotiation, the responder has the choice. The initiator c | ||||
reates a proposal setting a bit for each algorithm it supports to 1 (in this ver | ||||
sion of the specification, there is only one proposal, so bit "H" will be always | ||||
set to 1). The responder responds with only 1 bit set -- this is the chosen alg | ||||
orithm. The rationale for this behavior is that the responder will typically be | ||||
a server with potentially many thousands of connections, so it may wish to choos | ||||
e an algorithm with minimal computational complexity, depending on the load. If | ||||
a responder does not support (or does not want to support) any of the initiator' | ||||
s proposals, it MUST respond without an MP_CAPABLE option, thus forcing a fallba | ||||
ck to regular TCP.</t> | ||||
<t>The MP_CAPABLE option is only used in the first subflow of a connecti | </t> | |||
on, in order to identify the connection; all following subflows will use the "Jo | <dl newline="false" spacing="normal" indent="14" pn="section-3.1-20"> | |||
in" option (see <xref target="sec_join"/>) to join the existing connection.</t> | <dt pn="section-3.1-20.1">A:</dt> | |||
<t>If a SYN contains an MP_CAPABLE option but the | <dd pn="section-3.1-20.2"> The leftmost bit, labeled "A", <bcp14>SHOUL | |||
SYN/ACK does not, it is assumed that sender of the SYN/ACK is not | D</bcp14> be set to 1 to indicate "Checksum required", unless the system adminis | |||
multipath capable; thus, the MPTCP session MUST operate as | trator has decided that checksums are not required (for example, if the environm | |||
a regular, single-path TCP. If a SYN does not contain a | ent is controlled and no middleboxes exist that might adjust the payload).</dd> | |||
MP_CAPABLE option, the SYN/ACK MUST NOT contain one | <dt pn="section-3.1-20.3">B:</dt> | |||
<dd pn="section-3.1-20.4"> The second bit, labeled "B", is an extensib | ||||
ility flag. It | ||||
<bcp14>MUST</bcp14> be set to 0 for current implementations. This | ||||
flag will be used for an extensibility mechanism in a future specifica | ||||
tion, and the impact of this flag will be defined at a later date. It is expecte | ||||
d, but not mandated, that this flag would be used as part of an alternative secu | ||||
rity mechanism that does not require a full version upgrade of the protocol but | ||||
does require redefining some elements of the handshake. If receiving a message w | ||||
ith the "B" flag set to 1 and this is not understood, then the MP_CAPABLE in thi | ||||
s SYN <bcp14>MUST</bcp14> be silently ignored, which triggers a fallback to regu | ||||
lar TCP; the sender is expected to retry with a format compatible with this lega | ||||
cy specification. Note that the length of the MP_CAPABLE option, and the meaning | ||||
s of bits "D" through "H", may be altered by setting B=1.</dd> | ||||
<dt pn="section-3.1-20.5">C:</dt> | ||||
<dd pn="section-3.1-20.6"> The third bit, labeled "C", is set to 1 to | ||||
indicate that the | ||||
sender of this option will not accept additional MPTCP subflows to | ||||
the source address and port, and therefore the receiver <bcp14>MUST NO | ||||
T</bcp14> try to open any additional subflows toward this address | ||||
and port. This improves efficiency in situations where the | ||||
sender knows a restriction is in place -- for example, if the sender i | ||||
s behind a strict NAT or operating behind a legacy Layer 4 load balancer.</dd> | ||||
<dt pn="section-3.1-20.7">D through H:</dt> | ||||
<dd pn="section-3.1-20.8"> The remaining bits, labeled "D" through "H" | ||||
, are used for | ||||
crypto algorithm negotiation. In this specification, only the | ||||
rightmost bit, labeled "H", is assigned. Bit "H" indicates the use | ||||
of HMAC-SHA256 (as defined in <xref target="sec_join" format="default" | ||||
sectionFormat="of" derivedContent="Section 3.2"/>). An implementation that onl | ||||
y supports this | ||||
method <bcp14>MUST</bcp14> set bit "H" to 1 and bits "D" | ||||
through "G" to 0.</dd> | ||||
</dl> | ||||
<t pn="section-3.1-21">A crypto algorithm <bcp14>MUST</bcp14> be specifi | ||||
ed. If flag bits "D" through "H" are all 0, the MP_CAPABLE option <bcp14>MUST</ | ||||
bcp14> be treated as invalid and ignored (that is, it must be treated as a regul | ||||
ar TCP handshake).</t> | ||||
<t pn="section-3.1-22">The selection of the authentication algorithm als | ||||
o impacts the algorithm used to generate the token and the IDSN. In this specifi | ||||
cation, with only the SHA-256 algorithm (bit "H") specified and selected, the to | ||||
ken <bcp14>MUST</bcp14> be a truncated (most significant 32 bits) SHA-256 hash < | ||||
xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC623 | ||||
4"/> of the key. A different, 64-bit truncation (the least significant 64 bits) | ||||
of the SHA-256 hash of the key <bcp14>MUST</bcp14> be used as the IDSN. Note tha | ||||
t the key <bcp14>MUST</bcp14> be hashed in network byte order. Also note that th | ||||
e "least significant" bits <bcp14>MUST</bcp14> be the rightmost bits of the SHA- | ||||
256 digest, as per <xref target="RFC6234" format="default" sectionFormat="of" de | ||||
rivedContent="RFC6234"/>. Future specifications of the use of the crypto bits ma | ||||
y choose to specify different algorithms for token and IDSN generation.</t> | ||||
<t pn="section-3.1-23">Both the crypto and checksum bits negotiate capab | ||||
ilities in similar | ||||
ways. For the "Checksum required" bit (labeled "A"), if either host | ||||
requires the use of checksums, checksums <bcp14>MUST</bcp14> be | ||||
used. In other words, the only way for checksums not to be used is if | ||||
both hosts in their SYNs set A=0. This decision is confirmed by the | ||||
setting of the "A" bit in the third packet (the ACK) of the | ||||
handshake. For example, if the initiator sets A=0 in the SYN but the | ||||
responder sets A=1 in the SYN/ACK, checksums <bcp14>MUST</bcp14> be | ||||
used in both directions, and the initiator will set A=1 in the | ||||
ACK. The decision regarding whether to use checksums will be stored by a | ||||
n implementation in a per-connection binary state variable. If A=1 is received b | ||||
y a host that does not want to use checksums, it <bcp14>MUST</bcp14> fall back t | ||||
o regular TCP by ignoring the MP_CAPABLE option as if it was invalid.</t> | ||||
<t pn="section-3.1-24">For crypto negotiation, the responder has the cho | ||||
ice. The initiator | ||||
creates a proposal setting a bit for each algorithm it supports to 1 | ||||
(in this version of the specification, there is only one proposal, so | ||||
bit "H" will always be set to 1). The responder responds with only 1 bit | ||||
set -- this is the chosen algorithm. The rationale for this behavior is that th | ||||
e responder will typically be a server with potentially many thousands of connec | ||||
tions, so it may wish to choose an algorithm with minimal computational complexi | ||||
ty, depending on the load. If a responder does not support (or does not want to | ||||
support) any of the initiator's proposals, it <bcp14>MUST</bcp14> respond withou | ||||
t an MP_CAPABLE option, thus forcing a fallback to regular TCP.</t> | ||||
<t pn="section-3.1-25">The MP_CAPABLE option is only used in the first s | ||||
ubflow of a | ||||
connection, in order to identify the connection; all subsequent | ||||
subflows will use the MP_JOIN option (see <xref target="sec_join" format | ||||
="default" sectionFormat="of" derivedContent="Section 3.2"/>) to join the existi | ||||
ng connection.</t> | ||||
<t pn="section-3.1-26">If a SYN contains an MP_CAPABLE option but the | ||||
SYN/ACK does not, it is assumed that the sender of the SYN/ACK is not | ||||
multipath capable; thus, the MPTCP session <bcp14>MUST</bcp14> operate a | ||||
s | ||||
a regular, single-path TCP session. If a SYN does not contain an | ||||
MP_CAPABLE option, the SYN/ACK <bcp14>MUST NOT</bcp14> contain one | ||||
in response. If the third packet (the ACK) does not contain | in response. If the third packet (the ACK) does not contain | |||
the MP_CAPABLE option, then the session MUST fall back to | the MP_CAPABLE option, then the session <bcp14>MUST</bcp14> fall back to | |||
operating as a regular, single-path TCP. This is to maintain | operating as a regular, single-path TCP session. This is done to maintai | |||
n | ||||
compatibility with middleboxes on the path that drop some | compatibility with middleboxes on the path that drop some | |||
or all TCP options. Note that an implementation MAY choose | or all TCP options. Note that an implementation <bcp14>MAY</bcp14> choose | |||
to attempt sending MPTCP options more than one time before | to attempt sending MPTCP options more than one time before | |||
making this decision to operate as regular TCP (see | making this decision to operate as regular TCP (see | |||
<xref target="heuristics"/>).</t> | <xref target="heuristics" format="default" sectionFormat="of" derivedCon | |||
tent="Section 3.9"/>).</t> | ||||
<t>If the SYN packets are unacknowledged, it is up to local | <t pn="section-3.1-27">If the SYN packets are unacknowledged, it is up t | |||
o local | ||||
policy to decide how to respond. It is expected that a sender | policy to decide how to respond. It is expected that a sender | |||
will eventually fall back to single-path TCP (i.e., without the | will eventually fall back to single-path TCP (i.e., without the | |||
MP_CAPABLE option) in order to work around middleboxes that | MP_CAPABLE option) in order to work around middleboxes that | |||
may drop packets with unknown options; however, the number of | may drop packets with unknown options; however, the number of | |||
multipath-capable attempts that are made first will be up to | multipath-capable attempts that are made first will be up to | |||
local policy. | local policy. | |||
It is possible that MPTCP and non-MPTCP SYNs could get reordered | It is possible that MPTCP and non-MPTCP SYNs could get reordered | |||
in the network. Therefore, the final state is inferred from the | in the network. Therefore, the final state is inferred from the | |||
presence or absence of the MP_CAPABLE option in the third packet | presence or absence of the MP_CAPABLE option in the third packet | |||
of the TCP handshake. If this option is not present, the | of the TCP handshake. If this option is not present, the | |||
connection SHOULD fall back to regular TCP, as documented in | connection <bcp14>SHOULD</bcp14> fall back to regular TCP, as documented | |||
<xref target="sec_fallback"/>.</t> | in | |||
<xref target="sec_fallback" format="default" sectionFormat="of" derivedC | ||||
<t>The initial data sequence number on an MPTCP connection | ontent="Section 3.7"/>.</t> | |||
<t pn="section-3.1-28">The IDSN on an MPTCP connection | ||||
is generated from the key. The algorithm for IDSN generation is | is generated from the key. The algorithm for IDSN generation is | |||
also determined from the negotiated authentication algorithm. | also determined from the negotiated authentication algorithm. | |||
In this specification, with only the SHA-256 algorithm specified and | In this specification, with only the SHA-256 algorithm specified and | |||
selected, the IDSN of a host MUST be the least significant 64 bits of the | selected, the IDSN of a host <bcp14>MUST</bcp14> be the least significant 64 bits of the | |||
SHA-256 hash of its key, i.e., IDSN-A = Hash(Key-A) and IDSN-B = Hash(Key-B). | SHA-256 hash of its key, i.e., IDSN-A = Hash(Key-A) and IDSN-B = Hash(Key-B). | |||
This deterministic generation of the IDSN allows a receiver to ensure | This deterministic generation of the IDSN allows a receiver to ensure | |||
that there are no gaps in sequence space at the start of the connection. | that there are no gaps in sequence space at the start of the connection. | |||
The SYN with MP_CAPABLE occupies the first octet of data sequence space, | The SYN with MP_CAPABLE occupies the first octet of data sequence space, | |||
although this does not need to be acknowledged at the connection level | although this does not need to be acknowledged at the connection level | |||
until the first data is sent (see <xref target="sec_generalop"/>).</t> | until the first data is sent (see <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>).</t> | |||
</section> | </section> | |||
<section anchor="sec_join" numbered="true" toc="include" removeInRFC="fals | ||||
<section title="Starting a New Subflow" anchor="sec_join"> | e" pn="section-3.2"> | |||
<t>Once an MPTCP connection has begun with the MP_CAPABLE | <name slugifiedName="name-starting-a-new-subflow">Starting a New Subflow | |||
</name> | ||||
<t pn="section-3.2-1">Once an MPTCP connection has begun with the MP_CAP | ||||
ABLE | ||||
exchange, further subflows can be added to the connection. | exchange, further subflows can be added to the connection. | |||
Hosts have knowledge of their own address(es), and can | Hosts have knowledge of their own address(es) and can | |||
become aware of the other host's addresses through | become aware of the other host's addresses through | |||
signaling exchanges as described in | signaling exchanges as described in | |||
<xref target="sec_pm"/>. Using this knowledge, a host | <xref target="sec_pm" format="default" sectionFormat="of" derivedContent="Section 3.4"/>. Using this knowledge, a host | |||
can initiate a new subflow over a currently unused pair of | can initiate a new subflow over a currently unused pair of | |||
addresses. It is permitted for either host in a connection | addresses. It is permissible for either host in a connection | |||
to initiate the creation of a new subflow, but it is expected | to initiate the creation of a new subflow, but it is expected | |||
that this will normally be the original connection initiator | that this will normally be the original connection initiator | |||
(see <xref target="heuristics"/> for heuristics).</t> | (see <xref target="heuristics" format="default" sectionFormat="of" deriv | |||
edContent="Section 3.9"/> for heuristics).</t> | ||||
<t>A new subflow is started as a normal TCP SYN/ACK | <t pn="section-3.2-2">A new subflow is started as a normal TCP SYN/ACK | |||
exchange. The Join Connection (MP_JOIN) MPTCP option | exchange. The Join Connection (MP_JOIN) MPTCP option | |||
is used to identify the connection to be joined by the new subflow. | is used to identify the connection to be joined by the new subflow. | |||
It uses keying material that was exchanged in the initial MP_CAPABLE | It uses keying material that was exchanged in the initial MP_CAPABLE | |||
handshake (<xref target="sec_init"/>), and that handshake also | handshake (<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>), and that handshake also | |||
negotiates the crypto algorithm in use for the MP_JOIN handshake.</t> | negotiates the crypto algorithm in use for the MP_JOIN handshake.</t> | |||
<t pn="section-3.2-3">This section specifies the behavior of MP_JOIN usi | ||||
<t>This section specifies the behavior of MP_JOIN using the HMAC-SHA256 | ng the HMAC-SHA256 | |||
algorithm. An MP_JOIN option is present in the SYN, SYN/ACK, | algorithm. An MP_JOIN option is present in the SYN, SYN/ACK, | |||
and ACK of the three-way handshake, although in each case with a | and ACK of the three-way handshake, although in each case with a | |||
different format.</t> | different format.</t> | |||
<t pn="section-3.2-4">In the first MP_JOIN on the SYN packet, illustrate | ||||
<t>In the first MP_JOIN on the SYN packet, illustrated in | d in | |||
<xref target="tcpm_join"/>, the initiator sends a token, random | <xref target="tcpm_join" format="default" sectionFormat="of" derivedCont | |||
number, and address ID.</t> | ent="Figure 5"/>, the initiator sends a token, random | |||
number, and Address ID.</t> | ||||
<t>The token is used to identify the MPTCP connection and is a | <figure anchor="tcpm_join" align="left" suppress-title="false" pn="figur | |||
e-5"> | ||||
<name slugifiedName="name-join-connection-mp_join-opt">Join Connection | ||||
(MP_JOIN) Option (for Initial SYN)</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.2-5.1"> | ||||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| Kind | Length = 12 |Subtype|(rsv)|B| Address ID | | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| Receiver's Token (32 bits) | | ||||
+---------------------------------------------------------------+ | ||||
| Sender's Random Number (32 bits) | | ||||
+---------------------------------------------------------------+ </artwork> | ||||
</figure> | ||||
<t pn="section-3.2-6">The token is used to identify the MPTCP connection | ||||
and is a | ||||
cryptographic hash of the receiver's key, as exchanged | cryptographic hash of the receiver's key, as exchanged | |||
in the initial MP_CAPABLE handshake (<xref target="sec_init"/>). | in the initial MP_CAPABLE handshake (<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>). | |||
In this specification, the tokens presented in this | In this specification, the tokens presented in this | |||
option are generated by the SHA-256 <xref target="RFC6234"/> | option are generated by the SHA-256 algorithm <xref target="RFC6234" for | |||
algorithm, truncated to the most significant 32 bits. The token | mat="default" sectionFormat="of" derivedContent="RFC6234"/>, truncated to the mo | |||
st significant 32 bits. The token | ||||
included in the MP_JOIN option is the token that the receiver | included in the MP_JOIN option is the token that the receiver | |||
of the packet uses to identify this connection; i.e., Host A | of the packet uses to identify this connection; i.e., Host A | |||
will send Token-B (which is generated from Key-B). Note that the | will send Token-B (which is generated from Key-B). Note that the | |||
hash generation algorithm can be overridden by the choice of | hash generation algorithm can be overridden by the choice of | |||
cryptographic handshake algorithm, as defined in <xref target="sec_init" | cryptographic handshake algorithm, as defined in <xref target="sec_init" | |||
/>.</t> | format="default" sectionFormat="of" derivedContent="Section 3.1"/>.</t> | |||
<t pn="section-3.2-7">The MP_JOIN SYN sends not only the token (which is | ||||
<t>The MP_JOIN SYN sends not only the token (which is static for a | static for a | |||
connection) but also random numbers (nonces) that are used to prevent | connection) but also random numbers (nonces) that are used to prevent | |||
replay attacks on the authentication method. Recommendations for the | replay attacks on the authentication method. Recommendations for the | |||
generation of random numbers for this purpose are given in <xref target= | generation of random numbers for this purpose are given in <xref target= | |||
"RFC4086"/>.</t> | "RFC4086" format="default" sectionFormat="of" derivedContent="RFC4086"/>.</t> | |||
<t pn="section-3.2-8">The MP_JOIN option includes an "Address ID". This | ||||
<t>The MP_JOIN option includes an "Address ID". This is an identifier | is an identifier | |||
generated by the sender of the option, used to identify the source address | generated by the sender of the option, used to identify the source address | |||
of this packet, even if the IP header has been changed in transit by a middlebox. | of this packet, even if the IP header has been changed in transit by a middlebox. | |||
The numeric value of this field is generated by the sender and must map uniquely | The numeric value of this field is generated by the sender and must map uniquely | |||
to a source IP address for the sending host. | to a source IP address for the sending host. | |||
The Address ID allows address removal (<xref target="sec_remove_addr"/>) | The Address ID allows address removal (<xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>) | |||
without needing to know what the source address at the | without needing to know what the source address at the | |||
receiver is, thus allowing address removal through NATs. | receiver is, thus allowing address removal through NATs. | |||
The Address ID also allows correlation between new subflow setup attempts | The Address ID also allows correlation between new subflow setup attempts | |||
and address signaling (<xref target="sec_add_address"/>), | and address signaling (<xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>), | |||
to prevent setting up duplicate subflows on the same path, if an MP_JOIN | to prevent setting up duplicate subflows on the same path, if an MP_JOIN | |||
and ADD_ADDR are sent at the same time.</t> | and ADD_ADDR are sent at the same time.</t> | |||
<t pn="section-3.2-9">The Address IDs of the subflow used in the initial | ||||
<t>The Address IDs of the subflow used in the initial SYN | SYN | |||
exchange of the first subflow in the connection are implicit, | exchange of the first subflow in the connection are implicit | |||
and have the value zero. A host MUST store the mappings between | and have the value zero. A host <bcp14>MUST</bcp14> store the mappings b | |||
etween | ||||
Address IDs and addresses both for itself and the remote host. | Address IDs and addresses both for itself and the remote host. | |||
An implementation will also need to know which local and remote | An implementation will also need to know which local and remote | |||
Address IDs are associated with which established subflows, for | Address IDs are associated with which established subflows, for | |||
when addresses are removed from a local or remote host.</t> | when addresses are removed from a local or remote host.</t> | |||
<t pn="section-3.2-10">The MP_JOIN option on packets with the SYN flag s | ||||
<t>The MP_JOIN option on packets with the SYN flag set also includes 4 b | et also includes | |||
its of flags, 3 of which are currently reserved and MUST be set to zero by the s | 4 bits of flags, 3 of which are currently reserved and | |||
ender. The final bit, labeled "B", indicates whether the sender of this option w | <bcp14>MUST</bcp14> be set to 0 by the sender. The final bit, labeled | |||
ishes this subflow to be used as a backup path (B=1) in the event of failure of | "B", indicates whether the sender of this option (1) wishes this | |||
other paths, or whether it wants it to be used as part of the connection immedia | subflow to be used as a backup path (B=1) in the event of failure of | |||
tely. By setting B=1, the sender of the option is requesting the other host to o | other paths or (2) wants the subflow to be used as part of the | |||
nly send data on this subflow if there are no available subflows where B=0. Subf | connection immediately. By setting B=1, the sender of the option is | |||
low policy is discussed in more detail in <xref target="sec_policy"/>.</t> | requesting that the other host only send data on this subflow if there | |||
are no available subflows where B=0. Subflow policy is discussed in more | ||||
<?rfc needLines='10'?> | detail in <xref target="sec_policy" format="default" sectionFormat="of" derived | |||
<figure align="center" anchor="tcpm_join" title="Join Connection (MP_JOI | Content="Section 3.3.8"/>.</t> | |||
N) Option (for Initial SYN)"> | <t pn="section-3.2-11">When receiving a SYN with an MP_JOIN option that | |||
<artwork align="left"><![CDATA[ | contains | |||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| Kind | Length = 12 |Subtype|(rsv)|B| Address ID | | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| Receiver's Token (32 bits) | | ||||
+---------------------------------------------------------------+ | ||||
| Sender's Random Number (32 bits) | | ||||
+---------------------------------------------------------------+ | ||||
]]></artwork> | ||||
</figure> | ||||
<t>When receiving a SYN with an MP_JOIN option that contains | ||||
a valid token for an existing MPTCP connection, the recipient | a valid token for an existing MPTCP connection, the recipient | |||
SHOULD respond with a SYN/ACK also containing an MP_JOIN | <bcp14>SHOULD</bcp14> respond with a SYN/ACK also containing an MP_JOIN | |||
option containing a random number and a truncated (leftmost 64 | option containing a random number and a truncated (leftmost 64 bits) HMA | |||
bits) Hash-based Message Authentication Code (HMAC). This | C. This | |||
version of the option is shown in <xref target="tcpm_join2"/>. | version of the option is shown in <xref target="tcpm_join2" format="defa | |||
If the token is unknown, or the host wants to refuse subflow | ult" sectionFormat="of" derivedContent="Figure 6"/>. If the token is unknown or | |||
the host wants to refuse subflow | ||||
establishment (for example, due to a limit on the number of | establishment (for example, due to a limit on the number of | |||
subflows it will permit), the receiver will send back a reset | subflows it will permit), the receiver will send back a reset | |||
(RST) signal, analogous to an unknown port in TCP, containing a | (RST) signal, analogous to an unknown port in TCP, containing an | |||
MP_TCPRST option (<xref target="sec_reset"/>) with a "MPTCP | MP_TCPRST option (<xref target="sec_reset" format="default" sectionForma | |||
t="of" derivedContent="Section 3.6"/>) with an "MPTCP | ||||
specific error" reason code. Although calculating an HMAC | specific error" reason code. Although calculating an HMAC | |||
requires cryptographic operations, it is believed that the | requires cryptographic operations, it is believed that the | |||
32-bit token in the MP_JOIN SYN gives sufficient protection against blind state | 32-bit token in the MP_JOIN SYN gives sufficient protection against blind state | |||
exhaustion attacks; therefore, there is no need to provide | exhaustion attacks; therefore, there is no need to provide | |||
mechanisms to allow a responder to operate statelessly at the | mechanisms to allow a responder to operate statelessly at the | |||
MP_JOIN stage.</t> | MP_JOIN stage.</t> | |||
<figure anchor="tcpm_join2" align="left" suppress-title="false" pn="figu | ||||
<t>An HMAC is sent by both hosts -- by the initiator (Host A) | re-6"> | |||
<name slugifiedName="name-join-connection-mp_join-opti">Join Connectio | ||||
n (MP_JOIN) Option (for Responding SYN/ACK)</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.2-12.1"> | ||||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| Kind | Length = 16 |Subtype|(rsv)|B| Address ID | | ||||
+---------------+---------------+-------+-----+-+---------------+ | ||||
| | | ||||
| Sender's Truncated HMAC (64 bits) | | ||||
| | | ||||
+---------------------------------------------------------------+ | ||||
| Sender's Random Number (32 bits) | | ||||
+---------------------------------------------------------------+ </artwork> | ||||
</figure> | ||||
<t pn="section-3.2-13">An HMAC is sent by both hosts -- by the initiator | ||||
(Host A) | ||||
in the third packet (the ACK) and by the responder (Host B) in | in the third packet (the ACK) and by the responder (Host B) in | |||
the second packet (the SYN/ACK). Doing the HMAC exchange at this | the second packet (the SYN/ACK). Doing the HMAC exchange at this | |||
stage allows both hosts to have first exchanged random data (in the | stage allows both hosts to have first exchanged random data (in the | |||
first two SYN packets) that is used as the "message". This | first two SYN packets) that is used as the "message". This | |||
specification defines that HMAC as defined in <xref target="RFC2104"/> | specification defines that HMAC as defined in <xref target="RFC2104" for | |||
is used, along with the SHA-256 hash algorithm <xref target="RFC6234"/>, | mat="default" sectionFormat="of" derivedContent="RFC2104"/> | |||
is used, along with the SHA-256 hash algorithm <xref target="RFC6234" fo | ||||
rmat="default" sectionFormat="of" derivedContent="RFC6234"/>, | ||||
and that the output is truncated to the leftmost 160 bits (20 octets). | and that the output is truncated to the leftmost 160 bits (20 octets). | |||
Due to option space limitations, the HMAC included in | Due to option space limitations, the HMAC included in | |||
the SYN/ACK is truncated to the leftmost 64 bits, but this is | the SYN/ACK is truncated to the leftmost 64 bits, but this is | |||
acceptable since random numbers are used; thus, an attacker | acceptable, since random numbers are used; thus, an attacker | |||
only has one chance to correctly guess the HMAC that matches the random | only has one chance to correctly guess the HMAC that matches the random | |||
number previously sent by the peer (if the HMAC is | number previously sent by the peer (if the HMAC is | |||
incorrect, the TCP connection is closed, so a new MP_JOIN negotiation | incorrect, the TCP connection is closed, so a new MP_JOIN negotiation | |||
with a new random number is required).</t> | with a new random number is required).</t> | |||
<t pn="section-3.2-14">The initiator's authentication information is sen | ||||
<t>The initiator's authentication information is sent in its | t in its | |||
first ACK (the third packet of the handshake), as shown in | first ACK (the third packet of the handshake), as shown in | |||
<xref target="tcpm_join3"/>. This data needs to be sent reliably, | <xref target="tcpm_join3" format="default" sectionFormat="of" derivedContent="Figure 7"/>. This data needs to be sent reliably, | |||
since it is the only time this HMAC is sent; | since it is the only time this HMAC is sent; | |||
therefore, receipt of this packet MUST trigger a regular TCP ACK | therefore, receipt of this packet <bcp14>MUST</bcp14> trigger a regular | |||
in response, and the packet MUST be retransmitted if this | TCP ACK | |||
in response, and the packet <bcp14>MUST</bcp14> be retransmitted if this | ||||
ACK is not received. In other words, sending the ACK/MP_JOIN | ACK is not received. In other words, sending the ACK/MP_JOIN | |||
packet places the subflow in the PRE_ESTABLISHED state, and it | packet places the subflow in the PRE_ESTABLISHED state, and it | |||
moves to the ESTABLISHED state only on receipt of an ACK from | moves to the ESTABLISHED state only on receipt of an ACK from | |||
the receiver. It is not permitted to send data while in the | the receiver. It is not permissible to send data while in the | |||
PRE_ESTABLISHED state. The reserved bits in this option MUST be set | PRE_ESTABLISHED state. The reserved bits in this option <bcp14>MUST</bcp | |||
to zero by the sender.</t> | 14> be set | |||
to 0 by the sender.</t> | ||||
<t>The key for the HMAC algorithm, in the case of the message transmitte | <figure anchor="tcpm_join3" align="left" suppress-title="false" pn="figu | |||
d by Host A, will be Key-A followed by Key-B, and in the case of Host B, Key-B f | re-7"> | |||
ollowed by Key-A. These are the keys that were exchanged in the original MP_CAPA | <name slugifiedName="name-join-connection-mp_join-optio">Join Connecti | |||
BLE handshake. The "message" for the HMAC algorithm in each case is the concaten | on (MP_JOIN) Option (for Initiator's First ACK)</name> | |||
ations of random number for each host (denoted by R): for Host A, R-A followed b | <artwork align="left" name="" type="" alt="" pn="section-3.2-15.1"> | |||
y R-B; and for Host B, R-B followed by R-A.</t> | 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
<?rfc needLines='10'?> | +---------------+---------------+-------+-----------------------+ | |||
<figure align="center" anchor="tcpm_join2" title="Join Connection (MP_JO | | Kind | Length = 24 |Subtype| (reserved) | | |||
IN) Option (for Responding SYN/ACK)"> | +---------------+---------------+-------+-----------------------+ | |||
<artwork align="left"><![CDATA[ | | | | |||
1 2 3 | | | | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | | Sender's Truncated HMAC (160 bits) | | |||
+---------------+---------------+-------+-----+-+---------------+ | | | | |||
| Kind | Length = 16 |Subtype|(rsv)|B| Address ID | | | | | |||
+---------------+---------------+-------+-----+-+---------------+ | +---------------------------------------------------------------+ </artwork> | |||
| | | ||||
| Sender's Truncated HMAC (64 bits) | | ||||
| | | ||||
+---------------------------------------------------------------+ | ||||
| Sender's Random Number (32 bits) | | ||||
+---------------------------------------------------------------+ | ||||
]]></artwork> | ||||
</figure> | ||||
<?rfc needLines='12'?> | ||||
<figure align="center" anchor="tcpm_join3" title="Join Connection (MP_JO | ||||
IN) Option (for Third ACK)"> | ||||
<artwork align="left"><![CDATA[ | ||||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-----------------------+ | ||||
| Kind | Length = 24 |Subtype| (reserved) | | ||||
+---------------+---------------+-------+-----------------------+ | ||||
| | | ||||
| | | ||||
| Sender's Truncated HMAC (160 bits) | | ||||
| | | ||||
| | | ||||
+---------------------------------------------------------------+ | ||||
]]></artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.2-16">The key for the HMAC algorithm, in the case of th | ||||
e message | ||||
transmitted by Host A, will be Key-A followed by Key-B; and in the | ||||
case of Host B, Key-B followed by Key-A. These are the keys that were | ||||
exchanged in the original MP_CAPABLE handshake. The "message" for the | ||||
HMAC algorithm in each case is the concatenations of random numbers for | ||||
each host (denoted by R): for Host A, R-A followed by R-B; and for | ||||
Host B, R-B followed by R-A.</t> | ||||
<t pn="section-3.2-17">These various MPTCP options fit together to enabl | ||||
e authenticated subflow setup as illustrated in <xref target="fig_tokens" format | ||||
="default" sectionFormat="of" derivedContent="Figure 8"/>.</t> | ||||
<figure anchor="fig_tokens" align="left" suppress-title="false" pn="figu | ||||
re-8"> | ||||
<name slugifiedName="name-example-use-of-mptcp-authen">Example Use of | ||||
MPTCP Authentication</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.2-18.1"> | ||||
Host A Host B | ||||
------------------------ ---------- | ||||
Address A1 Address A2 Address B1 | ||||
---------- ---------- ---------- | ||||
| | | | ||||
| | SYN + MP_CAPABLE | | ||||
|--------------------------------------------->| | ||||
|<---------------------------------------------| | ||||
| SYN/ACK + MP_CAPABLE(Key-B) | | ||||
| | | | ||||
| ACK + MP_CAPABLE(Key-A, Key-B) | | ||||
|--------------------------------------------->| | ||||
| | | | ||||
| | SYN + MP_JOIN(Token-B, R-A) | | ||||
| |------------------------------->| | ||||
| |<-------------------------------| | ||||
| | SYN/ACK + MP_JOIN(HMAC-B, R-B) | | ||||
| | | | ||||
| | ACK + MP_JOIN(HMAC-A) | | ||||
| |------------------------------->| | ||||
| |<-------------------------------| | ||||
| | ACK | | ||||
<t>These various MPTCP options fit together to enable authenticated subf | HMAC-A = HMAC(Key=(Key-A + Key-B), Msg=(R-A + R-B)) | |||
low setup as illustrated in <xref target="fig_tokens"/>.</t> | HMAC-B = HMAC(Key=(Key-B + Key-A), Msg=(R-B + R-A)) </artwork> | |||
<?rfc needLines='24'?> | ||||
<figure align="center" anchor="fig_tokens" title="Example Use of MPTCP A | ||||
uthentication"> | ||||
<artwork align="left"><![CDATA[ | ||||
Host A Host B | ||||
------------------------ ---------- | ||||
Address A1 Address A2 Address B1 | ||||
---------- ---------- ---------- | ||||
| | | | ||||
| | SYN + MP_CAPABLE | | ||||
|--------------------------------------------->| | ||||
|<---------------------------------------------| | ||||
| SYN/ACK + MP_CAPABLE(Key-B) | | ||||
| | | | ||||
| ACK + MP_CAPABLE(Key-A, Key-B) | | ||||
|--------------------------------------------->| | ||||
| | | | ||||
| | SYN + MP_JOIN(Token-B, R-A) | | ||||
| |------------------------------->| | ||||
| |<-------------------------------| | ||||
| | SYN/ACK + MP_JOIN(HMAC-B, R-B) | | ||||
| | | | ||||
| | ACK + MP_JOIN(HMAC-A) | | ||||
| |------------------------------->| | ||||
| |<-------------------------------| | ||||
| | ACK | | ||||
HMAC-A = HMAC(Key=(Key-A+Key-B), Msg=(R-A+R-B)) | ||||
HMAC-B = HMAC(Key=(Key-B+Key-A), Msg=(R-B+R-A)) | ||||
]]></artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.2-19">If the token received at Host B is unknown or loc | ||||
<t>If the token received at Host B is unknown or local policy | al policy | |||
prohibits the acceptance of the new subflow, the recipient MUST | prohibits the acceptance of the new subflow, the recipient <bcp14>MUST</ | |||
respond with a TCP RST for the subflow. If appropriate, a MP_TCPRST | bcp14> | |||
option with a "Administratively prohibited" reason code | respond with a TCP RST for the subflow. If appropriate, an MP_TCPRST | |||
(<xref target="sec_reset"/>) should be included.</t> | option with an "Administratively prohibited" reason code | |||
(<xref target="sec_reset" format="default" sectionFormat="of" derivedCon | ||||
<t>If the token is accepted at Host B, but the HMAC returned to | tent="Section 3.6"/>) should be included.</t> | |||
Host A does not match the one expected, Host A MUST close the | <t pn="section-3.2-20">If the token is accepted at Host B but the HMAC r | |||
subflow with a TCP RST. In this, and all following cases of sending | eturned to | |||
a RST in this section, the sender SHOULD send a MP_TCPRST option | Host A does not match the one expected, Host A <bcp14>MUST</bcp14> close | |||
(<xref target="sec_reset"/>) on this RST packet with the reason | the | |||
code for a "MPTCP specific error".</t> | subflow with a TCP RST. In this and all subsequent cases of sending | |||
a RST as described in this section, the sender <bcp14>SHOULD</bcp14> sen | ||||
<t>If Host B does not receive the expected HMAC, or the MP_JOIN | d an MP_TCPRST option | |||
option is missing from the ACK, it MUST close the subflow with a | (<xref target="sec_reset" format="default" sectionFormat="of" derivedCon | |||
tent="Section 3.6"/>) on this RST packet with the reason | ||||
code for an "MPTCP-specific error".</t> | ||||
<t pn="section-3.2-21">If Host B does not receive the expected HMAC or t | ||||
he MP_JOIN | ||||
option is missing from the ACK, it <bcp14>MUST</bcp14> close the subflow | ||||
with a | ||||
TCP RST.</t> | TCP RST.</t> | |||
<t pn="section-3.2-22">If the HMACs are verified as correct, then both h | ||||
<t>If the HMACs are verified as correct, then both hosts have | osts have | |||
verified each other as being the same peers as existed at | verified each other as being the same peers as those that existed at | |||
the start of the connection, and they have agreed of which | the start of the connection, and they have agreed of which | |||
connection this subflow will become a part.</t> | connection this subflow will become a part.</t> | |||
<t pn="section-3.2-23">If the SYN/ACK as received at Host A does not hav | ||||
<t>If the SYN/ACK as received at Host A does not have an MP_JOIN | e an MP_JOIN | |||
option, Host A MUST close the subflow with a TCP RST.</t> | option, Host A <bcp14>MUST</bcp14> close the subflow with a TCP RST.</t> | |||
<t pn="section-3.2-24">This covers all cases of the loss of an MP_JOIN. | ||||
<t>This covers all cases of the loss of an MP_JOIN. In more detail, | In more detail, | |||
if MP_JOIN is stripped from the SYN on the path from A to | if an MP_JOIN is stripped from the SYN on the path from A to | |||
B, and Host B does not have a listener on the relevant | B and Host B does not have a listener on the relevant | |||
port, it will respond with a RST in the normal way. If in | port, it will respond with a RST in the normal way. If in | |||
response to a SYN with an MP_JOIN option, a SYN/ACK is | response to a SYN with an MP_JOIN option a SYN/ACK is | |||
received without the MP_JOIN option (either since it was | received without the MP_JOIN option (because it was either | |||
stripped on the return path, or it was stripped on the | stripped on the return path, or stripped on the | |||
outgoing path but Host B responded as if | outgoing path leading to Host B responding as if | |||
it were a new regular TCP session), then the subflow is | it was a new regular TCP session), then the subflow is | |||
unusable and Host A MUST close it with a RST.</t> | unusable and Host A <bcp14>MUST</bcp14> close it with a RST.</t> | |||
<t pn="section-3.2-25">Note that additional subflows can be created | ||||
<t>Note that additional subflows can be created | between any pair of ports (but see <xref target="heuristics" format="def | |||
between any pair of ports (but see <xref target="heuristics"/> for | ault" sectionFormat="of" derivedContent="Section 3.9"/> for | |||
heuristics); no explicit application-level accept calls or | heuristics); no explicit application-level accept calls or | |||
bind calls are required to open additional subflows. To | bind calls are required to open additional subflows. To | |||
associate a new subflow with an existing connection, the token | associate a new subflow with an existing connection, the token | |||
supplied in the subflow's SYN exchange is used for | supplied in the subflow's SYN exchange is used for | |||
demultiplexing. This then binds the 5-tuple of the TCP | demultiplexing. This then binds the 5-tuple of the TCP | |||
subflow to the local token of the connection. A consequence is | subflow to the local token of the connection. One consequence is | |||
that it is possible to allow any port pairs to be used for a | that it is possible to allow any port pairs to be used for a | |||
connection. </t> | connection. </t> | |||
<t pn="section-3.2-26">Demultiplexing subflow SYNs <bcp14>MUST</bcp14> b | ||||
<t>Demultiplexing subflow SYNs MUST be done using the token; | e done using the token; | |||
this is unlike traditional TCP, where the destination port is | this is unlike traditional TCP, where the destination port is | |||
used for demultiplexing SYN packets. Once a subflow is set up, | used for demultiplexing SYN packets. Once a subflow is set up, | |||
demultiplexing packets is done using the 5-tuple, as in | demultiplexing packets is done using the 5-tuple, as in | |||
traditional TCP. The 5-tuples will be mapped to the local | traditional TCP. The 5-tuples will be mapped to the local | |||
connection identifier (token). Note that Host A will know its | connection identifier (token). Note that Host A will know its | |||
local token for the subflow even though it is not sent on the | local token for the subflow even though it is not sent on the | |||
wire -- only the responder's token is sent.</t> | wire -- only the responder's token is sent.</t> | |||
</section> | </section> | |||
<section anchor="sec_generalop" numbered="true" toc="include" removeInRFC= | ||||
<section title="General MPTCP Operation" anchor="sec_generalop"> | "false" pn="section-3.3"> | |||
<t>This section discusses operation of MPTCP for data transfer. At a hig | <name slugifiedName="name-mptcp-operation-and-data-tr">MPTCP Operation a | |||
h level, an MPTCP implementation will take one input data stream from an applica | nd Data Transfer</name> | |||
tion, and split it into one or more subflows, with sufficient control informatio | <t pn="section-3.3-1">This section discusses the operation of MPTCP for | |||
n to allow it to be reassembled and delivered reliably and in order to the recip | data transfer. At a high level, an MPTCP implementation will take one input data | |||
ient application. The following subsections define this behavior in detail.</t> | stream from an application and split it into one or more subflows, with suffici | |||
ent control information to allow it to be reassembled and delivered reliably and | ||||
<t>The data sequence mapping and the Data ACK are signaled in the Data S | in order to the recipient application. The following subsections define this be | |||
equence Signal (DSS) option (<xref target="tcpm_dsn"/>). Either or both can be s | havior in detail.</t> | |||
ignaled in one DSS, depending on the flags set. The data sequence mapping define | <t pn="section-3.3-2">The Data Sequence Mapping and the Data ACK are sig | |||
s how the sequence space on the subflow maps to the connection level, and the Da | naled in the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat= | |||
ta ACK acknowledges receipt of data at the connection level. These functions are | "of" derivedContent="Figure 9"/>). Either or both can be signaled in one DSS, de | |||
described in more detail in the following two subsections.</t> | pending on the flags set. The Data Sequence Mapping defines how the sequence spa | |||
ce on the subflow maps to the connection level, and the Data ACK acknowledges re | ||||
<?rfc needLines='18'?> | ceipt of data at the connection level. These functions are described in more det | |||
<figure align="center" anchor="tcpm_dsn" title="Data Sequence Signal (DS | ail in the following two subsections.</t> | |||
S) Option"> | <figure anchor="tcpm_dsn" align="left" suppress-title="false" pn="figure | |||
<artwork align="left"><![CDATA[ | -9"> | |||
<name slugifiedName="name-data-sequence-signal-dss-op">Data Sequence S | ||||
ignal (DSS) Option</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.3-3.1"> | ||||
1 2 3 | 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+---------------+---------------+-------+----------------------+ | +---------------+---------------+-------+----------------------+ | |||
| Kind | Length |Subtype| (reserved) |F|m|M|a|A| | | Kind | Length |Subtype| (reserved) |F|m|M|a|A| | |||
+---------------+---------------+-------+----------------------+ | +---------------+---------------+-------+----------------------+ | |||
| Data ACK (4 or 8 octets, depending on flags) | | | Data ACK (4 or 8 octets, depending on flags) | | |||
+--------------------------------------------------------------+ | +--------------------------------------------------------------+ | |||
| Data sequence number (4 or 8 octets, depending on flags) | | | Data Sequence Number (4 or 8 octets, depending on flags) | | |||
+--------------------------------------------------------------+ | +--------------------------------------------------------------+ | |||
| Subflow Sequence Number (4 octets) | | | Subflow Sequence Number (4 octets) | | |||
+-------------------------------+------------------------------+ | +-------------------------------+------------------------------+ | |||
| Data-Level Length (2 octets) | Checksum (2 octets) | | | Data-Level Length (2 octets) | Checksum (2 octets) | | |||
+-------------------------------+------------------------------+ | +-------------------------------+------------------------------+ </artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.3-4">The flags, when set, define the contents of this option, as follows: | ||||
<t>The flags, when set, define the contents of this option, as follows: | </t> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-3.3-5"> | ||||
<list style="symbols"> | <li pn="section-3.3-5.1">A = Data ACK present</li> | |||
<t>A = Data ACK present</t> | <li pn="section-3.3-5.2">a = Data ACK is 8 octets (if not set, Data AC | |||
<t>a = Data ACK is 8 octets (if not set, Data ACK is 4 octets)</t> | K is 4 octets)</li> | |||
<t>M = Data Sequence Number (DSN), Subflow Sequence Number (SSN), Da | <li pn="section-3.3-5.3">M = Data Sequence Number (DSN), Subflow Seque | |||
ta-Level Length, and Checksum (if negotiated) present</t> | nce Number (SSN), Data-Level Length, and Checksum (if negotiated) present</li> | |||
<t>m = Data sequence number is 8 octets (if not set, DSN is 4 octets | <li pn="section-3.3-5.4">m = Data Sequence Number is 8 octets (if not | |||
)</t> | set, DSN is 4 octets)</li> | |||
</list> | </ul> | |||
<t pn="section-3.3-6"> | ||||
The flags 'a' and 'm' only have meaning if the corresponding 'A' or 'M' | ||||
flags are set; otherwise, they will be ignored. The maximum length of this optio | ||||
n, with all flags set, is 28 octets.</t> | ||||
<t>The 'F' flag indicates "Data FIN". If present, this means that this m | ||||
apping covers the final data from the sender. This is the connection-level equiv | ||||
alent to the FIN flag in single-path TCP. A connection is not closed unless ther | ||||
e has been a Data FIN exchange, a MP_FASTCLOSE (<xref target="sec_fastclose"/>) | ||||
message, or an implementation-specific, connection-level send timeout. The purpo | ||||
se of the Data FIN and the interactions between this flag, the subflow-level FIN | ||||
flag, and the data sequence mapping are described in <xref target="sec_close"/> | ||||
. | ||||
The remaining reserved bits MUST be set to zero by an implementation of | ||||
this specification.</t> | ||||
<t>Note that the checksum is only present in this option if the use of M | ||||
PTCP checksumming has been negotiated at the MP_CAPABLE handshake (see <xref tar | ||||
get="sec_init"/>). The presence of the checksum can be inferred from the length | ||||
of the option. If a checksum is present, but its use had not been negotiated in | ||||
the MP_CAPABLE handshake, the receiver MUST close the subflow with a RST as it n | ||||
ot behaving as negotiated. If a checksum is not present when its use has been ne | ||||
gotiated, the receiver MUST close the subflow with a RST as it is considered bro | ||||
ken. In both cases, this RST SHOULD be accompanied with a MP_TCPRST option (<xre | ||||
f target="sec_reset"/>) with the reason code for a "MPTCP specific error".</t> | ||||
<section title="Data Sequence Mapping" anchor="sec_dsn"> | ||||
<t>The data stream as a whole can be reassembled through the use of th | ||||
e data sequence mapping components of the DSS option (<xref target="tcpm_dsn"/>) | ||||
, which define the | ||||
mapping from the subflow sequence number to the data sequence number. This is us | ||||
ed by the receiver to ensure in-order delivery to the application layer. Meanwhi | ||||
le, the subflow-level sequence numbers (i.e., the regular sequence numbers in th | ||||
e TCP header) have subflow-only relevance. It is expected (but not mandated) tha | ||||
t SACK <xref target='RFC2018'/> is used at the subflow level to improve efficien | ||||
cy.</t> | ||||
<t>The data sequence mapping specifies a mapping from subflow sequence s | ||||
pace to data sequence space. This is expressed in terms of starting sequence num | ||||
bers for the subflow and the data level, and a length of bytes for which this ma | ||||
pping is valid. | ||||
This explicit mapping for a range of data was chosen rather than per-packet sign | ||||
aling to assist with compatibility with situations where TCP/IP segmentation or | ||||
coalescing is undertaken separately from the stack that is generating the data f | ||||
low (e.g., through the use of TCP segmentation offloading on network interface c | ||||
ards, or by middleboxes such as performance enhancing proxies). It also allows a | ||||
single mapping to cover many packets, which may be useful in bulk transfer situ | ||||
ations.</t> | ||||
<t>A mapping is fixed, in that the subflow sequence number is bound to t | ||||
he data sequence number after the mapping has been processed. A sender MUST NOT | ||||
change this mapping | ||||
after it has been declared; however, the same data sequence number can be mapped | ||||
to by different subflows for retransmission purposes (see <xref target="sec_ret | ||||
ransmit"/>). This would also permit the same data to be sent simultaneously on m | ||||
ultiple subflows for resilience or efficiency purposes, especially in the case o | ||||
f lossy links. Although the detailed specification of such operation is outside | ||||
the scope of this document, an implementation SHOULD treat the first data that i | ||||
s received at a subflow for the data sequence space as that which should be deli | ||||
vered to the application, and any later data for that sequence space SHOULD be i | ||||
gnored.</t> | ||||
<t>The data sequence number is specified as an absolute value, whereas t | ||||
he subflow sequence numbering is relative (the SYN at the start of the subflow h | ||||
as relative subflow sequence number 0). This is to allow middleboxes to change t | ||||
he initial sequence number of a subflow, such as firewalls that undertake Initia | ||||
l Sequence Number (ISN) randomization.</t> | ||||
<t>The data sequence mapping also contains a checksum of the data that t | ||||
his mapping covers, if use of checksums has been negotiated at the MP_CAPABLE ex | ||||
change. Checksums are used to detect if the payload has been adjusted in any way | ||||
by a non-MPTCP-aware middlebox. If this checksum fails, it will trigger a failu | ||||
re of the subflow, or a fallback to regular TCP, as documented in <xref target=" | ||||
sec_fallback"/>, since MPTCP can no longer reliably know the subflow sequence sp | ||||
ace at the receiver to build data sequence mappings. Without checksumming enable | ||||
d, corrupt data may be delivered to the application if a middlebox alters segmen | ||||
t boundaries, alters content, or does not deliver all segments covered by a data | ||||
sequence mapping. It is therefore RECOMMENDED to use checksumming unless it is | ||||
known the network path contains no such devices.</t> | ||||
<t>The checksum algorithm used is the standard TCP checksum <xref target | ||||
="RFC0793"/>, operating over the data covered by this mapping, along with a pseu | ||||
do-header as shown in <xref target="fig_pseudo"/>.</t> | ||||
<?rfc needLines='18'?> | The flags "a" and "m" only have meaning if the corresponding "A" or "M" | |||
<figure align="center" anchor="fig_pseudo" title="Pseudo-Header for DSS | flags are set; otherwise, they will be ignored. The maximum length of this optio | |||
Checksum"> | n, with all flags set, is 28 octets.</t> | |||
<artwork align="left"><![CDATA[ | <t pn="section-3.3-7">The "F" flag indicates "Data FIN". If present, thi | |||
s means that this | ||||
mapping covers the final data from the sender. This is the | ||||
connection-level equivalent of the FIN flag in single-path TCP. A connec | ||||
tion is not closed unless there has been a Data FIN exchange, an MP_FASTCLOSE (< | ||||
xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent=" | ||||
Section 3.5"/>) message, or an implementation-specific connection-level send tim | ||||
eout. The purpose of the Data FIN and the interactions between this flag, the su | ||||
bflow-level FIN flag, and the Data Sequence Mapping are described in <xref targe | ||||
t="sec_close" format="default" sectionFormat="of" derivedContent="Section 3.3.3" | ||||
/>. | ||||
The remaining reserved bits <bcp14>MUST</bcp14> be set to 0 by an implem | ||||
entation of this specification.</t> | ||||
<t pn="section-3.3-8">Note that the checksum is only present in this opt | ||||
ion if the use of | ||||
MPTCP checksumming has been negotiated at the MP_CAPABLE handshake | ||||
(see <xref target="sec_init" format="default" sectionFormat="of" derived | ||||
Content="Section 3.1"/>). The presence of the | ||||
checksum can be inferred from the length of the option. If a checksum | ||||
is present but its use had not been negotiated in the MP_CAPABLE | ||||
handshake, the receiver <bcp14>MUST</bcp14> close the subflow with a | ||||
RST, as it is not behaving as negotiated. If a checksum is not present w | ||||
hen its use has been negotiated, the receiver <bcp14>MUST</bcp14> close the subf | ||||
low with a RST, as it is considered broken. In both cases, this RST <bcp14>SHOUL | ||||
D</bcp14> be accompanied by an MP_TCPRST option (<xref target="sec_reset" format | ||||
="default" sectionFormat="of" derivedContent="Section 3.6"/>) with the reason co | ||||
de for an "MPTCP-specific error".</t> | ||||
<section anchor="sec_dsn" numbered="true" toc="include" removeInRFC="fal | ||||
se" pn="section-3.3.1"> | ||||
<name slugifiedName="name-data-sequence-mapping">Data Sequence Mapping | ||||
</name> | ||||
<t pn="section-3.3.1-1">The data stream as a whole can be reassembled | ||||
through the use of the Data Sequence Mapping components of the DSS option (<xref | ||||
target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9" | ||||
/>), which define the | ||||
mapping from the subflow sequence number to the data sequence number. This is | ||||
used by the receiver to ensure in-order delivery to the application | ||||
layer. Meanwhile, the subflow-level sequence numbers (i.e., the | ||||
regular sequence numbers in the TCP header) are only relevant to the s | ||||
ubflow. It is expected (but not mandated) that SACK <xref target="RFC2018" forma | ||||
t="default" sectionFormat="of" derivedContent="RFC2018"/> will be used at the su | ||||
bflow level to improve efficiency.</t> | ||||
<t pn="section-3.3.1-2">The Data Sequence Mapping specifies a mapping | ||||
from the subflow | ||||
sequence space to the data sequence space. This is expressed in terms | ||||
of starting sequence numbers for the subflow and the data level, and a length of | ||||
bytes for which this mapping is valid. | ||||
This explicit mapping for a range of data, rather than per‑packet signaling, was | ||||
chosen to assist with compatibility with | ||||
situations where TCP/IP segmentation or coalescing is undertaken | ||||
separately from the stack that is generating the data flow (e.g., | ||||
through the use of TCP segmentation offloading on network interface | ||||
cards, or by middleboxes such as Performance Enhancing Proxies | ||||
(PEPs) <xref target="RFC3135" format="default" sectionFormat="of" deri | ||||
vedContent="RFC3135"/>). It | ||||
also allows a single mapping to cover many packets; this may be useful | ||||
in bulk‑transfer situations.</t> | ||||
<t pn="section-3.3.1-3">A mapping is fixed, in that the subflow sequen | ||||
ce number is bound to the data sequence number after the mapping has been proces | ||||
sed. A sender <bcp14>MUST NOT</bcp14> change this mapping | ||||
after it has been declared; however, the same data sequence number can be | ||||
mapped to by different subflows for retransmission purposes (see | ||||
<xref target="sec_retransmit" format="default" sectionFormat="of" deri | ||||
vedContent="Section 3.3.6"/>). This would also | ||||
permit the same data to be sent simultaneously on multiple subflows | ||||
for resilience or efficiency purposes, especially in the case of | ||||
lossy links. Although the detailed specification of such operation | ||||
is outside the scope of this document, an implementation | ||||
<bcp14>SHOULD</bcp14> treat the first data that is received at a | ||||
subflow for the data sequence space as the data that should be deliver | ||||
ed to the application, and any subsequent data for that sequence space <bcp14>SH | ||||
OULD</bcp14> be ignored.</t> | ||||
<t pn="section-3.3.1-4">The data sequence number is specified as an ab | ||||
solute value, | ||||
whereas the subflow sequence numbering is relative (the SYN at the | ||||
start of the subflow has a relative subflow sequence number of | ||||
0). This is done to allow middleboxes to change the Initial Sequence | ||||
Number (ISN) of a subflow, such as firewalls that undertake ISN random | ||||
ization.</t> | ||||
<t pn="section-3.3.1-5">The Data Sequence Mapping also contains a chec | ||||
ksum of the data | ||||
that this mapping covers, if the use of checksums has been negotiated | ||||
at | ||||
the MP_CAPABLE exchange. Checksums are used to detect if the payload | ||||
has been adjusted in any way by a non-MPTCP-aware middlebox. If this | ||||
checksum fails, it will trigger a failure of the subflow, or a | ||||
fallback to regular TCP, as documented in <xref target="sec_fallback" | ||||
format="default" sectionFormat="of" derivedContent="Section 3.7"/>, since MPTCP | ||||
can no longer | ||||
reliably know the subflow sequence space at the receiver to build | ||||
Data Sequence Mappings. Without checksumming enabled, corrupt data | ||||
may be delivered to the application if a middlebox alters segment | ||||
boundaries, alters content, or does not deliver all segments covered | ||||
by a Data Sequence Mapping. It is therefore | ||||
<bcp14>RECOMMENDED</bcp14> that checksumming be used, unless it is kno | ||||
wn | ||||
that the network path contains no such devices.</t> | ||||
<t pn="section-3.3.1-6">The checksum algorithm used is the standard TC | ||||
P checksum <xref target="RFC0793" format="default" sectionFormat="of" derivedCon | ||||
tent="RFC0793"/>, operating over the data covered by this mapping, along with a | ||||
pseudo‑header as shown in <xref target="fig_pseudo" format="default" sectionForm | ||||
at="of" derivedContent="Figure 10"/>.</t> | ||||
<figure anchor="fig_pseudo" align="left" suppress-title="false" pn="fi | ||||
gure-10"> | ||||
<name slugifiedName="name-pseudo-header-for-dss-check">Pseudo-Header | ||||
for DSS Checksum</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.3.1-7.1"> | ||||
1 2 3 | 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+--------------------------------------------------------------+ | +--------------------------------------------------------------+ | |||
| | | | | | |||
| Data Sequence Number (8 octets) | | | Data Sequence Number (8 octets) | | |||
| | | | | | |||
+--------------------------------------------------------------+ | +--------------------------------------------------------------+ | |||
| Subflow Sequence Number (4 octets) | | | Subflow Sequence Number (4 octets) | | |||
+-------------------------------+------------------------------+ | +-------------------------------+------------------------------+ | |||
| Data-Level Length (2 octets) | Zeros (2 octets) | | | Data-Level Length (2 octets) | Zeros (2 octets) | | |||
+-------------------------------+------------------------------+ | +-------------------------------+------------------------------+ </artwork> | |||
]]></artwork> | </figure> | |||
</figure> | <t pn="section-3.3.1-8">Note that the data sequence number used in the | |||
pseudo-header is always the 64-bit value, irrespective of what length is used i | ||||
<t>Note that the data sequence number used in the pseudo-header is alway | n the DSS option itself. The standard TCP checksum algorithm has been chosen, si | |||
s the 64-bit value, irrespective of what length is used in the DSS option itself | nce it will be calculated anyway for the TCP subflow, and if calculated first ov | |||
. The standard TCP checksum algorithm has been chosen since it will be calculate | er the data before adding the pseudo-headers, it only needs to be calculated onc | |||
d anyway for the TCP subflow, and if calculated first over the data before addin | e. Furthermore, since the TCP checksum is additive, the checksum for a DSN_MAP c | |||
g the pseudo-headers, it only needs to be calculated once. Furthermore, since th | an be constructed by simply adding together the checksums for the data of each c | |||
e TCP checksum is additive, the checksum for a DSN_MAP can be constructed by sim | onstituent TCP segment and adding the checksum for the DSS pseudo‑header.</t> | |||
ply adding together the checksums for the data of each constituent TCP segment, | <t pn="section-3.3.1-9">Note that checksumming relies on the TCP subfl | |||
and adding the checksum for the DSS pseudo-header.</t> | ow containing contiguous data; therefore, a TCP subflow <bcp14>MUST NOT</bcp14> | |||
use the Urgent Pointer to interrupt an existing mapping. Further note, however, | ||||
<t>Note that checksumming relies on the TCP subflow containing contiguou | that if Urgent data is received on a subflow, it <bcp14>SHOULD</bcp14> be mapped | |||
s data; therefore, a TCP subflow MUST NOT use the Urgent Pointer to interrupt an | to the data sequence space and delivered to the application, analogous to Urgen | |||
existing mapping. Further note, however, that if Urgent data is received on a s | t data in regular TCP.</t> | |||
ubflow, it SHOULD be mapped to the data sequence space and delivered to the appl | <t pn="section-3.3.1-10">To avoid possible deadlock scenarios, subflow | |||
ication analogous to Urgent data in regular TCP.</t> | -level | |||
processing should be undertaken separately from processing at the | ||||
<t>To avoid possible deadlock scenarios, subflow-level | ||||
processing should be undertaken separately from that at | ||||
connection level. Therefore, even if a mapping does not exist | connection level. Therefore, even if a mapping does not exist | |||
from the subflow space to the data-level space, the data | from the subflow space to the data‑level space, the data | |||
SHOULD still be ACKed at the subflow (if it is in-window). | <bcp14>SHOULD</bcp14> still be ACKed at the subflow (if it is in-window) | |||
. | ||||
This data cannot, however, be acknowledged at the data level | This data cannot, however, be acknowledged at the data level | |||
(<xref target="sec_dataack"/>) because its data sequence | (<xref target="sec_dataack" format="default" sectionFormat="of" derivedC | |||
numbers are unknown. Implementations MAY hold onto such | ontent="Section 3.3.2"/>) because its data sequence | |||
unmapped data for a short while in the expectation that a | numbers are unknown. Implementations <bcp14>MAY</bcp14> hold onto such | |||
unmapped data for a short while, in the expectation that a | ||||
mapping will arrive shortly. Such unmapped data cannot be | mapping will arrive shortly. Such unmapped data cannot be | |||
counted as being within the connection level receive window because this is | counted as being within the connection-level receive window because this is | |||
relative to the data sequence numbers, so if the receiver runs | relative to the data sequence numbers, so if the receiver runs | |||
out of memory to hold this data, it will have to be discarded. | out of memory to hold this data, it will have to be discarded. | |||
If a mapping for that subflow-level sequence space does not | If a mapping for that subflow-level sequence space does not | |||
    arrive within a receive window of data, that subflow SHOULD be | arrive within a receive window of data, that subflow <bcp14>SHOULD</bcp14> be | |||
treated as broken, closed with a RST, and any unmapped data | treated as broken, closed with a RST, and any unmapped data | |||
silently discarded.</t> | silently discarded.</t> | |||
<t pn="section-3.3.1-11">Data sequence numbers are always 64-bit quant | ||||
<t>Data sequence numbers are always 64-bit quantities, and | ities and | |||
MUST be maintained as such in implementations. If a | <bcp14>MUST</bcp14> be maintained as such in implementations. If a | |||
connection is progressing at a slow rate, so protection | connection is progressing at a slow rate, so protection | |||
against wrapped sequence numbers is not required, | against wrapped sequence numbers is not required, | |||
then an implementation MAY include just the lower 32 | then an implementation <bcp14>MAY</bcp14> include just the lower 32 | |||
bits of the data sequence number in the data sequence mapping and/or | bits of the data sequence number in the Data Sequence Mapping and/or | |||
Data ACK as an optimization, and an implementation can make this choice | Data ACK as an optimization, and an implementation can make this choice | |||
independently for each packet. An implementation MUST be able to receive | independently for each packet. An implementation <bcp14>MUST</bcp14> be | |||
and process both 64-bit or 32-bit sequence number values, but it is not | able to receive | |||
required that an implementation is able to send both.</t> | and process both 64-bit and 32-bit sequence number values, but it is not | |||
required that an implementation be able to send both.</t> | ||||
<t>An implementation MUST send the full 64-bit data sequence number | <t pn="section-3.3.1-12">An implementation <bcp14>MUST</bcp14> send th | |||
e full 64-bit data sequence number | ||||
if it is transmitting at a sufficiently high rate that the 32-bit value | if it is transmitting at a sufficiently high rate that the 32-bit value | |||
could wrap within the Maximum Segment Lifetime | could wrap within the Maximum Segment Lifetime | |||
    (MSL) <xref target="RFC7323"/>. The lengths of the DSNs used in these | (MSL) <xref target="RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>. The lengths of the DSNs used in these | |||
values (which may be different) are declared with flags in the | values (which may be different) are declared with flags in the | |||
DSS option. Implementations MUST accept a 32-bit DSN and implicitly | DSS option. Implementations <bcp14>MUST</bcp14> accept a 32-bit DSN and implicitly | |||
promote it to a 64-bit quantity by incrementing the upper 32 | promote it to a 64-bit quantity by incrementing the upper 32 | |||
bits of sequence number each time the lower 32 | bits of the sequence number each time the lower 32 | |||
bits wrap. A sanity check MUST be implemented to ensure that | bits wrap. A sanity check <bcp14>MUST</bcp14> be implemented to ensure t | |||
hat | ||||
a wrap occurs at an expected time (e.g., the sequence number jumps | a wrap occurs at an expected time (e.g., the sequence number jumps | |||
from a very high number to a very low number) and is not triggered | from a very high number to a very low number) and is not triggered | |||
by out-of-order packets.</t> | by out‑of-order packets.</t> | |||
<t pn="section-3.3.1-13">As with the standard TCP sequence number, the | ||||
<t>As with the standard TCP sequence number, the data sequence | data sequence | |||
number should not start at zero, but at a random value to make | number should not start at zero, but at a random value to make | |||
blind session hijacking harder. This specification requires | blind session hijacking harder. This specification requires | |||
setting the initial data sequence number (IDSN) of each host to the | setting the IDSN of each host to the | |||
least significant 64 bits of the SHA-256 hash of the host's key, as | least significant 64 bits of the SHA-256 hash of the host's key, as | |||
described in <xref target="sec_init"/>. This is required also in | described in <xref target="sec_init" format="default" sectionFormat="of" | |||
order for the receiver to know what the expected IDSN is, and thus | derivedContent="Section 3.1"/>. This is also required in | |||
order for the receiver to know what the expected IDSN is and thus | ||||
determine if any initial connection-level packets are missing; this | determine if any initial connection-level packets are missing; this | |||
    is particularly relevant if two subflows start transmitting simultaneously.</t> | is particularly relevant if two subflows start transmitting simultaneously.</t> | |||
<t pn="section-3.3.1-14">The mapping provided by a Data Sequence Mappi | ||||
    <t>A data sequence mapping does not need to be included in | ng <bcp14>MUST</bcp14> apply to | |||
some or all of the subflow sequence space in the TCP segment that | ||||
carries the option. It does not need to be included in | ||||
every MPTCP packet, as long as the subflow sequence space in | every MPTCP packet, as long as the subflow sequence space in | |||
that packet is covered by a mapping known at the receiver. This | that packet is covered by a mapping known at the receiver. This | |||
can be used to reduce overhead in cases where the mapping is | can be used to reduce overhead in cases where the mapping is | |||
known in advance; one such case is when there is a single | known in advance. One such case is when there is a single | |||
subflow between the hosts, another is when segments of | subflow between the hosts, and another is when segments of | |||
data are scheduled in larger than packet-sized chunks.</t> | data are scheduled in larger-than-packet-sized chunks.</t> | |||
<t pn="section-3.3.1-15">An "infinite" mapping can be used to fall bac | ||||
<t>An "infinite" mapping can be used to fall back to regular TCP by | k to regular TCP by | |||
mapping the subflow-level data to the connection-level data | mapping the subflow-level data to the connection-level data | |||
    for the remainder of the connection (see | for the remainder of the connection (see | |||
    <xref target="sec_fallback"/>). This is achieved by setting | <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>). This is achieved by setting | |||
    the Data-Level Length field of the DSS option to the reserved value of 0. The | the Data-Level Length field of the DSS option to the reserved value of 0. The | |||
checksum, in such a case, will also be set to zero.</t> | checksum, in such a case, will also be set to 0.</t> | |||
</section> | </section> | |||
<section anchor="sec_dataack" numbered="true" toc="include" removeInRFC= | ||||
<section title="Data Acknowledgments" anchor="sec_dataack"> | "false" pn="section-3.3.2"> | |||
<t>To provide full end-to-end resilience, MPTCP provides a | <name slugifiedName="name-data-acknowledgments">Data Acknowledgments</ | |||
name> | ||||
<t pn="section-3.3.2-1">To provide full end-to-end resilience, MPTCP p | ||||
rovides a | ||||
connection-level acknowledgment, to act as a cumulative ACK for | connection-level acknowledgment, to act as a cumulative ACK for | |||
the connection as a whole. This is the "Data ACK" field of | the connection as a whole. This is done via the "Data ACK" field of | |||
the DSS option (<xref target="tcpm_dsn"/>). The Data ACK | the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat=" | |||
of" derivedContent="Figure 9"/>). The Data ACK | ||||
is analogous to the behavior | is analogous to the behavior | |||
of the standard TCP cumulative ACK -- indicating | of the standard TCP cumulative ACK -- indicating | |||
how much data has been successfully received (with no | how much data has been successfully received (with no | |||
holes). This is in comparison to the subflow-level ACK, which | holes). This can be compared to the subflow-level ACK, which | |||
acts analogous to TCP SACK, given that there may still be | acts in a fashion analogous to TCP SACK, given that there may still be | |||
holes in the data stream at the connection level. | holes in the data stream at the connection level. | |||
The Data ACK specifies the next data sequence number | The Data ACK specifies the next data sequence number | |||
it expects to receive.</t> | it expects to receive.</t> | |||
<t pn="section-3.3.2-2">The Data ACK, as for the DSN, can be sent as t | ||||
<t>The Data ACK, as for the DSN, can be sent as the full 64-bit | he full 64-bit | |||
value, or as the lower 32 bits. If data is received with a 64-bit DSN, | value or as the lower 32 bits. If data is received with a 64-bit DSN, | |||
it MUST be acknowledged with a 64-bit Data ACK. If the DSN received | it <bcp14>MUST</bcp14> be acknowledged with a 64-bit Data ACK. If the D | |||
is 32 bits, an implementation can choose whether to send a 32-bit or | SN received | |||
64-bit Data ACK, and an implementation MUST accept either in this situat | is 32 bits, an implementation can choose whether to send a 32-bit or | |||
ion.</t> | 64-bit Data ACK, and an implementation <bcp14>MUST</bcp14> accept either | |||
in this situation.</t> | ||||
<t>The Data ACK proves that the data, and all required MPTCP | <t pn="section-3.3.2-3">The Data ACK proves that the data, and all req | |||
signaling, has been received and accepted by the remote end. | uired MPTCP | |||
signaling, have been received and accepted by the remote end. | ||||
One key use of the Data ACK signal is that it is used to indicate | One key use of the Data ACK signal is that it is used to indicate | |||
    the left edge of the advertised receive window. As explained in | the left edge of the advertised receive window. As explained in | |||
    <xref target="sec_rwin"/>, the receive window is shared by all | <xref target="sec_rwin" format="default" sectionFormat="of" derivedContent="Section 3.3.4"/>, the receive window is shared by all | |||
    subflows and is relative to the Data ACK. Because of this, an | subflows and is relative to the Data ACK. Because of this, an | |||
    implementation MUST NOT use the RCV.WND field of a TCP segment | implementation <bcp14>MUST NOT</bcp14> use the RCV.WND field of a TCP segment | |||
at the connection level if it does not also carry a DSS option with | at the connection level if it does not also carry a DSS option with | |||
a Data ACK field. Furthermore, | a Data ACK field. Furthermore, | |||
separating the connection-level acknowledgments from the | separating the connection-level acknowledgments from the | |||
subflow level allows processing to be done separately, and | subflow level allows processing to be done separately, and | |||
a receiver has the freedom to drop segments after acknowledgment | a receiver has the freedom to drop segments after acknowledgment | |||
at the subflow level, for example, due to memory constraints | at the subflow level -- for example, due to memory constraints | |||
when many segments arrive out of order.</t> | when many segments arrive out of order.</t> | |||
<t pn="section-3.3.2-4">An MPTCP sender <bcp14>MUST NOT</bcp14> free d | ||||
<t>An MPTCP sender MUST NOT free data from the send buffer until | ata from the send buffer until | |||
it has been acknowledged by both a Data ACK received on any subflow | it has been acknowledged by both a Data ACK received on any subflow | |||
and at the subflow level by all subflows on which the data was sent. | and at the subflow level by all subflows on which the data was sent. | |||
The former condition ensures liveness of the | The former condition ensures liveness of the | |||
connection and the latter condition ensures liveness and | connection, and the latter condition ensures liveness and | |||
self-consistence of a subflow when data needs to be | self-consistence of a subflow when data needs to be | |||
retransmitted. | retransmitted. | |||
Note, however, that if some data needs to be retransmitted multiple | Note, however, that if some data needs to be retransmitted multiple | |||
times over a subflow, there is a risk of blocking the sending | times over a subflow, there is a risk of blocking the send | |||
window. In this case, the MPTCP sender can decide to terminate the | window. In this case, the MPTCP sender can decide to terminate the | |||
subflow that is behaving badly by sending a RST, using an appropriate | subflow that is behaving badly by sending a RST, using an appropriate | |||
MP_TCPRST (<xref target="sec_reset"/>) error code.</t> | MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of" | |||
derivedContent="Section 3.6"/>) error code.</t> | ||||
<t>The Data ACK MAY be included in all segments; however, optimizations | <t pn="section-3.3.2-5">The Data ACK <bcp14>MAY</bcp14> be included in | |||
SHOULD be considered in more advanced implementations, where the | all segments; however, optimizations | |||
<bcp14>SHOULD</bcp14> be considered in more advanced implementations, wh | ||||
ere the | ||||
Data ACK is present in segments | Data ACK is present in segments | |||
only when the Data ACK value advances, and this behavior MUST | only when the Data ACK value advances, and this behavior <bcp14>MUST</bc | |||
be treated as valid. This behavior ensures the sender buffer | p14> | |||
be treated as valid. This behavior ensures that the send buffer | ||||
is freed, while reducing overhead when the data transfer is | is freed, while reducing overhead when the data transfer is | |||
unidirectional.</t> | unidirectional.</t> | |||
</section> | </section> | |||
<section anchor="sec_close" numbered="true" toc="include" removeInRFC="f | ||||
<section title="Closing a Connection" anchor="sec_close"> | alse" pn="section-3.3.3"> | |||
<t>In regular TCP, a FIN announces the receiver that the sender has no m | <name slugifiedName="name-closing-a-connection">Closing a Connection</ | |||
ore data to send. | name> | |||
<t pn="section-3.3.3-1">In regular TCP, a FIN announces to the receive | ||||
r that the sender has no more data to send. | ||||
In order to allow subflows to operate independently and to keep the appearance of TCP over the wire, | In order to allow subflows to operate independently and to keep the appearance of TCP over the wire, | |||
a FIN in MPTCP only affects the subflow on which it is sent. This | a FIN in MPTCP only affects the subflow on which it is sent. This | |||
allows nodes to exercise considerable freedom over which paths are in use at any one time. | allows nodes to exercise considerable freedom over which paths are in use at any one time. | |||
The semantics of a FIN remain as for regular TCP; i.e., it is not until both sides have ACKed | The semantics of a FIN remain as for regular TCP; i.e., it is not until both sides have ACKed | |||
each other's FINs that the subflow is fully closed.</t> | each other's FINs that the subflow is fully closed.</t> | |||
<t>When an application calls close() on a socket, this indicates that it has no more | <t pn="section-3.3.3-2">When an application calls close() on a socket, this indicates that it has no more | |||
data to send; for regular TCP, this would result in a FIN on the connection. For MPTCP, an | data to send; for regular TCP, this would result in a FIN on the connection. For MPTCP, an | |||
equivalent mechanism is needed, and this is referred to as the DATA_FIN.</t> | equivalent mechanism is needed; this is referred to as the DATA_FIN.</t> | |||
<t pn="section-3.3.3-3">A DATA_FIN is an indication that the sender ha | ||||
<t>A DATA_FIN is an indication that the sender has no more data to send, | s no more data to send, and | |||
and | as such it can be used to verify that all data has been successfully rec | |||
as such can be used to verify that all data has been successfully receiv | eived. A DATA_FIN, | |||
ed. A DATA_FIN, | ||||
as with the FIN on a regular TCP connection, is a unidirectional signal. </t> | as with the FIN on a regular TCP connection, is a unidirectional signal. </t> | |||
<t pn="section-3.3.3-4">The DATA_FIN is signaled by setting the "F" fl | ||||
<t>The DATA_FIN is signaled by setting the 'F' flag in the Data Sequence | ag in the DSS | |||
Signal option (<xref target="tcpm_dsn"/>) to 1. A DATA_FIN occupies 1 octet (th | option (<xref target="tcpm_dsn" format="default" sectionFormat="of" de | |||
e final octet) of the connection-level sequence space. Note that the DATA_FIN is | rivedContent="Figure 9"/>) | |||
included in the Data-Level Length, but not at the subflow level: for example, a | to 1. A DATA_FIN occupies 1 octet (the final octet) of the | |||
segment with DSN 80, and Data-Level Length 11, with DATA_FIN set, would map 10 | connection-level sequence space. Note that the | |||
octets from the subflow into data sequence space 80-89, the DATA_FIN is DSN 90; | DATA_FIN is included in the Data-Level Length but not at the subflow | |||
therefore, this segment including DATA_FIN would be acknowledged with a DATA_ACK | level: for example, a segment with a DSN value of 80 and a | |||
of 91.</t> | Data-Level Length of 11, with DATA_FIN set, would map 10 octets from | |||
the subflow into data sequence space 80-89, and the DATA_FIN would | ||||
<t>Note that when the DATA_FIN is not attached to a TCP segment containi | be DSN 90; therefore, this segment, including DATA_FIN, would be | |||
ng data, the Data Sequence Signal MUST have a subflow sequence number of 0, a Da | acknowledged with a DATA_ACK of 91.</t> | |||
ta-Level Length of 1, and the data sequence number that corresponds with the DAT | <t pn="section-3.3.3-5">Note that when the DATA_FIN is not attached to | |||
A_FIN itself. The checksum in this case will only cover the pseudo-header.</t> | a TCP segment containing data, the DSS <bcp14>MUST</bcp14> have a subflow seque | |||
nce number of 0, a Data-Level Length of 1, and the data sequence number that cor | ||||
<t>A DATA_FIN has the semantics and behavior as a regular TCP FIN, but a | responds with the DATA_FIN itself. The checksum in this case will only cover the | |||
t the connection level. Notably, it is only DATA_ACKed once all data has been su | pseudo-header.</t> | |||
ccessfully received at the connection level. Note, therefore, that a DATA_FIN is | <t pn="section-3.3.3-6">A DATA_FIN has the same semantics and behavior | |||
decoupled from a subflow FIN. It is only permissible to combine these signals o | as a regular TCP FIN, but at the connection level. Notably, it is only DATA_ACK | |||
n one subflow if there is no data outstanding on other subflows. Otherwise, it m | ed once all data has been successfully received at the connection level. Note, t | |||
ay be necessary to retransmit data on different subflows. Essentially, a host MU | herefore, that a DATA_FIN is decoupled from a subflow FIN. It is only permissibl | |||
ST NOT close all functioning subflows unless it is safe to do so, i.e., until al | e to combine these signals on one subflow if there is no data outstanding on oth | |||
l outstanding data has been DATA_ACKed, or until the segment with the DATA_FIN f | er subflows. Otherwise, it may be necessary to retransmit data on different subf | |||
lag set is the only outstanding segment.</t> | lows. Essentially, a host <bcp14>MUST NOT</bcp14> close all functioning subflows | |||
unless it is safe to do so, i.e., until all outstanding data has been DATA_ACKe | ||||
<t>Once a DATA_FIN has been acknowledged, all remaining subflows MUST be | d or until the segment with the DATA_FIN flag set is the only outstanding segmen | |||
closed with standard FIN exchanges. Both hosts SHOULD send FINs on all subflows | t.</t> | |||
, as a courtesy to allow middleboxes to clean up state even if an individual sub | <t pn="section-3.3.3-7">Once a DATA_FIN has been acknowledged, all rem | |||
flow has failed. It is also encouraged to reduce the timeouts (Maximum Segment L | aining subflows | |||
ifetime) on subflows at end hosts after receiving a DATA_FIN. In particular, any | <bcp14>MUST</bcp14> be closed with standard FIN exchanges. Both | |||
subflows where there is still outstanding data queued (which has been retransmi | hosts <bcp14>SHOULD</bcp14> send FINs on all subflows, as a courtesy, | |||
tted on other subflows in order to get the DATA_FIN acknowledged) MAY be closed | to allow middleboxes to clean up state even if an individual subflow | |||
with a RST with MP_TCPRST (<xref target="sec_reset"/>) error code for "too much | has failed. Reducing the timeouts (MSL) on subflows at end hosts after | |||
outstanding data".</t> | receiving a | |||
DATA_FIN is also encouraged. In particular, any subflows where there i | ||||
<t>A connection is considered closed once both hosts' DATA_FINs have bee | s still | |||
n acknowledged by DATA_ACKs.</t> | outstanding data queued (which has been retransmitted on other | |||
subflows in order to get the DATA_FIN acknowledged) | ||||
<t>As specified above, a standard TCP FIN on an individual subflow only | <bcp14>MAY</bcp14> be closed with a RST with an MP_TCPRST (<xref targe | |||
shuts down the subflow on which it was sent. If all subflows have been closed wi | t="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/> | |||
th a FIN exchange, but no DATA_FIN has been received and acknowledged, the MPTCP | ) error code for "too much outstanding data".</t> | |||
connection is treated as closed only after a timeout. This implies that an impl | <t pn="section-3.3.3-8">A connection is considered closed once both ho | |||
ementation will have TIME_WAIT states at both the subflow and connection levels | sts' DATA_FINs have been acknowledged by DATA_ACKs.</t> | |||
(see <xref target="app_fsm"/>). This permits "break-before-make" scenarios where | <t pn="section-3.3.3-9">As specified above, a standard TCP FIN on an i | |||
connectivity is lost on all subflows before a new one can be re-established.</t | ndividual subflow | |||
> | only shuts down the subflow on which it was sent. If all subflows | |||
</section> | have been closed with a FIN exchange but no DATA_FIN has been | |||
received and acknowledged, the MPTCP connection is treated as closed | ||||
<section title="Receiver Considerations" anchor="sec_rwin"> | only after a timeout. This implies that an implementation will have | |||
<t>Regular TCP advertises a receive window in each packet, telling the | TIME_WAIT states at both the subflow level and the connection level (s | |||
sender how much data the receiver | ee <xref target="app_fsm" format="default" sectionFormat="of" derivedContent="Ap | |||
is willing to accept past the cumulative ack. The receive window is used to impl | pendix D"/>). This permits "break-before-make" scenarios where connectivity is l | |||
ement flow control, throttling | ost on all subflows before a new one can be re‑established.</t> | |||
</section> | ||||
<section anchor="sec_rwin" numbered="true" toc="include" removeInRFC="fa | ||||
lse" pn="section-3.3.4"> | ||||
<name slugifiedName="name-receiver-considerations">Receiver Considerat | ||||
ions</name> | ||||
<t pn="section-3.3.4-1">Regular TCP advertises a receive window in eac | ||||
h packet, telling the sender how much data the receiver | ||||
is willing to accept past the cumulative ACK. The receive window is used to impl | ||||
ement flow control, throttling | ||||
down fast senders when receivers cannot keep up. </t> | down fast senders when receivers cannot keep up. </t> | |||
<t pn="section-3.3.4-2">MPTCP also uses a unique receive window, share | ||||
<t>MPTCP also uses a unique receive window, shared between the subflow | d between the subflows. The idea is to allow any | |||
s. The idea is to allow any | subflow to send data as long as the receiver is willing to accept it. The | |||
subflow to send data as long as the receiver is willing to accept it. The altern | alternative -- maintaining per-subflow | |||
ative, maintaining per subflow | receive windows -- could end up stalling some subflows while others would not us | |||
receive windows, could end up stalling some subflows while others would not use | e up their window.</t> | |||
up their window.</t> | <t pn="section-3.3.4-3">The receive window is relative to the DATA_ACK | |||
. As in TCP, a receiver <bcp14>MUST NOT</bcp14> shrink the right edge of the rec | ||||
<t>The receive window is relative to the DATA_ACK. As in TCP, a receiv | eive window (i.e., DATA_ACK + receive window). The receiver will | |||
er MUST NOT shrink the right edge of the receive window (i.e., DATA_ACK + receiv | ||||
e window). The receiver will | ||||
use the data sequence number to tell if a packet should be accepted at the connection level.</t> | use the data sequence number to tell if a packet should be accepted at the connection level.</t> | |||
<t pn="section-3.3.4-4">When deciding to accept packets at the subflow | ||||
<t>When deciding to accept packets at subflow level, regular TCP check | level, regular TCP checks | |||
s | ||||
the sequence number in the packet against the allowed receive window. | the sequence number in the packet against the allowed receive window. | |||
With multipath, such a check is done using only the connection-level window. A s | With MPTCP, such a check is done using only the connection-level window. A sanit | |||
anity | y | |||
check SHOULD be performed at subflow level to ensure that the subflow and mapped | check <bcp14>SHOULD</bcp14> be performed at the subflow level to ensure that the | |||
sequence | subflow and mapped sequence | |||
numbers meet the following test: SSN - SUBFLOW_ACK &lt;= DSN - DATA_ACK, where SSN is the subflow sequence number of the received packet and SUBFLOW_ACK is the RCV.NXT (next expected sequence number) of the subflow (with the equivalent connection-level definitions for DSN and DATA_ACK).</t> | numbers meet the following test: SSN - SUBFLOW_ACK &lt;= DSN - DATA_ACK, where SSN is the subflow sequence number of the received packet and SUBFLOW_ACK is the RCV.NXT (next expected sequence number) of the subflow (with the equivalent connection-level definitions for DSN and DATA_ACK).</t> | |||
<t pn="section-3.3.4-5">In regular TCP, once a segment is deemed in-wi | ||||
<t>In regular TCP, once a segment is deemed in-window, it is put either | ndow, it is put in either | |||
in the in-order receive queue or in the out-of-order queue. | the in-order receive queue or the out-of-order queue. | |||
In Multipath TCP, the same happens but at the connection level: a segment | In Multipath TCP, the same thing happens, but at the connection level: a segment | |||
is placed in the connection level in-order or out-of-order queue if | is placed in the connection-level in-order or out-of-order queue if | |||
it is in-window at both connection and subflow levels. | it is in-window at both the connection level and the subflow level. | |||
The stack still has to remember, for each subflow, which segments were | The stack still has to remember, for each subflow, which segments were | |||
received successfully so that it can ACK them at subflow level appropriately. | received successfully so that it can ACK them at the subflow level appropriately | |||
Typically, this will be implemented by keeping per subflow out-of-order | . | |||
queues (containing only message headers, not the payloads) and remembering | Typically, this will be implemented by keeping per-subflow out-of-order | |||
queues (containing only message headers -- not the payloads) and remembering | ||||
the value of the cumulative ACK. | the value of the cumulative ACK. | |||
</t> | </t> | |||
<t pn="section-3.3.4-6">It is important for implementers to understand | ||||
<t>It is important for implementers to understand how large | how large | |||
a receiver buffer is appropriate. The lower bound for full | a receive buffer is appropriate. The lower bound for full | |||
network utilization is the maximum bandwidth-delay product | network utilization is the maximum bandwidth-delay product | |||
of any one of the paths. However, this might be insufficient | of any one of the paths. However, this might be insufficient | |||
when a packet is lost on a slower subflow and needs to be | when a packet is lost on a slower subflow and needs to be | |||
          retransmitted (see <xref target="sec_retransmit"/>). A tight            |           retransmitted (see <xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>). A tight | |||
          upper bound would be the maximum round-trip time (RTT) of any path multiplied |           upper bound would be the maximum round-trip time (RTT) of any path multiplied | |||
by the total bandwidth available across all paths. This | by the total bandwidth available across all paths. This | |||
permits all subflows to continue at full speed while a | permits all subflows to continue at full speed while a | |||
packet is fast-retransmitted on the maximum RTT path. Even | packet is fast-retransmitted on the maximum RTT path. Even | |||
this might be insufficient to maintain full performance in | this might be insufficient to maintain full performance in | |||
the event of a retransmit timeout on the maximum RTT path. | the event of a retransmit timeout on the maximum RTT path. | |||
It is for future study to determine the relationship between | Determining the relationship between | |||
retransmission strategies and receive buffer sizing.</t> | retransmission strategies and receive buffer sizing is left for future | |||
study.</t> | ||||
</section> | </section> | |||
<section anchor="sec_sender" numbered="true" toc="include" removeInRFC=" | ||||
<section title="Sender Considerations" anchor="sec_sender"> | false" pn="section-3.3.5"> | |||
<t>The sender remembers receiver window advertisements from the receiv | <name slugifiedName="name-sender-considerations">Sender Considerations | |||
er. It should only update its local receive window values when the largest seque | </name> | |||
nce number allowed (i.e., DATA_ACK + receive window) increases, on the receipt o | <t pn="section-3.3.5-1">The sender remembers receive window advertisem | |||
f a DATA_ACK. This is important to allow using paths with different RTTs, and th | ents from the | |||
us different feedback loops. </t> | receiver. It should only update its local receive window values when | |||
the largest sequence number allowed (i.e., DATA_ACK + receive | ||||
<t>MPTCP uses a single receive window across all subflows, and if the | window) increases on the receipt of a DATA_ACK. This is important | |||
receive window was guaranteed to be unchanged end-to-end, a host could always re | for allowing the use of paths with different RTTs and thus different f | |||
ad the most recent receive window value. However, some classes of middleboxes ma | eedback loops. </t> | |||
y alter the TCP-level receive window. Typically, these will | <t pn="section-3.3.5-2">MPTCP uses a single receive window across all | |||
subflows, and if | ||||
the receive window was guaranteed to be unchanged end to end, a host c | ||||
ould always read the most recent receive window value. However, some classes of | ||||
middleboxes may alter the TCP-level receive window. Typically, these will | ||||
shrink the offered window, although for short periods of time it may be possible for the window to be larger (however, | shrink the offered window, although for short periods of time it may be possible for the window to be larger (however, | |||
note that this would not continue for long periods since ultimately the middlebox must keep up with | note that this would not continue for long periods, since ultimately the middlebox must keep up with | |||
delivering data to the receiver). Therefore, if receive window sizes differ on multiple subflows, | delivering data to the receiver). Therefore, if receive window sizes differ on multiple subflows, | |||
when sending data MPTCP SHOULD take the largest of the most recent window sizes as the one to use in calculations. | when sending data MPTCP <bcp14>SHOULD</bcp14> take the largest of the most recent window sizes as the one to use in calculations. | |||
This rule is implicit in the requirement not to reduce the right edge of the window.</t> | This rule is implicit in the requirement not to reduce the right edge of the window.</t> | |||
<t pn="section-3.3.5-3">The sender <bcp14>MUST</bcp14> also remember t | ||||
<t>The sender MUST also remember the receive windows advertised by eac | he receive windows advertised by each subflow. | |||
h subflow. | ||||
The allowed window for subflow i is (ack_i, ack_i + rcv_wnd_i), where ack_i is the | The allowed window for subflow i is (ack_i, ack_i + rcv_wnd_i), where ack_i is the | |||
subflow-level cumulative ACK of subflow i. This ensures data will not be sent to a middlebox | subflow-level cumulative ACK of subflow i. This ensures that data will not be sent to a middlebox | |||
unless there is enough buffering for the data. </t> | unless there is enough buffering for the data. </t> | |||
<t pn="section-3.3.5-4">Putting the two rules together, we get the fol | ||||
<t>Putting the two rules together, we get the following: a sender is a | lowing: a sender is allowed to send | |||
llowed to send | ||||
data segments with data-level sequence numbers between (DATA_ACK, DATA_ACK + receive_window). | data segments with data-level sequence numbers between (DATA_ACK, DATA_ACK + receive_window). | |||
Each of these segments will be mapped onto subflows, as long as subflow sequence numbers | Each of these segments will be mapped onto subflows, as long as subflow sequence numbers | |||
are in the allowed windows for those subflows. Note that subflow sequence number s do not | are in the allowed windows for those subflows. Note that subflow sequence number s do not | |||
generally affect flow control if the same receive window is advertised across all subflows. | generally affect flow control if the same receive window is advertised across all subflows. | |||
They will perform flow control for those subflows with a smaller advertised receive window. | They will perform flow control for those subflows with a smaller advertised receive window. | |||
</t> | </t> | |||
<t pn="section-3.3.5-5">The send buffer <bcp14>MUST</bcp14>, at a mini | ||||
<t>The send buffer MUST, at a minimum, be as big as the receive buffer | mum, be as big as the receive buffer, to enable the sender to reach maximum thro | |||
, to enable the sender to reach maximum throughput.</t> | ughput.</t> | |||
</section> | </section> | |||
<section anchor="sec_retransmit" numbered="true" toc="include" removeInR | ||||
<section title="Reliability and Retransmissions" anchor="sec_retransmit" | FC="false" pn="section-3.3.6"> | |||
> | <name slugifiedName="name-reliability-and-retransmiss">Reliability and | |||
Retransmissions</name> | ||||
<t>The data sequence mapping allows senders to resend data with the sa | <t pn="section-3.3.6-1">The Data Sequence Mapping allows senders to re | |||
me data sequence number on a different subflow. When doing this, a host MUST sti | send data with the | |||
ll retransmit the original data on the original subflow, in order to preserve th | same data sequence number on a different subflow. When doing this, a | |||
e subflow integrity (middleboxes could replay old data, and/or could reject hole | host <bcp14>MUST</bcp14> still retransmit the original data on the | |||
s in subflows), and a receiver will ignore these retransmissions. While this is | original subflow, in order to preserve the subflow's integrity | |||
clearly suboptimal, for compatibility reasons this is sensible behavior. Optimiz | (middleboxes could replay old data and/or could reject holes in | |||
ations could be negotiated in future versions of this protocol. Note also that t | subflows), and a receiver will ignore these retransmissions. While | |||
his property would also permit a sender to always send the same data, with the s | this is clearly suboptimal, for compatibility reasons this is | |||
ame data sequence number, on multiple subflows, if desired for reliability reaso | sensible behavior. Optimizations could be negotiated in future | |||
ns.</t> | versions of this protocol. Note also that this property would also per | |||
mit a sender to always send the same data, with the same data sequence number, o | ||||
<t>This protocol specification does not mandate any mechanisms for han | n multiple subflows, if desired for reliability reasons.</t> | |||
dling retransmissions, and much will be dependent upon local policy | <t pn="section-3.3.6-2">This protocol specification does not mandate a | |||
(as discussed in <xref target="sec_policy"/>). One can imagine aggressive connec | ny mechanisms for handling retransmissions, and much will be dependent upon loca | |||
tion-level retransmissions policies where every packet lost at subflow level is | l policy | |||
retransmitted on | (as discussed in <xref target="sec_policy" format="default" sectionFormat="of" d | |||
a different subflow (hence, wasting bandwidth but possibly reducing application- | erivedContent="Section 3.3.8"/>). One can imagine aggressive connection-level re | |||
to-application delays), or conservative retransmission policies where connection | transmission policies where every packet lost at the subflow level is retransmit | |||
-level retransmits | ted on | |||
a different subflow (hence wasting bandwidth but possibly reducing application-t | ||||
o-application delays) or conservative retransmission policies where connection-l | ||||
evel retransmissions | ||||
are only used after a few subflow-level retransmission timeouts occur.</t> | are only used after a few subflow-level retransmission timeouts occur.</t> | |||
<t pn="section-3.3.6-3">It is envisaged that a standard connection-lev | ||||
<t>It is envisaged that a standard connection-level retransmission mec | el retransmission mechanism | |||
hanism | ||||
would be implemented around a connection-level data queue: all segments that haven't | would be implemented around a connection-level data queue: all segments that haven't | |||
been DATA_ACKed are stored. A timer is set when | been DATA_ACKed are stored. A timer is set when | |||
the head of the connection-level is ACKed at subflow level but its corresponding | the head of the connection level is ACKed at the subflow level but is not DATA_A | |||
data | CKed at the data level. This timer will guard against retransmission failures | |||
is not ACKed at data level. This timer will guard against failures in retransmis | ||||
sion | ||||
by middleboxes that proactively ACK data.</t> | by middleboxes that proactively ACK data.</t> | |||
<t pn="section-3.3.6-4">The sender <bcp14>MUST</bcp14> keep data in it | ||||
<t>The sender MUST keep data in its send buffer as long as the data ha | s send buffer as | |||
s not been acknowledged at both connection level and on all subflows on which it | long as the data has not been acknowledged both (1) at the | |||
connection level and (2) on all subflows on which it | ||||
has been sent. In this way, the sender can always retransmit the data if needed, on the same subflow or on a different one. A special case is when a subflow fails: the sender | has been sent. In this way, the sender can always retransmit the data if needed, on the same subflow or on a different one. A special case is when a subflow fails: the sender | |||
will typically resend the data on other working subflows after a timeout, and wi | will typically resend the data on other working subflows after a timeout and wil | |||
ll keep trying to retransmit the data | l keep trying to retransmit the data | |||
on the failed subflow too. The sender will declare the subflow failed after a pr | on the failed subflow too. The sender will declare the subflow failed after a pr | |||
edefined upper bound on retransmissions is reached (which MAY be lower than the | edefined upper bound on retransmissions is reached (which <bcp14>MAY</bcp14> be | |||
usual TCP limits of the Maximum Segment Life), or on the receipt of an ICMP erro | lower than the usual TCP limits of the MSL) or on the receipt of an ICMP error, | |||
r, and only then delete the outstanding data segments. </t> | and only then delete the outstanding data segments. </t> | |||
<t pn="section-3.3.6-5">If multiple retransmissions that indicate that | ||||
<t>If multiple retransmissions are triggered that indicate that a subf | a | |||
low performs badly, this MAY lead to a host resetting the subflow with a RST. Ho | subflow is performing badly are triggered, this <bcp14>MAY</bcp14> lea | |||
wever, additional research is required to understand the heuristics of how and w | d to a host resetting the subflow with a RST. However, additional research is re | |||
hen to reset underperforming subflows. For example, a highly asymmetric path may | quired to understand the heuristics of how and when to reset underperforming sub | |||
be misdiagnosed as underperforming. A RST for this purpose SHOULD be accompanie | flows. For example, a highly asymmetric path may be misdiagnosed as underperform | |||
d with an "Unacceptable performance" MP_TCPRST option (<xref target="sec_reset"/ | ing. A RST for this purpose <bcp14>SHOULD</bcp14> be accompanied by an "Unaccept | |||
>).</t> | able performance" MP_TCPRST option (<xref target="sec_reset" format="default" se | |||
ctionFormat="of" derivedContent="Section 3.6"/>).</t> | ||||
</section> | </section> | |||
<section anchor="sec_cc" numbered="true" toc="include" removeInRFC="fals | ||||
<section title="Congestion Control Considerations" anchor="sec_cc"> | e" pn="section-3.3.7"> | |||
<t>Different subflows in an MPTCP connection have different congestion | <name slugifiedName="name-congestion-control-consider">Congestion Cont | |||
windows. | rol Considerations</name> | |||
<t pn="section-3.3.7-1">Different subflows in an MPTCP connection have | ||||
different congestion windows. | ||||
To achieve fairness at bottlenecks and resource pooling, it is necessary to couple the | To achieve fairness at bottlenecks and resource pooling, it is necessary to couple the | |||
congestion windows in use on each subflow, in order to push most traffic to uncongested links. | congestion windows in use on each subflow, in order to push most traffic to uncongested links. | |||
One algorithm for achieving this is presented in <xref target="RFC6356"/>; | One algorithm for achieving this is presented in <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/>; | |||
the algorithm does not achieve perfect resource pooling but is "safe" in that it is readily | the algorithm does not achieve perfect resource pooling but is "safe" in that it is readily | |||
deployable in the current Internet. By this, we mean that it does not take up more capacity | deployable in the current Internet. By this we mean that it does not take up more capacity | |||
on any one path than if it was a single path flow using only that route, so this ensures | on any one path than if it was a single path flow using only that route, so this ensures | |||
fair coexistence with single-path TCP at shared bottlenecks.</t> | fair coexistence with single-path TCP at shared bottlenecks.</t> | |||
<t pn="section-3.3.7-2">It is foreseeable that different congestion co | ||||
<t>It is foreseeable that different congestion controllers will be imp | ntrollers will be | |||
lemented for MPTCP, each aiming to achieve different properties in the resource | implemented for MPTCP, each aiming to achieve different properties | |||
pooling/fairness/stability design space, as well as those for achieving differen | in the resource pooling / fairness / stability design space, as well a | |||
t properties in quality of service, reliability, and resilience.</t> | s those for achieving different properties in quality of service, reliability, a | |||
nd resilience.</t> | ||||
<t>Regardless of the algorithm used, | <t pn="section-3.3.7-3">Regardless of the algorithm used, | |||
the design of the MPTCP protocol aims to provide the congestion control implemen | the design of MPTCP aims to provide the congestion control | |||
tations sufficient information | implementations with sufficient information | |||
to take the right decisions; this information includes, for each subflow, which | to make the right decisions; this information includes, for each subflow, which | |||
packets were lost and when. </t> | packets were lost and when. </t> | |||
</section> | </section> | |||
<section anchor="sec_policy" numbered="true" toc="include" removeInRFC=" | ||||
<section title="Subflow Policy" anchor="sec_policy"> | false" pn="section-3.3.8"> | |||
<t>Within a local MPTCP implementation, a host may use any local polic | <name slugifiedName="name-subflow-policy">Subflow Policy</name> | |||
y it wishes to decide how to share the traffic to be sent over the available pat | <t pn="section-3.3.8-1">Within a local MPTCP implementation, a host ma | |||
hs.</t> | y use any local policy it wishes to decide how to share the traffic to be sent o | |||
<t>In the typical use case, where the goal is to maximize throughput, | ver the available paths.</t> | |||
all available paths will be used simultaneously for data transfer, using coupled | <t pn="section-3.3.8-2">In the typical use case, where the goal is to | |||
congestion control as described in <xref target="RFC6356"/>. It is expected, ho | maximize throughput, all available paths will be used simultaneously for data tr | |||
wever, that other use cases will appear.</t> | ansfer, using coupled congestion control as described in <xref target="RFC6356" | |||
<t>For instance, a possibility is an 'all-or-nothing' approach, i.e., | format="default" sectionFormat="of" derivedContent="RFC6356"/>. It is expected, | |||
have a second path ready for use in the event of | however, that other use cases will appear.</t> | |||
<t pn="section-3.3.8-3">For instance, one possibility is an "all-or-no | ||||
thing" approach, i.e., have a second path ready for use in the event of | ||||
failure of the first path, but alternatives could include entirely saturating one path before using an additional | failure of the first path, but alternatives could include entirely saturating one path before using an additional | |||
path (the 'overflow' case). Such choices would be most likely based on the monetary cost of links, but may also be | path (the "overflow" case). Such choices would be most likely based on the monetary cost of links but may also be | |||
based on properties such as the delay or jitter of links, where stability (of delay or bandwidth) is more important than throughput. Application | based on properties such as the delay or jitter of links, where stability (of delay or bandwidth) is more important than throughput. Application | |||
requirements such as these are discussed in detail in <xref target="RFC6897"/>.< | requirements such as these are discussed in detail in <xref target="RFC6897" for | |||
/t> | mat="default" sectionFormat="of" derivedContent="RFC6897"/>.</t> | |||
<t>The ability to make effective choices at the sender requires full k | <t pn="section-3.3.8-4">The ability to make effective choices at the s | |||
nowledge of the path "cost", which | ender requires full knowledge of the path "cost", which | |||
is unlikely to be the case. It would be desirable for a receiver to be able to signal their own preferences for paths, | is unlikely to be the case. It would be desirable for a receiver to be able to signal their own preferences for paths, | |||
since they will often be the multihomed party, and may have to pay for metered i | since they will often be the multihomed party and may have to pay for metered in | |||
ncoming bandwidth.</t> | coming bandwidth.</t> | |||
<t>To enable this, the MP_JOIN option (see <xref target="sec_join"/>) | <t pn="section-3.3.8-5">To enable this behavior, the MP_JOIN option (s | |||
contains the 'B' bit, which allows a host to indicate to its peer that this path | ee <xref target="sec_join" format="default" sectionFormat="of" derivedContent="S | |||
should be treated as a backup path to use only in the event of failure of other | ection 3.2"/>) contains the "B" bit, | |||
working subflows (i.e., a subflow where the receiver has indicated B=1 SHOULD N | which allows a host to indicate to its peer that this path should be | |||
OT be used to send data unless there are no usable subflows where B=0).</t> | treated as a backup path to use only in the event of failure of | |||
<t>In the event that the available set of paths changes, a host may wi | other working subflows (i.e., a subflow where the receiver has | |||
sh to signal a change in priority of subflows to the peer (e.g., a subflow that | indicated that B=1 <bcp14>SHOULD NOT</bcp14> be used to send data unle | |||
was previously set as backup should now take priority over all remaining subflow | ss there are no usable subflows where B=0).</t> | |||
s). Therefore, the MP_PRIO option, shown in <xref target="tcpm_prio"/>, can be u | <t pn="section-3.3.8-6">In the event that the available set of paths c | |||
sed to change the 'B' flag of the subflow on which it is sent.</t> | hanges, a host may | |||
<t>Another use of the MP_PRIO option is to set the 'B' flag on a subfl | wish to signal a change in priority of subflows to the peer (e.g., a | |||
ow to cleanly retire its use before closing it and removing it with REMOVE_ADDR | subflow that was previously set as a backup should now take priority | |||
<xref target="sec_remove_addr"/>, for example to support make-before-break sessi | over all remaining subflows). Therefore, the MP_PRIO option, shown | |||
on continuity, where new subflows are added before the previously used ones are | in <xref target="tcpm_prio" format="default" sectionFormat="of" derive | |||
closed.</t> | dContent="Figure 11"/>, can be used to | |||
<?rfc needLines='8'?> | change the "B" flag of the subflow on which it is sent.</t> | |||
<figure align="center" anchor="tcpm_prio" title="Change Subflow Priori | <figure anchor="tcpm_prio" align="left" suppress-title="false" pn="fig | |||
ty (MP_PRIO) Option"> | ure-11"> | |||
<artwork align="left"><![CDATA[ | <name slugifiedName="name-change-subflow-priority-mp_">Change Subflo | |||
1 2 3 | w Priority (MP_PRIO) Option</name> | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | <artwork align="left" name="" type="" alt="" pn="section-3.3.8-7.1"> | |||
+---------------+---------------+-------+-----+-+ | 1 2 3 | |||
| Kind | Length |Subtype|(rsv)|B| | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+---------------+---------------+-------+-----+-+ | +---------------+---------------+-------+-----+-+ | |||
]]></artwork> | | Kind | Length |Subtype|(rsv)|B| | |||
+---------------+---------------+-------+-----+-+ </artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.3.8-8">Another use of the MP_PRIO option is to set th | ||||
<t>It should be noted that the backup flag is a request from a data receiver to | e "B" flag on a | |||
a data sender only, and the data sender SHOULD adhere to these requests. A host | subflow to cleanly "retire" its use before closing it and removing it | |||
cannot assume that the data sender will do so, however, since local policies -- | with REMOVE_ADDR (<xref target="sec_remove_addr" format="default" sect | |||
or technical difficulties -- may override MP_PRIO requests. Note also that this | ionFormat="of" derivedContent="Section 3.4.2"/>) -- for example, to support make | |||
signal applies to a single direction, and so the sender of this option could cho | -before-break session continuity, where new subflows are added before the previo | |||
ose to continue using the subflow to send data even if it has signaled B=1 to th | usly used subflows are closed.</t> | |||
e other host.</t> | <t pn="section-3.3.8-9">It should be noted that the backup flag is a r | |||
equest from a data receiver to a data sender only, and the data sender <bcp14>SH | ||||
OULD</bcp14> adhere to these requests. A host cannot assume that the data sender | ||||
will do so, however, since local policies -- or technical difficulties -- may o | ||||
verride MP_PRIO requests. Note also that this signal applies to a single directi | ||||
on, and so the sender of this option could choose to continue using the subflow | ||||
to send data even if it has signaled B=1 to the other host.</t> | ||||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sec_pm" numbered="true" toc="include" removeInRFC="false" | ||||
<section title="Address Knowledge Exchange (Path Management)" anchor="sec_ | pn="section-3.4"> | |||
pm"> | <name slugifiedName="name-address-knowledge-exchange-">Address Knowledge | |||
<t>We use the term "path management" to refer to the exchange of informa | Exchange (Path Management)</name> | |||
tion about additional paths between hosts, which in this design is managed by mu | <t pn="section-3.4-1">We use the term "path management" to refer to the | |||
ltiple addresses at hosts. For more detail of the architectural thinking behind | exchange of information about additional paths between hosts, which in this desi | |||
this design, see the MPTCP Architecture document <xref target="RFC6182"/>.</t> | gn is managed by multiple addresses at hosts. For more details regarding the arc | |||
<t>This design makes use of two methods of sharing such | hitectural thinking behind this design, see the MPTCP architecture document <xre | |||
f target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/ | ||||
>.</t> | ||||
<t pn="section-3.4-2">This design makes use of two methods of sharing su | ||||
ch | ||||
information, and both can be used on a connection. | information, and both can be used on a connection. | |||
The first is the direct | The first is the direct | |||
setup of new subflows, already described in | setup of new subflows (described in | |||
<xref target="sec_join"/>, where the initiator has an | <xref target="sec_join" format="default" sectionFormat="of" derivedConte | |||
additional address. The second method, described in the | nt="Section 3.2"/>), where the initiator has an | |||
following subsections, signals addresses explicitly to the | additional address. The second method (described in the | |||
following subsections) signals addresses explicitly to the | ||||
other host to allow it to initiate new subflows. The | other host to allow it to initiate new subflows. The | |||
two mechanisms are complementary: the first is implicit and | two mechanisms are complementary: the first is implicit and | |||
simple, while the explicit is more complex but is more | simple, while the second (explicit) is more complex but is more | |||
robust. Together, the mechanisms allow addresses to change in | robust. Together, these mechanisms allow addresses to change in | |||
flight (and thus support operation through NATs, since the | flight (and thus support operation through NATs, since the | |||
source address need not be known), and also allow the | source address need not be known); they also allow the | |||
signaling of previously unknown addresses, and of addresses | signaling of previously unknown addresses and of addresses | |||
belonging to other address families (e.g., both IPv4 and IPv6).</t> | belonging to other address families (e.g., both IPv4 and IPv6).</t> | |||
<t pn="section-3.4-3">Here is an example of typical operation of the pro | ||||
<t>Here is an example of typical operation of the protocol: | tocol: | |||
<list style="symbols"> | </t> | |||
<t>An MPTCP connection is initially set up between address/port A1 o | <ul spacing="normal" bare="false" empty="false" pn="section-3.4-4"> | |||
f Host A | <li pn="section-3.4-4.1">An MPTCP connection is initially set up betwe | |||
and address/port B1 of Host B. If Host A is multihomed and | en address/port A1 of Host A | |||
and address/port B1 of Host B. If Host A is multihomed and | ||||
multiaddressed, it can start an additional subflow from | multiaddressed, it can start an additional subflow from | |||
its address A2 to B1, by sending a SYN with a Join | its address A2 to B1, by sending a SYN with an MP_JOIN | |||
option from A2 to B1, using B's previously declared | option from A2 to B1, using B's previously declared | |||
token for this connection. Alternatively, if B is | token for this connection. Alternatively, if B is | |||
multihomed, it can try to set up a new subflow from B2 to | multihomed, it can try to set up a new subflow from B2 to | |||
A1, using A's previously declared token. In either | A1, using A's previously declared token. In either | |||
case, the SYN will be sent to the port already in use | case, the SYN will be sent to the port already in use | |||
for the original subflow on the receiving host.</t> | for the original subflow on the receiving host.</li> | |||
<li pn="section-3.4-4.2">Simultaneously (or after a timeout), an ADD_A | ||||
<t>Simultaneously (or after a timeout), an ADD_ADDR option | DDR option | |||
(<xref target="sec_add_address"/>) is sent on an existing subflow, informing | (<xref target="sec_add_address" format="default" sectionFormat="of" derivedConte | |||
nt="Section 3.4.1"/>) is sent on an existing subflow, informing | ||||
the receiver of the sender's alternative address(es). The recipient can use | the receiver of the sender's alternative address(es). The recipient can use | |||
this information to open a new subflow to the sender's additional address. | this information to open a new subflow to the sender's additional address(es). | |||
In our example, A will send ADD_ADDR option informing B of address/port A2. | In our example, A will send the ADD_ADDR option informing B of address/port A2. | |||
The mix of using the SYN-based option and the ADD_ADDR option, including | The mix of using the SYN‑based option and the ADD_ADDR option, including | |||
timeouts, is implementation specific and can be tailored to agree with local pol | timeouts, is implementation specific and can be tailored to agree with local pol | |||
icy.</t> | icy.</li> | |||
<li pn="section-3.4-4.3">If subflow A2-B1 is successfully set up, Host | ||||
<t>If subflow A2-B1 is successfully set up, Host B can use the Addre | B can use the Address ID in | |||
ss ID in | the MP_JOIN option to correlate this source address with the ADD_ADDR option tha | |||
the Join option to correlate this with the ADD_ADDR option that will also arrive | t will also arrive on | |||
on | ||||
an existing subflow; now B knows not to open A2-B1, ignoring the ADD_ADDR. | an existing subflow; now B knows not to open A2-B1, ignoring the ADD_ADDR. | |||
Otherwise, if B has not received the A2-B1 MP_JOIN SYN but received the ADD_ADDR, | Otherwise, if B has not received the A2-B1 MP_JOIN SYN but received the ADD_ADDR, | |||
it can try to initiate a new subflow from one or more of its addresses to address | it can try to initiate a new subflow from one or more of its addresses to address | |||
A2. This permits new sessions to be opened if one host is behind a NAT.</t> | A2. This permits new sessions to be opened if one host is behind a NAT.</li> | |||
</list> | </ul> | |||
<t pn="section-3.4-5"> | ||||
Other ways of using the two signaling mechanisms are possible; for instance, | Other ways of using the two signaling mechanisms are possible; for instance, | |||
signaling addresses in other address families can only be done explicitly using | signaling addresses in other address families can only be done explicitly | |||
the Add Address option. | using the Add Address (ADD_ADDR) option. | |||
</t> | </t> | |||
<section anchor="sec_add_address" numbered="true" toc="include" removeIn | ||||
<section title="Address Advertisement" anchor="sec_add_address"> | RFC="false" pn="section-3.4.1"> | |||
<t>The Add Address (ADD_ADDR) MPTCP option announces additional addresse | <name slugifiedName="name-address-advertisement">Address Advertisement | |||
s (and optionally, ports) on which a | </name> | |||
host can be reached (<xref target="tcpm_address"/>). | <t pn="section-3.4.1-1">The ADD_ADDR MPTCP option announces additional | |||
addresses (and, optionally, ports) on which a | ||||
host can be reached (<xref target="tcpm_address" format="default" sectionFormat= | ||||
"of" derivedContent="Figure 12"/>). | ||||
This option can be used at any time during a connection, depending on when the | This option can be used at any time during a connection, depending on when the | |||
sender wishes to enable multiple paths and/or when paths become available. As wi | sender wishes to enable multiple paths and/or when paths become available. As w | |||
th all MPTCP | ith all MPTCP | |||
signals, the receiver MUST undertake standard TCP validity checks, e.g. <xref ta | signals, the receiver <bcp14>MUST</bcp14> undertake standard TCP validity | |||
rget="RFC5961"/>, before acting upon it.</t> | checks, e.g., per <xref target="RFC5961" format="default" sectionForma | |||
t="of" derivedContent="RFC5961"/>, before | ||||
<t>Every address has an Address ID that can be used for uniquely identif | acting upon it.</t> | |||
ying the address within a connection for address removal. The Address ID is also | <figure anchor="tcpm_address" align="left" suppress-title="false" pn=" | |||
used to identify MP_JOIN options (see <xref target="sec_join"/>) relating to | figure-12"> | |||
the same address, even when address translators are in use. The Address ID MUST | <name slugifiedName="name-add-address-add_addr-option">Add Address ( | |||
uniquely | ADD_ADDR) Option</name> | |||
identify the address for the sender of the option (within the scope of the conne | <artwork align="left" name="" type="" alt="" pn="section-3.4.1-2.1"> | |||
ction), but the mechanism for | 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+-------+---------------+ | ||||
| Kind | Length |Subtype|(rsv)|E| Address ID | | ||||
+---------------+---------------+-------+-------+---------------+ | ||||
| Address (IPv4: 4 octets / IPv6: 16 octets) | | ||||
+-------------------------------+-------------------------------+ | ||||
| Port (2 octets, optional) | | | ||||
+-------------------------------+ | | ||||
| Truncated HMAC (8 octets, if E=0) | | ||||
| +-------------------------------+ | ||||
| | | ||||
+-------------------------------+ </artwork> | ||||
</figure> | ||||
<t pn="section-3.4.1-3">Every address has an Address ID that can be us | ||||
ed for uniquely identifying the address within a connection for address removal. | ||||
The Address ID is also | ||||
used to identify MP_JOIN options (see <xref target="sec_join" format="default" s | ||||
ectionFormat="of" derivedContent="Section 3.2"/>) relating to | ||||
the same address, even when address translators are in use. The Address ID <bcp1 | ||||
4>MUST</bcp14> uniquely | ||||
identify the address for the sender of the option (within the scope of the conne | ||||
ction); the mechanism for | ||||
allocating such IDs is implementation specific.</t> | allocating such IDs is implementation specific.</t> | |||
<t pn="section-3.4.1-4">All Address IDs learned via either MP_JOIN or | ||||
<t>All address IDs learned via either MP_JOIN or ADD_ADDR | ADD_ADDR | |||
SHOULD be stored by the receiver in a data structure that gathers all th | <bcp14>SHOULD</bcp14> be stored by the receiver in a data structure | |||
e Address ID | that gathers all the Address-ID-to-address mappings for a connection | |||
to address mappings for a connection (identified by a token pair). In th | (identified by a token pair). In this way, there is | |||
is way, there is | a stored mapping between the Address ID, observed source address, and to | |||
a stored mapping between Address ID, observed source address, and token | ken pair for | |||
pair for | ||||
future processing of control information for a connection. Note that an implementation | future processing of control information for a connection. Note that an implementation | |||
MAY discard incoming address advertisements at will, for example, for av | <bcp14>MAY</bcp14> discard incoming address advertisements at will -- fo | |||
oiding updating | r example, to avoid updating | |||
mapping state, or because advertised addresses are of no use to it (for | mapping state or because advertised addresses are of no use to it (for | |||
example, IPv6 addresses when it has IPv4 only). Therefore, a host MUST t | example, IPv6 addresses when it has IPv4 only). Therefore, a host <bcp14 | |||
reat address | >MUST</bcp14> treat address | |||
advertisements as soft state, and it MAY choose to refresh advertisement | advertisements as soft state, and it <bcp14>MAY</bcp14> choose to refres | |||
s periodically. | h advertisements periodically. | |||
Note also that an implementation MAY choose to cache these address adver | Note also that an implementation <bcp14>MAY</bcp14> choose to cache thes | |||
tisements even | e address advertisements even | |||
if they are not currently relevant but may be relevant in the future, such as IPv4 | if they are not currently relevant but may be relevant in the future, such as IPv4 | |||
addresses when IPv6 connectivity is available but IPv4 is awaiting DHCP. </t> | addresses when IPv6 connectivity is available but IPv4 is awaiting DHCP. </t> | |||
<t pn="section-3.4.1-5">This option is shown in <xref target="tcpm_add | ||||
<t>This option is shown in <xref target="tcpm_address"/>. The illustrati | ress" format="default" sectionFormat="of" derivedContent="Figure 12"/>. The illu | |||
on is sized for | stration is sized for | |||
IPv4 addresses. For IPv6, the length of the address will be 16 octets (i | IPv4 addresses. For IPv6, the length of the address will be 16 octets (i | |||
nstead of 4).</t> | nstead of 4).</t> | |||
<t pn="section-3.4.1-6">The 2 octets that specify the TCP port number | ||||
<t>The 2 octets that specify the TCP port number to use are optional and | to use are optional, and their presence | |||
their presence | ||||
can be inferred from the length of the option. Although it is expected that the majority of | can be inferred from the length of the option. Although it is expected that the majority of | |||
use cases will use the same port pairs as used for the initial subflow (e.g., port | use cases will use the same port pairs as those used for the initial subflow (e.g., port | |||
80 remains port 80 on all subflows, as does the ephemeral port at the client), there | 80 remains port 80 on all subflows, as does the ephemeral port at the client), there | |||
may be cases (such as port-based load balancing) where the explicit specification of | may be cases (such as port-based load balancing) where the explicit specification of | |||
a different port is required. If no port is specified, MPTCP SHOULD atte | a different port is required. If no port is specified, MPTCP <bcp14>SHOU | |||
mpt to | LD</bcp14> attempt to | |||
connect to the specified address on the same port as is already in use b | connect to the specified address on the same port as the port that is al | |||
y the subflow | ready in use by the subflow | |||
on which the ADD_ADDR signal was sent; this is discussed in more detail | on which the ADD_ADDR signal was sent; this is discussed in more detail | |||
in <xref target="heuristics"/>.</t> | in <xref target="heuristics" format="default" sectionFormat="of" derivedContent= | |||
"Section 3.9"/>.</t> | ||||
<t>The Truncated HMAC present in this Option is the rightmost 64 bits of | <t pn="section-3.4.1-7">The Truncated HMAC parameter present in this o | |||
an HMAC, negotiated and | ption is the rightmost 64 bits of an HMAC, negotiated and | |||
calculated in the same way as for MP_JOIN as described in <xref target=" | calculated in the same way as for MP_JOIN as described in <xref target=" | |||
sec_join"/>. For this | sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>. Fo | |||
r this | ||||
specification of MPTCP, as there is only one hash algorithm option specified, this will be HMAC as | specification of MPTCP, as there is only one hash algorithm option specified, this will be HMAC as | |||
defined in <xref target="RFC2104"/>, using the SHA-256 hash algorithm <xref target="RFC6234"/>. | defined in <xref target="RFC2104" format="default" sectionFormat="of" derivedContent="RFC2104"/>, using the SHA-256 hash algorithm <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>. | |||
In the same way as for MP_JOIN, the key for the HMAC | In the same way as for MP_JOIN, the key for the HMAC | |||
algorithm, in the case of the message transmitted by Host A, will be Key-A followed by Key-B, and in | algorithm, in the case of the message transmitted by Host A, will be Key-A followed by Key-B, and in | |||
the case of Host B, Key-B followed by Key-A. These are the keys that were exchanged in the original | the case of Host B, Key-B followed by Key-A. These are the keys that were exchanged in the original | |||
MP_CAPABLE handshake. The message for the HMAC is the Address ID, IP Address, and Port which precede | MP_CAPABLE handshake. The message for the HMAC is the Address ID, IP address, and port that precede | |||
the HMAC in the ADD_ADDR option. If the port is not present in the ADD_ADDR option, the HMAC message | the HMAC in the ADD_ADDR option. If the port is not present in the ADD_ADDR option, the HMAC message | |||
will nevertheless include two octets of value zero. The rationale for the HMAC is to | will nevertheless include 2 octets of value zero. The rationale for the HMAC is to | |||
prevent unauthorized entities from injecting ADD_ADDR signals in an attempt to hijack a connection. | prevent unauthorized entities from injecting ADD_ADDR signals in an attempt to hijack a connection. | |||
Note that additionally the presence of this HMAC prevents the address be | Note that, additionally, the presence of this HMAC prevents the | |||
ing changed in flight unless | address from being changed in flight unless | |||
the key is known by an intermediary. If a host receives an ADD_ADDR option for which it cannot | the key is known by an intermediary. If a host receives an ADD_ADDR option for which it cannot | |||
validate the HMAC, it SHOULD silently ignore the option.</t> | validate the HMAC, it <bcp14>SHOULD</bcp14> silently ignore the option.< | |||
/t> | ||||
<t>A set of four flags are present after the subtype and before the Addr | <t pn="section-3.4.1-8">A set of four flags is present after the subty | |||
ess ID. Only the rightmost | pe and before the Address ID. Only the rightmost | |||
bit - labelled 'E' - is assigned in this specification. The other bits a | bit -- labeled "E" -- is assigned in this specification. The other | |||
re currently unassigned and MUST | bits are currently unassigned; they <bcp14>MUST</bcp14> | |||
be set to zero by a sender and MUST be ignored by the receiver.</t> | be set to 0 by a sender and <bcp14>MUST</bcp14> be ignored by the receiv | |||
er.</t> | ||||
<t>The 'E' flag exists to provide reliability for this option. Because t | <t pn="section-3.4.1-9">The "E" flag exists to provide reliability for | |||
his option will often be sent | this option. Because this option will often be sent | |||
on pure ACKs, there is no guarantee of reliability. Therefore, a receiver receiving a fresh ADD_ADDR | on pure ACKs, there is no guarantee of reliability. Therefore, a receiver receiving a fresh ADD_ADDR | |||
option (where E=0), will send the same option back to the sender, but no | option (where E=0) will send the same option back to the sender, but not | |||
t including the HMAC, and | including the HMAC and | |||
with E=1, to indicate receipt. The lack of this echo can be used by the | with E=1, to indicate receipt. According to local policy, the lack of | |||
initial ADD_ADDR sender to | this type of "echo" can indicate to the initial ADD_ADDR sender that the | |||
retransmit the ADD_ADDR according to local policy.</t> | ADD_ADDR needs to be retransmitted.</t> | |||
<t pn="section-3.4.1-10">Due to the proliferation of NATs, it is reaso | ||||
<?rfc needLines='11'?> | nably likely that | |||
<figure align="center" anchor="tcpm_address" title="Add Address (ADD_ADD | one host may attempt to advertise private addresses <xref target="RFC1 | |||
R) Option"> | 918" format="default" sectionFormat="of" derivedContent="RFC1918"/>. It is not d | |||
<artwork align="left"><![CDATA[ | esirable to prohibit | |||
1 2 3 | this behavior, since there may be cases where both hosts have additional | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | interfaces on the same private network, and a host | |||
+---------------+---------------+-------+-------+---------------+ | <bcp14>MAY</bcp14> advertise such addresses. The MP_JOIN handshake | |||
| Kind | Length |Subtype|(rsv)|E| Address ID | | to create a new subflow (<xref target="sec_join" format="default" sect | |||
+---------------+---------------+-------+-------+---------------+ | ionFormat="of" derivedContent="Section 3.2"/>) | |||
| Address (IPv4 - 4 octets / IPv6 - 16 octets) | | provides mechanisms to minimize security risks. The MP_JOIN message | |||
+-------------------------------+-------------------------------+ | contains a 32-bit token that uniquely identifies the connection to | |||
| Port (2 octets, optional) | | | the receiving host. If the token is unknown, the host will respond | |||
+-------------------------------+ | | with a RST. In the unlikely event that the token is valid at the | |||
| Truncated HMAC (8 octets, if E=0) | | receiving host, subflow setup will continue, but the HMAC exchange | |||
| +-------------------------------+ | must occur for authentication. The HMAC exchange | |||
| | | will fail and will provide | |||
+-------------------------------+ | sufficient protection against two unconnected hosts accidentally | |||
]]></artwork> | setting up a new subflow upon the signal of a private address. | |||
</figure> | Further security considerations around the issue of ADD_ADDR messages that acci | |||
dentally misdirect, or maliciously direct, new MP_JOIN attempts are discussed in | ||||
<t>Due to the proliferation of NATs, it is reasonably likely that one ho | <xref target="sec_security" format="default" sectionFormat="of" derivedContent= | |||
st may attempt to advertise private addresses <xref target="RFC1918"/>. It is no | "Section 5"/>.</t> | |||
t desirable to prohibit this, since there may be cases where both hosts have add | <t pn="section-3.4.1-11">A host that receives an ADD_ADDR but finds th | |||
itional interfaces on the same private network, and a host MAY advertise such ad | at a connection set up to that IP address and port number is unsuccessful <bcp14 | |||
dresses. The MP_JOIN handshake to create a new subflow (<xref target="sec_join"/ | >SHOULD NOT</bcp14> perform further connection attempts to this address/port co | |||
>) provides mechanisms to minimize security risks. The MP_JOIN message contains | mbination for this connection. A sender that wants to trigger a new incoming con | |||
a 32-bit token that uniquely identifies the connection to the receiving host. If | nection attempt on a previously advertised address/port combination can therefo | |||
the token is unknown, the host will return with a RST. In the unlikely event th | re refresh ADD_ADDR information by sending the option again.</t> | |||
at the token is valid at the receiving host, subflow setup will continue, but th | <t pn="section-3.4.1-12">A host can therefore send an ADD_ADDR message | |||
e HMAC exchange must occur for authentication. This will fail, and will provide | with an | |||
sufficient protection against two unconnected hosts accidentally setting up a ne | already-assigned Address ID, but the address <bcp14>MUST</bcp14> be | |||
w subflow upon the signal of a private address. Further security considerations | the same as the address previously assigned to this Address ID. A | |||
around the issue of ADD_ADDR messages that accidentally misdirect, or maliciousl | new ADD_ADDR may have the same port number or a different port number. | |||
y direct, new MP_JOIN attempts are discussed in <xref target="sec_security"/>.</ | If the port number is different, the receiving host <bcp14>SHOULD</bcp14> try t | |||
t> | o set up a new subflow to this new address/port combination.</t> | |||
<t pn="section-3.4.1-13">A host wishing to replace an existing Address | ||||
<t>A host that receives an ADD_ADDR but finds a connection set up to tha | ID <bcp14>MUST</bcp14> first remove the existing one (<xref target="sec_remove_ | |||
t IP address and port number is unsuccessful SHOULD NOT perform further connecti | addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>).</t> | |||
on attempts to this address/port combination for this connection. A sender that | <t pn="section-3.4.1-14">During normal MPTCP operation, it is unlikely | |||
wants to trigger a new incoming connection attempt on a previously advertised ad | that there will be sufficient TCP option space for ADD_ADDR to be included alon | |||
dress/port combination can therefore refresh ADD_ADDR information by sending the | g with those for data sequence numbering (<xref target="sec_dsn" format="default | |||
option again.</t> | " sectionFormat="of" derivedContent="Section 3.3.1"/>). Therefore, it is expecte | |||
d that an MPTCP implementation will send the ADD_ADDR option on separate ACKs. A | ||||
<t>A host can therefore send an ADD_ADDR message with an already assigne | s discussed earlier, however, an MPTCP implementation <bcp14>MUST NOT</bcp14> tr | |||
d Address ID, but the Address MUST be the same as previously assigned to this Ad | eat duplicate ACKs with any MPTCP option, with the exception of the DSS option, | |||
dress ID. A new ADD_ADDR may have the same, or different, port number. If the po | as indications of congestion <xref target="RFC5681" format="default" sectionForm | |||
rt number is different, the receiving host SHOULD try to set up a new subflow to | at="of" derivedContent="RFC5681"/>, and an MPTCP implementation <bcp14>SHOULD NO | |||
this new address/port combination.</t> | T</bcp14> send more than two duplicate ACKs in a row for signaling purposes.</t> | |||
</section> | ||||
<t>A host wishing to replace an existing Address ID MUST first remove th | <section anchor="sec_remove_addr" numbered="true" toc="include" removeIn | |||
e existing one (<xref target="sec_remove_addr"/>).</t> | RFC="false" pn="section-3.4.2"> | |||
<name slugifiedName="name-remove-address">Remove Address</name> | ||||
<t>During normal MPTCP operation, it is unlikely that there will be suff | <t pn="section-3.4.2-1">If, during the lifetime of an MPTCP connection | |||
icient TCP option space for ADD_ADDR to be included along with those for data se | , a previously | |||
quence numbering (<xref target="sec_dsn"/>). Therefore, it is expected that an M | announced address becomes invalid (e.g., if the interface | |||
PTCP implementation will send the ADD_ADDR option on separate ACKs. As discussed | disappears or an IPv6 address is no longer preferred), the affected | |||
earlier, however, an MPTCP implementation MUST NOT treat duplicate ACKs with an | host <bcp14>SHOULD</bcp14> announce this situation so that the peer ca | |||
y MPTCP option, with the exception of the DSS option, as indications of congesti | n remove | |||
on <xref target="RFC5681"/>, and an MPTCP implementation SHOULD NOT send more th | subflows related to this address. Even if an address is not in use | |||
an two duplicate ACKs in a row for signaling purposes.</t> | by an MPTCP connection, if it has been previously announced, an | |||
implementation <bcp14>SHOULD</bcp14> announce its removal. A host | ||||
</section> | <bcp14>MAY</bcp14> also choose to announce that a valid IP address | |||
<section title="Remove Address" anchor="sec_remove_addr"> | should not be used any longer -- for example, for make‑before-break se | |||
<t>If, during the lifetime of an MPTCP connection, a previously announce | ssion continuity.</t> | |||
d address becomes invalid (e.g., if the interface disappears, or an IPv6 address | <t pn="section-3.4.2-2">This is achieved through the Remove Address (R | |||
is no longer preferred), the affected host SHOULD announce this so that the pee | EMOVE_ADDR) option | |||
r can remove subflows related to this address. Even if an address is not in use | (<xref target="tcpm_remove" format="default" sectionFormat="of" derive | |||
by a MPTCP connection, if it has been previously announced, an implementation SH | dContent="Figure 13"/>), which will remove a | |||
OULD announce its removal. A host MAY also choose to announce that a valid IP ad | previously added address (or list of addresses) from a connection | |||
dress should not be used any longer, for example for make-before-break session c | and terminate any subflows currently using that address.</t> | |||
ontinuity.</t> | <figure anchor="tcpm_remove" align="left" suppress-title="false" pn="f | |||
<t>This is achieved through the Remove Address (REMOVE_ADDR) option (<xr | igure-13"> | |||
ef target="tcpm_remove"/>), which will remove a previously added address (or lis | <name slugifiedName="name-remove-address-remove_addr-">Remove Addres | |||
t of addresses) from a connection and terminate any subflows currently using tha | s (REMOVE_ADDR) Option</name> | |||
t address.</t> | <artwork align="left" name="" type="" alt="" pn="section-3.4.2-3.1"> | |||
<t>For security purposes, if a host receives a REMOVE_ADDR option, it mu | 1 2 3 | |||
st ensure the affected path(s) are no longer in use before it instigates closure | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
. The receipt of REMOVE_ADDR SHOULD first trigger the sending of a TCP keepalive | +---------------+---------------+-------+-------+---------------+ | |||
<xref target="RFC1122"/> on the path, and if a response is received the path SH | | Kind |Length = 3 + n |Subtype|(resvd)| Address ID | ... | |||
OULD NOT be removed. If the path is found to still be alive, the receiving host | +---------------+---------------+-------+-------+---------------+ | |||
SHOULD no longer use the specified address for future connections, but it is the | (followed by n-1 Address IDs, if required) </artwork> | |||
responsibility of the host which sent the REMOVE_ADDR to shut down the subflow. | </figure> | |||
The requesting host MAY also use MP_PRIO (<xref target="sec_policy"/>) to reque | <t pn="section-3.4.2-4">For security purposes, if a host receives a RE | |||
st a path is no longer used, before removal. Typical TCP validity tests on the s | MOVE_ADDR option, | |||
ubflow (e.g., ensuring sequence and ACK numbers are correct) MUST also be undert | it must ensure that the affected path or paths are no longer in use | |||
aken. An implementation can use indications of these test failures as part of in | before it instigates closure. The receipt of REMOVE_ADDR | |||
trusion detection or error logging.</t> | <bcp14>SHOULD</bcp14> first trigger the sending of a TCP keepalive | |||
<t>The sending and receipt (if no keepalive response was received) of th | <xref target="RFC1122" format="default" sectionFormat="of" derivedCont | |||
is message SHOULD trigger the sending of RSTs by both hosts on the affected subf | ent="RFC1122"/> on the path, and if a | |||
low(s) (if possible), as a courtesy to cleaning up middlebox state, before clean | response is received, the path <bcp14>SHOULD NOT</bcp14> be | |||
ing up any local state.</t> | removed. If the path is found to still be alive, the receiving host | |||
<t>Address removal is undertaken by ID, so as to permit the use of NATs | <bcp14>SHOULD</bcp14> no longer use the specified address for future | |||
and other middleboxes that rewrite source addresses. If there is no address at t | connections, but it is the responsibility of the host that sent the | |||
he requested ID, the receiver will silently ignore the request.</t> | REMOVE_ADDR to shut down the subflow. Before the address is removed, | |||
<t>A subflow that is still functioning MUST be closed with a FIN exchang | the requesting host | |||
e as in regular TCP, rather than using this option. For more information, see <x | <bcp14>MAY</bcp14> also use MP_PRIO (<xref target="sec_policy" format= | |||
ref target="sec_close"/>.</t> | "default" sectionFormat="of" derivedContent="Section 3.3.8"/>) to request that a | |||
<?rfc needLines='8'?> | path no longer be used. Typical TCP validity tests on the subflow (e.g., ensuri | |||
<figure align="center" anchor="tcpm_remove" title="Remove Address (REMOV | ng | |||
E_ADDR) Option"> | that sequence and ACK numbers are correct) <bcp14>MUST</bcp14> also be | |||
<artwork align="left"><![CDATA[ | undertaken. An implementation can use indications of these test failures as par | |||
1 2 3 | t of intrusion detection or error logging.</t> | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | <t pn="section-3.4.2-5">The sending and receipt (if no keepalive respo | |||
+---------------+---------------+-------+-------+---------------+ | nse was received) | |||
| Kind | Length = 3+n |Subtype|(resvd)| Address ID | ... | of this message <bcp14>SHOULD</bcp14> trigger the sending of RSTs by | |||
+---------------+---------------+-------+-------+---------------+ | both hosts on the affected subflow(s) (if possible), as a courtesy, | |||
(followed by n-1 Address IDs, if required) | to allow the cleanup of middlebox state before cleaning up any local s | |||
]]></artwork> | tate.</t> | |||
</figure> | <t pn="section-3.4.2-6">Address removal is undertaken according to the | |||
</section> | Address ID, so as to | |||
permit the use of NATs and other middleboxes that rewrite source | ||||
addresses. If an Address ID is not known, the receiver will | ||||
silently ignore the request.</t> | ||||
<t pn="section-3.4.2-7">A subflow that is still functioning <bcp14>MUS | ||||
T</bcp14> be closed with a FIN exchange as in regular TCP, rather than using thi | ||||
s option. For more information, see <xref target="sec_close" format="default" se | ||||
ctionFormat="of" derivedContent="Section 3.3.3"/>.</t> | ||||
</section> | ||||
</section> | </section> | |||
<section anchor="sec_fastclose" numbered="true" toc="include" removeInRFC= | ||||
<section title="Fast Close" anchor="sec_fastclose"> | "false" pn="section-3.5"> | |||
<t>Regular TCP has the means of sending a reset (RST) signal to abruptly | <name slugifiedName="name-fast-close">Fast Close</name> | |||
close a connection. With MPTCP, a regular RST only has the scope of the | <t pn="section-3.5-1">Regular TCP has the means of sending a RST signal | |||
subflow | to abruptly | |||
and will only close the concerned subflow but not affect the remaining | close a connection. With MPTCP, a regular RST only has the scope of | |||
the subflow; it | ||||
will only close the applicable subflow and will not affect the remaining | ||||
subflows. MPTCP's connection will stay alive at the data level, in order | subflows. MPTCP's connection will stay alive at the data level, in order | |||
to permit break-before-make handover between subflows. It is therefore | to permit break-before-make handover between subflows. It is therefore | |||
necessary to provide an MPTCP-level "reset" to allow the abrupt closure | necessary to provide an MPTCP-level "reset" to allow the abrupt closure | |||
of the whole MPTCP connection, and this is the MP_FASTCLOSE option.</t> | of the whole MPTCP connection; this is done via the MP_FASTCLOSE option. | |||
</t> | ||||
<t>MP_FASTCLOSE is used to indicate to the peer that the connection will | <t pn="section-3.5-2">MP_FASTCLOSE is used to indicate to the peer that | |||
be | the connection will be | |||
abruptly closed and no data will be accepted anymore. The reasons for | abruptly closed and no data will be accepted anymore. The reasons for | |||
triggering an MP_FASTCLOSE are implementation specific. Regular TCP does | triggering an MP_FASTCLOSE are implementation specific. Regular TCP does | |||
not allow sending a RST while the connection is in a synchronized | not allow the sending of a RST while the connection is in a synchronized | |||
state <xref target="RFC0793"/>. Nevertheless, implementations allow | state <xref target="RFC0793" format="default" sectionFormat="of" derived | |||
the sending of a RST in this state, if, for example, the operating | Content="RFC0793"/>. Nevertheless, implementations allow | |||
the sending of a RST in this state if, for example, the operating | ||||
system is running out of resources. In these cases, MPTCP should send | system is running out of resources. In these cases, MPTCP should send | |||
the MP_FASTCLOSE. This option is illustrated in <xref target="tcpm_fastc | the MP_FASTCLOSE. This option is illustrated in <xref target="tcpm_fastc | |||
lose"/>.</t> | lose" format="default" sectionFormat="of" derivedContent="Figure 14"/>.</t> | |||
<figure anchor="tcpm_fastclose" align="left" suppress-title="false" pn=" | ||||
<?rfc needLines='12'?> | figure-14"> | |||
<figure align="center" anchor="tcpm_fastclose" title="Fast Close (MP_FAS | <name slugifiedName="name-fast-close-mp_fastclose-opt">Fast Close (MP_ | |||
TCLOSE) Option"> | FASTCLOSE) Option</name> | |||
<artwork align="left"><![CDATA[ | <artwork align="left" name="" type="" alt="" pn="section-3.5-3.1"> | |||
1 2 3 | 1 2 3 | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
+---------------+---------------+-------+-----------------------+ | +---------------+---------------+-------+-----------------------+ | |||
| Kind | Length |Subtype| (reserved) | | | Kind | Length |Subtype| (reserved) | | |||
+---------------+---------------+-------+-----------------------+ | +---------------+---------------+-------+-----------------------+ | |||
| Option Receiver's Key | | | Option Receiver's Key | | |||
| (64 bits) | | | (64 bits) | | |||
| | | | | | |||
+---------------------------------------------------------------+ | +---------------------------------------------------------------+ </artwork> | |||
]]></artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.5-4">If Host A wants to force the closure of an MPTCP c | ||||
<t>If Host A wants to force the closure of an MPTCP connection, it has t | onnection, it can | |||
wo | do so via two | |||
different options: | options: | |||
<list style="symbols"> | </t> | |||
<t>Option A (ACK) : Host A sends an ACK containing the MP_FASTCLOSE | <ul spacing="normal" bare="false" empty="false" pn="section-3.5-5"> | |||
option on one subflow, containing the key of Host B as declared in | <li pn="section-3.5-5.1">Option A (ACK): Host A sends an ACK containin | |||
the initial connection handshake. On all the other subflows, Host A | g the MP_FASTCLOSE | |||
sends a regular TCP RST to close these subflows, and tears them down. | option on one subflow, containing the key of Host B as declared in | |||
Host A now enters FASTCLOSE_WAIT state.</t> | the initial connection handshake. On all the other subflows, Host A | |||
sends a regular TCP RST to close these subflows and tears them down. | ||||
<t>Option R (RST) : Host A sends a RST containing the MP_FASTCLOSE | Host A now enters FASTCLOSE_WAIT state.</li> | |||
option on all subflows, containing the key of Host B as declared in | <li pn="section-3.5-5.2">Option R (RST): Host A sends a RST containing | |||
the initial connection handshake. Host A can tear the subflows and | the MP_FASTCLOSE | |||
the connection down immediately.</t> | option on all subflows, containing the key of Host B as declared in | |||
</list> | the initial connection handshake. Host A can tear down the subflows | |||
</t> | and | |||
the connection immediately.</li> | ||||
<t>If host A decides to force the closure by using Option A and sending | </ul> | |||
an ACK with the MP_FASTCLOSE option, the connection shall proceed as foll | <t pn="section-3.5-6">If Host A decides to force the closure by using Op | |||
ows: | tion A and sending | |||
<list style="symbols"> | an ACK with the MP_FASTCLOSE option, the connection shall proceed as fol | |||
<t>Upon receipt of an ACK with MP_FASTCLOSE by Host B, containing th | lows: | |||
e valid key, Host B answers | </t> | |||
on the same subflow with a TCP RST and tears down all subflows also | <ul spacing="normal" bare="false" empty="false" pn="section-3.5-7"> | |||
through sending TCP RST signals. Host B can | <li pn="section-3.5-7.1">Upon receipt of an ACK with MP_FASTCLOSE by H | |||
now close the whole MPTCP connection (it transitions directly to CLO | ost B, containing the valid key, Host B answers | |||
SED state).</t> | on the same subflow with a TCP RST and tears down all subflows | |||
also through sending TCP RST signals. Host B can | ||||
<t>As soon as Host A has received the TCP RST on the remaining subfl | now close the whole MPTCP connection (it transitions directly to CLO | |||
ow, it | SED state).</li> | |||
<li pn="section-3.5-7.2">As soon as Host A has received the TCP RST on | ||||
the remaining subflow, it | ||||
can close this subflow and tear down the whole connection (transition from | can close this subflow and tear down the whole connection (transition from | |||
FASTCLOSE_WAIT to CLOSED states). If Host A receives an MP_FASTCLOSE instead | FASTCLOSE_WAIT state to CLOSED state). If Host A receives an MP_FASTCLOSE instead | |||
of a TCP RST, both hosts attempted fast closure simultaneously. Host A should | of a TCP RST, both hosts attempted fast closure simultaneously. Host A should | |||
reply with a TCP RST and tear down the connection.</t> | reply with a TCP RST and tear down the connection.</li> | |||
<li pn="section-3.5-7.3">If Host A does not receive a TCP RST in reply | ||||
<t>If Host A does not receive a TCP RST in reply to its MP_FASTCLOSE | to its MP_FASTCLOSE after one | |||
after one | retransmission timeout (RTO) (the RTO of the subflow where the MP_FA | |||
retransmission timeout (RTO) (the RTO of the subflow where the MP_FA | STCLOSE has been sent), it <bcp14>SHOULD</bcp14> | |||
STCLOSE has been sent), it SHOULD | retransmit the MP_FASTCLOSE. To keep this connection from being | |||
retransmit the MP_FASTCLOSE. The number of retransmissions SHOULD be | retained for a long time, the number of retransmissions <bcp14>SHOUL | |||
limited to avoid this connection from being retained for a long time | D</bcp14> be | |||
, but | limited; | |||
this limit is implementation specific. A RECOMMENDED number is 3. If | this limit is implementation specific. A <bcp14>RECOMMENDED</bcp14> | |||
no TCP RST | number is 3. If no TCP RST | |||
is received in response, Host A SHOULD send a TCP RST with the MP_FA | is received in response, Host A <bcp14>SHOULD</bcp14> send a TCP RST | |||
STCLOSE option | with the MP_FASTCLOSE option | |||
itself when it releases state in order to clear any remaining state a | itself when it releases state in order to clear any remaining state | |||
t middleboxes.</t> | at middleboxes.</li> | |||
</list> | </ul> | |||
</t> | <t pn="section-3.5-8">If, however, Host A decides to force the closure b | |||
y using Option R and | ||||
<t>If however host A decides to force the closure by using Option R and | sending a RST with the MP_FASTCLOSE option, Host B will act as follows: | |||
sending a RST with the MP_FASTCLOSE option, Host B will act as follows: | upon receipt of a RST with MP_FASTCLOSE, containing the valid key, | |||
Upon receipt of a RST with MP_FASTCLOSE, containing the valid key, | Host B tears down all subflows by sending a TCP RST. Host B can now clos | |||
Host B tears down all subflows by sending a TCP RST. Host B can now close | e the whole MPTCP | |||
the whole MPTCP | connection (it transitions directly to CLOSED state).</t> | |||
connection (it transitions directly to CLOSED state).</t> | ||||
</section> | </section> | |||
<section anchor="sec_reset" numbered="true" toc="include" removeInRFC="fal | ||||
<section title="Subflow Reset" anchor="sec_reset"> | se" pn="section-3.6"> | |||
<t>An implementation of MPTCP may also need to send a regular TCP RST to | <name slugifiedName="name-subflow-reset">Subflow Reset</name> | |||
force | <t pn="section-3.6-1">An implementation of MPTCP may also need to send a | |||
the closure of a subflow. A host sends a TCP RST in order to close a subf | regular TCP RST to force | |||
low | the closure of a subflow. A host sends a TCP RST in order to close a sub | |||
or reject an attempt to open a subflow (MP_JOIN). In order to inform the | flow | |||
receiving host why a subflow is being closed or rejected, the TCP RST pac | or reject an attempt to open a subflow (MP_JOIN). In order to let the | |||
ket | receiving host know why a subflow is being closed or rejected, the TCP R | |||
MAY include the MP_TCPRST Option. The host MAY use this information to | ST packet | |||
decide, for example, whether it tries to re-establish the subflow | <bcp14>MAY</bcp14> include the MP_TCPRST option (<xref target="tcpm_rese | |||
immediately, later, or never.</t> | t" format="default" sectionFormat="of" derivedContent="Figure 15"/>). The host < | |||
bcp14>MAY</bcp14> use this information to | ||||
<?rfc needLines='8'?> | decide, for example, whether it tries to re-establish the subflow | |||
<figure align="center" anchor="tcpm_reset" title="TCP RST Reason (MP_TCP | immediately, later, or never.</t> | |||
RST) Option"> | <figure anchor="tcpm_reset" align="left" suppress-title="false" pn="figu | |||
<artwork align="left"><![CDATA[ | re-15"> | |||
1 2 3 | <name slugifiedName="name-tcp-rst-reason-mp_tcprst-op">TCP RST Reason | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | (MP_TCPRST) Option</name> | |||
+---------------+---------------+-------+-----------------------+ | <artwork align="left" name="" type="" alt="" pn="section-3.6-2.1"> | |||
| Kind | Length |Subtype|U|V|W|T| Reason | | 1 2 3 | |||
+---------------+---------------+-------+-----------------------+ | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
]]></artwork> | +---------------+---------------+-------+-----------------------+ | |||
| Kind | Length |Subtype|U|V|W|T| Reason | | ||||
+---------------+---------------+-------+-----------------------+ </artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.6-3">The MP_TCPRST option contains a reason code that a | ||||
<t>The MP_TCPRST option contains a reason code that allows the | llows the | |||
sender of the option to provide more information about the reason for | sender of the option to provide more information about the reason for | |||
the termination of the subflow. Using 12 bits of option space, the | the termination of the subflow. Using 12 bits of option space, the | |||
first four bits are reserved for flags (only one of which is currently | first 4 bits are reserved for flags (only one of which is currently | |||
defined), and the remaining octet is used to express a reason code for | defined), and the remaining octet is used to express a reason code for | |||
this subflow termination, from which a receiver MAY infer information | this subflow termination, from which a receiver <bcp14>MAY</bcp14> infer information | |||
about the usability of this path.</t> | about the usability of this path.</t> | |||
<t pn="section-3.6-4">The "T" flag is used by the sender to indicate whe | ||||
<t>The "T" flag is used by the sender to indicate whether the error | ther the error | |||
condition that is reported is Transient (T bit set to 1) or Permanent | condition that is reported is Transient ("T" bit set to 1) or Permanent | |||
(T bit set to 0). If the error condition is considered to be | ("T" bit set to 0). If the error condition is considered to be | |||
Transient by the sender of the RST segment, the recipient of this | Transient by the sender of the RST segment, the recipient of this | |||
segment MAY try to reestablish a subflow for this connection over the | segment <bcp14>MAY</bcp14> try to re-establish a subflow for this connec | |||
failed path. The time at which a receiver may try to re-establish this | tion over the | |||
is implementation-specific, but SHOULD take into account the properties | failed path. The time at which a receiver may try to | |||
of the failure defined by the following reason code. If the error condi | re‑establish this subflow | |||
tion | is implementation specific but <bcp14>SHOULD</bcp14> take into account t | |||
is considered to be permanent, the receiver of the RST segment SHOULD NO | he properties | |||
T try | of the failure as defined by the provided reason code. If the error con | |||
to reestablish a subflow for this connection over this path. The "U", " | dition | |||
V" | is considered to be Permanent, the receiver of the RST segment <bcp14>SH | |||
OULD NOT</bcp14> try | ||||
to re‑establish a subflow for this connection over this path. The "U", | ||||
"V", | ||||
and "W" flags are not defined by this specification and are reserved for | and "W" flags are not defined by this specification and are reserved for | |||
future use. An implementation of this specification MUST set these flags | future use. An implementation of this specification <bcp14>MUST</bcp14> | |||
to 0, and a receiver MUST ignore them.</t> | set these flags | |||
to 0, and a receiver <bcp14>MUST</bcp14> ignore them.</t> | ||||
<t>The "Reason" code is an 8-bit field that indicates the reason for | <t pn="section-3.6-5">"Reason" is an 8-bit field that indicates the reas | |||
on code for | ||||
the termination of the subflow. The following codes are defined in | the termination of the subflow. The following codes are defined in | |||
this document: | this document: | |||
<list style="symbols"> | </t> | |||
<t>Unspecified error (code 0x0). This is the default error implying | <ul spacing="normal" bare="false" empty="false" pn="section-3.6-6"> | |||
the | <li pn="section-3.6-6.1">Unspecified error (code 0x00). This is the d | |||
efault error; | ||||
it implies that the | ||||
subflow is no longer available. The presence of this option shows | subflow is no longer available. The presence of this option shows | |||
that the RST was generated by a MPTCP-aware device.</t> | that the RST was generated by an MPTCP-aware device.</li> | |||
<li pn="section-3.6-6.2">MPTCP-specific error (code 0x01). An error h | ||||
<t>MPTCP specific error (code 0x01). An error has been detected in | as been detected in the | |||
the | ||||
processing of MPTCP options.  This is the usual reason code to return | processing of MPTCP options.  This is the usual reason code to return | |||
in the cases where a RST is being sent to close a subflow for reason | in the cases where a RST is being sent to close a subflow because | |||
s | of an invalid response.</li> | |||
of an invalid response.</t> | <li pn="section-3.6-6.3">Lack of resources (code 0x02). This code ind | |||
icates that the | ||||
<t>Lack of resources (code 0x02). This code indicates that the | ||||
sending host does not have enough resources to support the | sending host does not have enough resources to support the | |||
terminated subflow.</t> | terminated subflow.</li> | |||
<li pn="section-3.6-6.4">Administratively prohibited (code 0x03). Thi | ||||
<t>Administratively prohibited (code 0x03). This code indicates tha | s code indicates that | |||
t | ||||
the requested subflow is prohibited by the policies of the sending | the requested subflow is prohibited by the policies of the sending | |||
host.</t> | host.</li> | |||
<li pn="section-3.6-6.5">Too much outstanding data (code 0x04). This | ||||
<t>Too much outstanding data (code 0x04). This code indicates that | code indicates that | |||
there is an excessive amount of data that need to be transmitted | there is an excessive amount of data that needs to be transmitted | |||
over the terminated subflow while having already been acknowledged | over the terminated subflow while having already been acknowledged | |||
over one or more other subflows. This may occur if a path has been | over one or more other subflows. This may occur if a path has been | |||
unavailable for a short period and it is more efficient to reset and | unavailable for a short period and it is more efficient to reset and | |||
start again than it is to retransmit the queued data.</t> | start again than it is to retransmit the queued data.</li> | |||
<li pn="section-3.6-6.6">Unacceptable performance (code 0x05). This c | ||||
<t>Unacceptable performance (code 0x05). This code indicates that | ode indicates that | |||
the performance of this subflow was too low compared to the other | the performance of this subflow was too low compared to the other | |||
subflows of this Multipath TCP connection.</t> | subflows of this Multipath TCP connection.</li> | |||
<li pn="section-3.6-6.7">Middlebox interference (code 0x06). Middlebo | ||||
<t>Middlebox interference (code 0x06). Middlebox interference has | x interference has | |||
been detected over this subflow making MPTCP signaling invalid. For | been detected over this subflow, making MPTCP signaling invalid. Fo | |||
example, this may be sent if the checksum does not validate.</t> | r | |||
</list> | example, this may be sent if the checksum does not validate.</li> | |||
</t> | </ul> | |||
</section> | </section> | |||
<section anchor="sec_fallback" numbered="true" toc="include" removeInRFC=" | ||||
<section title="Fallback" anchor="sec_fallback"> | false" pn="section-3.7"> | |||
<t>Sometimes, middleboxes will exist on a path that could prevent the op | <name slugifiedName="name-fallback">Fallback</name> | |||
eration of MPTCP. MPTCP has been designed in order to cope with many middlebox m | <t pn="section-3.7-1">Sometimes, middleboxes will exist on a path that c | |||
odifications (see <xref target="sec_middleboxes"/>), but there are still some ca | ould prevent the | |||
ses where a subflow could fail to operate within the MPTCP requirements. These c | operation of MPTCP. MPTCP has been designed to cope with many | |||
ases are notably the following: the loss of MPTCP options on a path, and the mod | middlebox modifications (see <xref target="sec_middleboxes" format="defa | |||
ification of payload data. If such an event occurs, it is necessary to "fall bac | ult" sectionFormat="of" derivedContent="Section 6"/>), but there are still some | |||
k" to the previous, safe operation. This may be either falling back to regular T | cases where a subflow | |||
CP or removing a problematic subflow.</t> | could fail to operate within the MPTCP requirements. Notably, these case | |||
s are the following: the loss of MPTCP options on a path and the modification of | ||||
<t>At the start of an MPTCP connection (i.e., the first subflow), it is | payload data. If such an event occurs, it is necessary to "fall back" to the pr | |||
important to ensure that the path is fully MPTCP capable and the necessary MPTCP | evious, safe operation. This may be either falling back to regular TCP or removi | |||
options can reach each host. The handshake as described in <xref target="sec_in | ng a problematic subflow.</t> | |||
it"/> SHOULD fall back to regular TCP if either of the SYN messages do not have | <t pn="section-3.7-2">At the start of an MPTCP connection (i.e., the fir | |||
the MPTCP options: this is the same, and desired, behavior in the case where a h | st subflow), it is important to ensure that the path is fully MPTCP capable and | |||
ost is not MPTCP capable, or the path does not support the MPTCP options. When a | the necessary MPTCP options can reach each host. The handshake as described in < | |||
ttempting to join an existing MPTCP connection (<xref target="sec_join"/>), if a | xref target="sec_init" format="default" sectionFormat="of" derivedContent="Secti | |||
path is not MPTCP capable and the MPTCP options do not get through on the SYNs, | on 3.1"/> <bcp14>SHOULD</bcp14> fall back to regular TCP if either of the SYN me | |||
the subflow will be closed according to the MP_JOIN logic.</t> | ssages does not have the MPTCP options: this is the same, and desired, behavior | |||
in the case where a host is not MPTCP capable or the path does not support the M | ||||
<t>There is, however, another corner case that should be addressed. That | PTCP options. When attempting to join an existing MPTCP connection (<xref target | |||
is one of MPTCP options getting through on the SYN, but not on regular packets. | ="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>), | |||
This can be resolved if the subflow is the first subflow, and thus all data in | if a path is not MPTCP capable and the MPTCP options do not get through on the | |||
flight is contiguous, using the following rules.</t> | SYNs, the subflow will be closed according to the MP_JOIN logic.</t> | |||
<t pn="section-3.7-3">There is, however, another corner case that should | ||||
<t>A sender MUST include a DSS option with data sequence mapping in ever | be addressed: | |||
y segment until one of the sent segments has been acknowledged with a DSS option | the case where MPTCP options get through on the SYN but not on regular | |||
containing a Data ACK. Upon reception of the acknowledgment, the sender has the | packets. If the subflow is the first subflow and thus all data in | |||
confirmation that the DSS option passes in both directions and may choose to se | flight is contiguous, this situation can be resolved by using the follow | |||
nd fewer DSS options than once per segment.</t> | ing rules:</t> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-3.7-4"> | ||||
<t>If, however, an ACK is received for data (not just for the SYN) witho | <li pn="section-3.7-4.1">A sender <bcp14>MUST</bcp14> include a DSS op | |||
ut a DSS option containing a Data ACK, the sender determines the path is not MPT | tion with Data Sequence Mapping in every segment until one of the sent segments | |||
CP capable. In the case of this occurring on an additional subflow (i.e., one st | has been acknowledged with a DSS option containing a Data ACK. Upon reception of | |||
arted with MP_JOIN), the host MUST close the subflow with a RST, which SHOULD co | the acknowledgment, the sender has the confirmation that the DSS option passes | |||
ntain a MP_TCPRST option (<xref target="sec_reset"/>) with a "Middlebox interfer | in both directions and may choose to send fewer DSS options than once per segmen | |||
ence" reason code.</t> | t.</li> | |||
<li pn="section-3.7-4.2">If, however, an ACK is received for data (not | ||||
<t>In the case of such an ACK being received on the first subflow (i.e., | just for the SYN) | |||
that started with MP_CAPABLE), before any additional subflows are added, the im | without a DSS option containing a Data ACK, the sender determines that t | |||
plementation MUST drop out of an MPTCP mode, back to regular TCP. The sender wil | he path is not MPTCP capable. In the case of this occurring on an additional sub | |||
l send one final data sequence mapping, with the Data-Level Length value of 0 in | flow (i.e., one started with MP_JOIN), the host <bcp14>MUST</bcp14> close the su | |||
dicating an infinite mapping (to inform the other end in case the path drops opt | bflow with a RST, which <bcp14>SHOULD</bcp14> contain an MP_TCPRST option (<xref | |||
ions in one direction only), and then revert to sending data on the single subfl | target="sec_reset" format="default" sectionFormat="of" derivedContent="Section | |||
ow without any MPTCP options.</t> | 3.6"/>) with a "Middlebox interference" reason code.</li> | |||
<li pn="section-3.7-4.3">In the case of such an ACK being received on | ||||
<t>If a subflow breaks during operation, e.g. if it is re-routed and MPT | the first subflow | |||
CP options are no longer permitted, then once this is detected (by the subflow-l | (i.e., that started with MP_CAPABLE), before any additional subflows | |||
evel receive buffer filling up, since there is no mapping available in order to | are added, the implementation <bcp14>MUST</bcp14> drop out of MPTCP | |||
DATA_ACK this data), the subflow SHOULD be treated as broken and closed with a R | mode and fall back to regular TCP. The sender will send one final Data S | |||
ST, since no data can be delivered to the application layer, and no fallback sig | equence Mapping, with the Data-Level Length value of 0 indicating an infinite ma | |||
nal can be reliably sent. This RST SHOULD include the MP_TCPRST option (<xref ta | pping (to inform the other end in case the path drops options in one direction o | |||
rget="sec_reset"/>) with a "Middlebox interference" reason code.</t> | nly), and then revert to sending data on the single subflow without any MPTCP op | |||
tions.</li> | ||||
<t>These rules should cover all cases where such a failure could happen: | <li pn="section-3.7-4.4">If a subflow breaks during operation, e.g., i | |||
whether it's on the forward or reverse path and whether the server or the clien | f it is rerouted and | |||
t first sends data.</t> | MPTCP options are no longer permitted, then once this is detected (by | |||
the subflow-level receive buffer filling up, since there is no mapping | ||||
<t>So far this section has discussed the loss of MPTCP options, either i | available in order to DATA_ACK this data), the subflow | |||
nitially, or during the course of the connection. As described in <xref target=" | <bcp14>SHOULD</bcp14> be treated as broken and closed with a RST, | |||
sec_generalop"/>, each portion of data for which there is a mapping is protected | since no data can be delivered to the application layer and no | |||
by a checksum, if checksums have been negotiated. This mechanism is used to det | fallback signal can be reliably sent. This RST <bcp14>SHOULD</bcp14> | |||
ect if middleboxes have made any adjustments to the payload (added, removed, or | include the MP_TCPRST option (<xref target="sec_reset" format="default" | |||
changed data). A checksum will fail if the data has been changed in any way. Thi | sectionFormat="of" derivedContent="Section 3.6"/>) with a "Middlebox interferenc | |||
s will also detect if the length of data on the subflow is increased or decrease | e" reason code.</li> | |||
d, and this means the data sequence mapping is no longer valid. The sender no lo | </ul> | |||
nger knows what subflow-level sequence number the receiver is genuinely operatin | <t pn="section-3.7-5">These rules should cover all cases where such a fa | |||
g at (the middlebox will be faking ACKs in return), and it cannot signal any fur | ilure could | |||
ther mappings. Furthermore, in addition to the possibility of payload modificati | happen -- whether it's on the forward or reverse path and whether the se | |||
ons that are valid at the application layer, there is the possibility that such | rver or the client first sends data.</t> | |||
modifications could be triggered across MPTCP segment boundaries, corrupting the | <t pn="section-3.7-6">So far, this section has discussed the loss of MPT | |||
data. Therefore, all data from the start of the segment that failed the checksu | CP options, | |||
m onwards is not trustworthy.</t> | either initially or during the course of the connection. As described | |||
in <xref target="sec_generalop" format="default" sectionFormat="of" deri | ||||
<t>Note that if checksum usage has not been negotiated, this fallback me | vedContent="Section 3.3"/>, each portion of | |||
chanism cannot be used unless there is some higher or lower layer signal to info | data for which there is a mapping is protected by a checksum, if | |||
rm the MPTCP implementation that the payload has been tampered with.</t> | checksums have been negotiated. This mechanism is used to detect if | |||
middleboxes have made any adjustments to the payload (added, removed, | ||||
<t>When multiple subflows are in use, the data in flight on a subflow wi | or changed data). A checksum will fail if the data has been changed in | |||
ll likely involve data that is not contiguously part of the connection-level str | any way. The use of a checksum will also detect whether the length of da | |||
eam, since segments will be spread across the multiple subflows. Due to the prob | ta on the subflow is | |||
lems identified above, it is not possible to determine what adjustment has done | increased or decreased, and this means the Data Sequence Mapping is no | |||
to the data (notably, any changes to the subflow sequence numbering). Therefore, | longer valid. The sender no longer knows what subflow-level sequence | |||
it is not possible to recover the subflow, and the affected subflow must be imm | number the receiver is genuinely operating at (the middlebox will be | |||
ediately closed with a RST, featuring an MP_FAIL option (<xref target="tcpm_fall | faking ACKs in return), and it cannot signal any further | |||
back"/>), which defines the data sequence number at the start of the segment (de | mappings. Furthermore, in addition to the possibility of payload | |||
fined by the data sequence mapping) that had the checksum failure. Note that the | modifications that are valid at the application layer, it is possible th | |||
MP_FAIL option requires the use of the full 64-bit sequence number, even if 32- | at such modifications could be triggered across MPTCP segment boundaries, corrup | |||
bit sequence numbers are normally in use in the DSS signals on the path.</t> | ting the data. Therefore, all data from the start of the segment that failed the | |||
checksum onward is not trustworthy.</t> | ||||
<?rfc needLines='8'?> | <t pn="section-3.7-7">Note that if checksum usage has not been negotiate | |||
<figure align="center" anchor="tcpm_fallback" title="Fallback (MP_FAIL) | d, this fallback mechanism cannot be used unless there is some higher-layer or l | |||
Option"> | ower‑layer signal to inform the MPTCP implementation that the payload has been t | |||
<artwork align="left"><![CDATA[ | ampered with.</t> | |||
1 2 3 | <t pn="section-3.7-8">When multiple subflows are in use, the data in fli | |||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ght on a subflow | |||
+---------------+---------------+-------+----------------------+ | will likely involve data that is not contiguously part of the | |||
| Kind | Length=12 |Subtype| (reserved) | | connection-level stream, since segments will be spread across the | |||
+---------------+---------------+-------+----------------------+ | multiple subflows. Due to the problems identified above, it is not | |||
| | | possible to determine what adjustments have been done to the data (notab | |||
| Data Sequence Number (8 octets) | | ly, | |||
| | | any changes to the subflow sequence numbering). Therefore, it is not | |||
+--------------------------------------------------------------+ | possible to recover the subflow, and the affected subflow must be | |||
immediately closed with a RST that includes an MP_FAIL option (<xref tar | ||||
]]></artwork> | get="tcpm_fallback" format="default" sectionFormat="of" derivedContent="Figure 1 | |||
6"/>), which defines the data sequence number at the start of the segment (defin | ||||
ed by the Data Sequence Mapping) that had the checksum failure. Note that the MP | ||||
_FAIL option requires the use of the full 64-bit sequence number, even if 32-bit | ||||
sequence numbers are normally in use in the DSS signals on the path.</t> | ||||
<figure anchor="tcpm_fallback" align="left" suppress-title="false" pn="f | ||||
igure-16"> | ||||
<name slugifiedName="name-fallback-mp_fail-option">Fallback (MP_FAIL) | ||||
Option</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-3.7-9.1"> | ||||
1 2 3 | ||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||||
+---------------+---------------+-------+----------------------+ | ||||
| Kind | Length=12 |Subtype| (reserved) | | ||||
+---------------+---------------+-------+----------------------+ | ||||
| | | ||||
| Data Sequence Number (8 octets) | | ||||
| | | ||||
+--------------------------------------------------------------+ </artwork> | ||||
</figure> | </figure> | |||
<t pn="section-3.7-10">The receiver of this option <bcp14>MUST</bcp14> d | ||||
iscard all data following the data sequence number specified. | ||||
Failed data <bcp14>MUST NOT</bcp14> be DATA_ACKed and so will be retrans | ||||
mitted on other subflows (<xref target="sec_retransmit" format="default" section | ||||
Format="of" derivedContent="Section 3.3.6"/>). </t> | ||||
<t pn="section-3.7-11">A special case is when there is a single subflow | ||||
and it fails with a checksum error. If it is known that all unacknowledged data | ||||
in | ||||
flight is contiguous (which will usually be the case with a single | ||||
subflow), an infinite mapping can be applied to the subflow without | ||||
the need to close it first, essentially turning off all further | ||||
MPTCP signaling. | ||||
<t>The receiver of this option MUST discard all data following the data | In this case, if a receiver identifies a checksum failure | |||
sequence number specified. | ||||
Failed data MUST NOT be DATA_ACKed and so will be retransmitted on other | ||||
subflows (<xref target="sec_retransmit"/>). </t> | ||||
<t>A special case is when there is a single subflow and it fails with a | ||||
checksum error. | ||||
If it is known that all unacknowledged data in flight is | ||||
contiguous (which will usually be the case with a single subflow), an infinite m | ||||
apping can be applied to the subflow without the need to close it first, and | ||||
essentially turn off all further MPTCP signaling. In this case, if a receiver id | ||||
entifies a checksum failure | ||||
when there is only one path, it will send back an MP_FAIL option on the subflow- level ACK, referring to the data-level sequence number of the start of the | when there is only one path, it will send back an MP_FAIL option on the subflow- level ACK, referring to the data-level sequence number of the start of the | |||
segment on which the checksum error was detected. The sender will receive | segment on which the checksum error was detected. The sender will receive | |||
this, and if all unacknowledged data in flight is contiguous, will signal an inf | this information and, if all unacknowledged data in flight is contiguous, will s | |||
inite mapping. | ignal an infinite mapping. | |||
This infinite mapping will be a DSS option (<xref target="sec_generalop"/>) | This infinite mapping will be a DSS option (<xref target="sec_generalop" format= | |||
on the first new packet, containing a data sequence mapping that acts retroactiv | "default" sectionFormat="of" derivedContent="Section 3.3"/>) | |||
ely, referring to the start of the subflow sequence | on the first new packet, containing a Data Sequence Mapping that acts retroactiv | |||
number of the most recent segment that was known to be delivered intact (i.e. wa | ely, referring to the start of the subflow sequence | |||
s successfully DATA_ACKed). From that point onwards, data can be altered | number of the most recent segment that was known to be delivered intact (i.e., w | |||
as successfully DATA_ACKed). From that point onward, data can be altered | ||||
by a middlebox without affecting MPTCP, as the data stream is equivalent to a re gular, legacy TCP session. | by a middlebox without affecting MPTCP, as the data stream is equivalent to a re gular, legacy TCP session. | |||
Whilst in theory paths may only be damaged in one direction, and the MP_FAIL sig | While in theory paths may only be damaged in one direction -- and the MP_FAIL | |||
nal affects only one direction of traffic, | signal affects only one direction of traffic -- | |||
for implementation simplicity, the receiver of an MP_FAIL MUST also respond with | for simplicity of implementation, the receiver of an MP_FAIL <bcp14>MUST</bcp14> | |||
an MP_FAIL in the reverse direction and entirely revert to a regular TCP sessio | also respond with an MP_FAIL in the reverse direction and entirely revert to a | |||
n.</t> | regular TCP session.</t> | |||
<t pn="section-3.7-12">In the rare case that the data is not contiguous | ||||
<t>In the rare case that the data is not contiguous (which could happen | (which could happen when there is only one subflow but it is retransmitting data | |||
when there is only one subflow but it is retransmitting data from a subflow | from a subflow | |||
that has recently been uncleanly closed), the receiver MUST close the subflow wi | that has recently been uncleanly closed), the receiver <bcp14>MUST</bcp14> close | |||
th a RST with MP_FAIL. The receiver MUST discard all data that follows the | the subflow with a RST with MP_FAIL. The receiver <bcp14>MUST</bcp14> discard a | |||
data sequence number specified. The sender MAY attempt to create a new subflow b | ll data that follows the | |||
elonging to the same connection, and, if it chooses to do so, SHOULD place | data sequence number specified. The sender <bcp14>MAY</bcp14> attempt to | |||
the single subflow immediately in single-path mode by setting an infinite data s | create a new subflow belonging to the same connection and, if it chooses to do | |||
equence mapping. This mapping will begin from the data-level sequence number | so, <bcp14>SHOULD</bcp14> immediately place | |||
the single subflow in single-path mode by setting an infinite Data Sequence Mapp | ||||
ing. This mapping will begin from the data-level sequence number | ||||
that was declared in the MP_FAIL.</t> | that was declared in the MP_FAIL.</t> | |||
<t pn="section-3.7-13">After a sender signals an infinite mapping, it <b | ||||
<t>After a sender signals an infinite mapping, it MUST only use subflow | cp14>MUST</bcp14> only use subflow ACKs to clear its send buffer. | |||
ACKs to clear its send buffer. | ||||
This is because Data ACKs may become misaligned with the subflow ACKs when middl eboxes insert or delete data. | This is because Data ACKs may become misaligned with the subflow ACKs when middl eboxes insert or delete data. | |||
The receiver SHOULD stop generating Data ACKs after it receives an infinite mappi | The receiver <bcp14>SHOULD</bcp14> stop generating Data ACKs after it receives | |||
ng. </t> | an infinite mapping.</t> | |||
<t pn="section-3.7-14">When a connection has fallen back with an infinit | ||||
<t>When a connection has fallen back with an infinite mapping, only one | e mapping, only one subflow can send data; otherwise, the receiver would not kno | |||
subflow can send data; otherwise, the receiver would not know how to reorder the | w how to reorder the data. In practice, this means that all MPTCP subflows will | |||
data. In practice, this means that all MPTCP subflows will have to be terminate | have to be terminated except one. Once MPTCP falls back to regular TCP, it <bcp1 | |||
d except one. Once MPTCP falls back to regular TCP, it MUST NOT revert to MPTCP | 4>MUST NOT</bcp14> revert to MPTCP later in the connection.</t> | |||
later in the connection.</t> | <t pn="section-3.7-15">It should be emphasized that MPTCP is not attempt | |||
ing to prevent the use of middleboxes that want to adjust the payload. An MPTCP- | ||||
<t>It should be emphasized that MPTCP is not attempting to prevent the u | aware middlebox could provide such functionality by also rewriting checksums.</t | |||
se of middleboxes that want to adjust the payload. An MPTCP-aware middlebox coul | > | |||
d provide such functionality by also rewriting checksums.</t> | ||||
</section> | </section> | |||
<section anchor="sec_errors" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="Error Handling" anchor="sec_errors"> | lse" pn="section-3.8"> | |||
<t>In addition to the fallback mechanism as described above, the standar | <name slugifiedName="name-error-handling">Error Handling</name> | |||
d classes of TCP errors may need to be handled in an MPTCP-specific way. Note th | <t pn="section-3.8-1">In addition to the fallback mechanism described ab | |||
at changing semantics -- such as the relevance of a RST -- are covered in <xref | ove, the standard classes of TCP errors may need to be handled in an MPTCP‑speci | |||
target="sec_semantics"/>. Where possible, we do not want to deviate from regular | fic way. Note that changing semantics -- such as the relevance of a RST -- are c | |||
TCP behavior.</t> | overed in <xref target="sec_semantics" format="default" sectionFormat="of" deriv | |||
<t>The following list covers possible errors and the appropriate MPTCP b | edContent="Section 4"/>. Where possible, we do not want to deviate from regular | |||
ehavior: | TCP behavior.</t> | |||
<list style="symbols"> | <t pn="section-3.8-2">The following list covers possible errors and the | |||
<t>Unknown token in MP_JOIN (or HMAC failure in MP_JOIN ACK, or miss | appropriate MPTCP behavior: | |||
ing MP_JOIN in SYN/ACK response): send RST (analogous to TCP's behavior on an un | ||||
known port)</t> | ||||
<t>DSN out of window (during normal operation): drop the data, do no | ||||
t send Data ACKs</t> | ||||
<t>Remove request for unknown address ID: silently ignore</t> | ||||
</list> | ||||
</t> | </t> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-3.8-3"> | ||||
<li pn="section-3.8-3.1">Unknown token in MP_JOIN (or HMAC failure in | ||||
MP_JOIN ACK, or missing MP_JOIN in SYN/ACK response): send RST (analogous to TCP | ||||
's behavior on an unknown port)</li> | ||||
<li pn="section-3.8-3.2">DSN out of window (during normal operation): | ||||
drop the data; do not send Data ACKs</li> | ||||
<li pn="section-3.8-3.3">Remove request for unknown Address ID: silent | ||||
ly ignore</li> | ||||
</ul> | ||||
</section> | </section> | |||
<section anchor="heuristics" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="Heuristics" anchor="heuristics"> | lse" pn="section-3.9"> | |||
<name slugifiedName="name-heuristics">Heuristics</name> | ||||
<t>There are a number of heuristics that are needed for | <t pn="section-3.9-1">There are a number of heuristics that are needed f | |||
or | ||||
performance or deployment but that are not required for | performance or deployment but that are not required for | |||
protocol correctness. In this section, we detail such | protocol correctness. In this section, we detail such | |||
heuristics. Note that discussion of buffering and certain | heuristics. Note that discussions of buffering and certain | |||
sender and receiver window behaviors are presented in Sections | sender and receiver window behaviors are presented in Sections | |||
<xref target="sec_rwin" format="counter"/> and <xref target="sec_sender" | <xref target="sec_rwin" format="counter" sectionFormat="of" derivedConte | |||
format="counter"/>, | nt="3.3.4"/> and <xref target="sec_sender" format="counter" sectionFormat="of" d | |||
as well as retransmission in <xref target="sec_retransmit"/>.</t> | erivedContent="3.3.5"/>, | |||
and retransmission is discussed in <xref target="sec_retransmit" format= | ||||
<section title="Port Usage"> | "default" sectionFormat="of" derivedContent="Section 3.3.6"/>.</t> | |||
<t>Under typical operation, an MPTCP implementation SHOULD use | <section numbered="true" toc="include" removeInRFC="false" pn="section-3 | |||
the same ports as already in use. In other words, the | .9.1"> | |||
destination port of a SYN containing an MP_JOIN option SHOULD | <name slugifiedName="name-port-usage">Port Usage</name> | |||
<t pn="section-3.9.1-1">Under typical operation, an MPTCP implementati | ||||
on <bcp14>SHOULD</bcp14> use | ||||
the same ports as the ports that are already in use. In other words, t | ||||
he | ||||
destination port of a SYN containing an MP_JOIN option <bcp14>SHOULD</ | ||||
bcp14> | ||||
be the same as the remote port of the first subflow in the | be the same as the remote port of the first subflow in the | |||
connection. The local port for such SYNs SHOULD also be the | connection. The local port for such SYNs <bcp14>SHOULD</bcp14> also b | |||
same as for the first subflow (and as such, an | e the | |||
implementation SHOULD reserve ephemeral ports across all | same as the port for the first subflow (and as such, an | |||
implementation <bcp14>SHOULD</bcp14> reserve ephemeral ports across al | ||||
l | ||||
local IP addresses), although there may be cases where this | local IP addresses), although there may be cases where this | |||
is infeasible. This strategy is intended to maximize the | is infeasible. This strategy is intended to maximize the | |||
probability of the SYN being permitted by a firewall or NAT | probability of the SYN being permitted by a firewall or NAT | |||
at the recipient and to avoid confusing any network | at the recipient and to avoid confusing any network-monitoring softwar | |||
monitoring software.</t> | e.</t> | |||
<t pn="section-3.9.1-2">There may also be cases, however, where a host | ||||
<t>There may also be cases, however, where a host wishes to | wishes to | |||
signal that a specific port should be used, and this facility | signal that a specific port should be used; this facility | |||
is provided in the ADD_ADDR option as documented in | is provided in the ADD_ADDR option as documented in | |||
<xref target="sec_add_address"/>. It is therefore feasible | <xref target="sec_add_address" format="default" sectionFormat="of" der ivedContent="Section 3.4.1"/>. It is therefore feasible | |||
to allow multiple subflows between the same two addresses | to allow multiple subflows between the same two addresses | |||
but using different port pairs, and | but using different port pairs, and | |||
such a facility could be used to allow load balancing within | such a facility could be used to allow load balancing within | |||
the network based on 5-tuples (e.g., some ECMP implementations <xref t arget="RFC2992"/>).</t> | the network based on 5-tuples (e.g., some ECMP implementations <xref t arget="RFC2992" format="default" sectionFormat="of" derivedContent="RFC2992"/>). </t> | |||
</section> | </section> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-3 | ||||
<section title="Delayed Subflow Start and Subflow Symmetry"> | .9.2"> | |||
<t>Many TCP connections are short-lived and consist only of a few | <name slugifiedName="name-delayed-subflow-start-and-s">Delayed Subflow | |||
segments, and so the overheads | Start and Subflow Symmetry</name> | |||
of using MPTCP outweigh any benefits. A heuristic is required, | <t pn="section-3.9.2-1">Many TCP connections are short-lived and consi | |||
st only of a few | ||||
segments, and so the overhead | ||||
of using MPTCP outweighs any benefits. A heuristic is required, | ||||
therefore, to decide when to start using additional subflows in | therefore, to decide when to start using additional subflows in | |||
an MPTCP connection. Experimental deployments have shown that | an MPTCP connection. Experimental deployments have shown that | |||
MPTCP can be applied in a range of scenarios so an implementation | MPTCP can be applied in a range of scenarios, so an implementation | |||
is likely to need to take into account factors including the type of | will likely need to take into account such factors as the type of | |||
traffic being sent and duration of session, and this information | traffic being sent and the duration of the session; this information | |||
MAY be signalled by the application layer.</t> | <bcp14>MAY</bcp14> be signaled by the application layer.</t> | |||
<t pn="section-3.9.2-2">However, for standard TCP traffic, a suggested | ||||
<t>However, for standard TCP traffic, a suggested general-purpose | general-purpose | |||
heuristic that an implementation MAY choose to employ is as follows.</ | heuristic that an implementation <bcp14>MAY</bcp14> choose to employ i | |||
t> | s as follows.</t> | |||
<t pn="section-3.9.2-3">If a host has data buffered for its peer (whic | ||||
<t>If a host has data buffered for its peer (which implies that the | h implies that the | |||
application has received a request for data), the host opens one | application has received a request for data), the host opens one | |||
subflow for each initial window's worth of data that is buffered.</t> | subflow for each initial window's worth of data that is buffered.</t> | |||
<t pn="section-3.9.2-4">Consideration should also be given to limiting | ||||
<t>Consideration should also be given to limiting the rate of adding | the rate of adding | |||
new subflows, as well as limiting the total number of subflows open | new subflows, as well as limiting the total number of subflows open | |||
for a particular connection. A host may choose to vary these values | for a particular connection. A host may choose to vary these values | |||
based on its load or knowledge of traffic and path characteristics.</t > | based on its load or knowledge of traffic and path characteristics.</t > | |||
<t pn="section-3.9.2-5">Note that this heuristic alone is probably ins | ||||
<t>Note that this heuristic alone is probably insufficient. Traffic | ufficient. Traffic | |||
for many common applications, such as downloads, is highly asymmetric | for many common applications, such as downloads, is highly asymmetric, | |||
and | and | |||
the host that is multihomed may well be the client that will never fil l | the host that is multihomed may well be the client that will never fil l | |||
its buffers, and thus never use MPTCP according to this heuristic. Adv anced APIs that allow an | its buffers and thus never use MPTCP according to this heuristic. Adva nced APIs that allow an | |||
application to signal its traffic requirements would aid in these deci sions.</t> | application to signal its traffic requirements would aid in these deci sions.</t> | |||
<t pn="section-3.9.2-6">An additional time-based heuristic could be ap | ||||
<t>An additional time-based heuristic could be applied, opening additi | plied, opening additional | |||
onal | ||||
subflows after a given period of time has passed. This would alleviate the | subflows after a given period of time has passed. This would alleviate the | |||
above issue, and also provide resilience for low-bandwidth but long-li ved | above issue and also provide resilience for low‑bandwidth but long-liv ed | |||
applications.</t> | applications.</t> | |||
<t pn="section-3.9.2-7">Another issue is that both communicating hosts | ||||
<t>Another issue is that both communicating hosts may simultaneously t | may simultaneously try to | |||
ry to | set up a subflow between the same pair of addresses. This leads to an | |||
set up a subflow between the same pair of addresses. This leads to an | inefficient use of resources.</t> | |||
inefficient use of resources.</t> | <t pn="section-3.9.2-8">If the same ports are used on all subflows, as | |||
recommended above, | ||||
<t>If the same ports are used on all subflows, as recommended above, | then standard TCP simultaneous-open logic should take care of this sit | |||
then standard TCP simultaneous open logic should take care of this sit | uation | |||
uation | ||||
and only one subflow will be established between the address pairs. Ho wever, | and only one subflow will be established between the address pairs. Ho wever, | |||
this relies on the same ports being used at both end hosts. If a host does | this relies on the same ports being used at both end hosts. If a host does | |||
not support TCP simultaneous open, it is RECOMMENDED that some element | not support TCP simultaneous open, it is <bcp14>RECOMMENDED</bcp14> th | |||
of randomization is applied to the time to wait before opening new sub | at some element | |||
flows, | of randomization be applied to the time to wait before opening new sub | |||
flows, | ||||
so that only one subflow is created between a given address pair. If, however, | so that only one subflow is created between a given address pair. If, however, | |||
hosts signal additional ports to use (for example, for leveraging ECMP on-path), | hosts signal additional ports to use (for example, for leveraging ECMP on-path), | |||
this heuristic is not appropriate.</t> | this heuristic is not appropriate.</t> | |||
<t pn="section-3.9.2-9">This section has shown some of the factors tha | ||||
<t>This section has shown some of the considerations that an implement | t an implementer | |||
er | should consider when developing MPTCP heuristics, but it is not intend | |||
should give when developing MPTCP heuristics, but is not intended to b | ed to be | |||
e | ||||
prescriptive.</t> | prescriptive.</t> | |||
</section> | </section> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-3 | ||||
<section title="Failure Handling"> | .9.3"> | |||
<t>Requirements for MPTCP's handling of unexpected signals have been | <name slugifiedName="name-failure-handling">Failure Handling</name> | |||
given in <xref target="sec_errors"/>. There are other failure cases, | <t pn="section-3.9.3-1">Requirements for MPTCP's handling of unexpecte | |||
however, where hosts can choose appropriate behavior.</t> | d signals are | |||
given in <xref target="sec_errors" format="default" sectionFormat="of" | ||||
<t>For example, <xref target="sec_init"/> suggests that a host SHOULD | derivedContent="Section 3.8"/>. There are other failure cases, | |||
however, where hosts can choose appropriate behavior.</t> | ||||
<t pn="section-3.9.3-2">For example, <xref target="sec_init" format="d | ||||
efault" sectionFormat="of" derivedContent="Section 3.1"/> suggests that a host < | ||||
bcp14>SHOULD</bcp14> | ||||
fall back to trying regular TCP SYNs after one or more failures of MPT CP | fall back to trying regular TCP SYNs after one or more failures of MPT CP | |||
SYNs for a connection. A host may keep a system-wide cache of such | SYNs for a connection. A host may keep a system-wide cache of such | |||
information, so that it can back off from using MPTCP, firstly for tha t | information, so that it can back off from using MPTCP, firstly for tha t | |||
particular destination host, and eventually on a whole interface, if | particular destination host and, eventually, on a whole interface, if | |||
MPTCP connections continue failing. The duration of such a cache would | MPTCP connections continue to fail. The duration of such a cache would | |||
be implementation-specific.</t> | be implementation specific.</t> | |||
<t pn="section-3.9.3-3">Another failure could occur when the MP_JOIN h | ||||
<t>Another failure could occur when the MP_JOIN handshake fails. | andshake fails. | |||
<xref target="sec_errors"/> specifies that an incorrect handshake MUST | <xref target="sec_errors" format="default" sectionFormat="of" derivedC | |||
ontent="Section 3.8"/> specifies that an incorrect handshake <bcp14>MUST</bcp14> | ||||
lead to the subflow being closed with a RST. A host operating an activ e | lead to the subflow being closed with a RST. A host operating an activ e | |||
intrusion detection system may choose to start blocking MP_JOIN packet s | intrusion-detection system may choose to start blocking MP_JOIN packet s | |||
from the source host if multiple failed MP_JOIN attempts are seen. Fro m | from the source host if multiple failed MP_JOIN attempts are seen. Fro m | |||
the connection initiator's point of view, if an MP_JOIN fails, it SHOU | the connection initiator's point of view, if an MP_JOIN fails, it | |||
LD | <bcp14>SHOULD NOT</bcp14> | |||
NOT attempt to connect to the same IP address and port during the life | attempt to connect to the same IP address and port during the lifetime | |||
time | ||||
of the connection, unless the other host refreshes the information wit h | of the connection, unless the other host refreshes the information wit h | |||
another ADD_ADDR option. Note that the ADD_ADDR option is informationa l | another ADD_ADDR option. Note that the ADD_ADDR option is informationa l | |||
only, and does not guarantee the other host will attempt a connection. | only and does not guarantee that the other host will attempt a connect | |||
</t> | ion.</t> | |||
<t pn="section-3.9.3-4">In addition, an implementation may learn, over | ||||
<t>In addition, an implementation may learn, over a number of connecti | a number of connections, | |||
ons, | ||||
that certain interfaces or destination addresses consistently fail and | that certain interfaces or destination addresses consistently fail and | |||
may default to not trying to use MPTCP for these. Behavior could also | may default to not trying to use MPTCP for such interfaces or | |||
be learned for particularly badly performing subflows or subflows that | addresses. The behavior of subflows that perform particularly badly | |||
regularly fail during use, in order to temporarily choose not to use | or subflows that regularly fail during use could also | |||
be learned, so that an implementation can temporarily choose not to us | ||||
e | ||||
these paths.</t> | these paths.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="sec_semantics" numbered="true" toc="include" removeInRFC="f | ||||
<section title="Semantic Issues" anchor="sec_semantics"> | alse" pn="section-4"> | |||
<t>In order to support multipath operation, the semantics of some TCP comp | <name slugifiedName="name-semantic-issues">Semantic Issues</name> | |||
onents have changed. To aid clarity, this section collects these semantic change | <t pn="section-4-1">In order to support multipath operation, the semantics | |||
s as a reference. | of some TCP | |||
<list style="hanging"> | components have changed. To help clarify, this section lists these | |||
<t hangText="Sequence number:"> The (in-header) TCP sequence | semantic changes as a point of reference. | |||
</t> | ||||
<dl newline="false" spacing="normal" indent="3" pn="section-4-2"> | ||||
<dt pn="section-4-2.1">Sequence number:</dt> | ||||
<dd pn="section-4-2.2"> The (in-header) TCP sequence | ||||
number is specific to the subflow. To allow the receiver to | number is specific to the subflow. To allow the receiver to | |||
reorder application data, an additional data-level | reorder application data, an additional data-level | |||
sequence space is used. In this data-level sequence space, the initi | sequence space is used. In this data‑level sequence space, the initi | |||
al SYN and | al SYN and | |||
the final DATA_FIN occupy 1 octet of sequence space. This is to ensu | the final DATA_FIN occupy 1 octet of sequence space. This is done to | |||
re these | ensure that these | |||
signals are acknowledged at the connection level. There is an explic it | signals are acknowledged at the connection level. There is an explic it | |||
mapping of data sequence space to subflow sequence space, | mapping of data sequence space to subflow sequence space, | |||
which is signaled through TCP options in data | which is signaled through TCP options in data | |||
packets.</t> | packets.</dd> | |||
<dt pn="section-4-2.3">ACK:</dt> | ||||
<t hangText="ACK:"> The ACK field in the TCP header | <dd pn="section-4-2.4"> The ACK field in the TCP header | |||
acknowledges only the subflow sequence number, not the | acknowledges only the subflow sequence number -- not the | |||
data-level sequence space. Implementations SHOULD NOT | data-level sequence space. Implementations <bcp14>SHOULD NOT</bcp14> | |||
attempt to infer a data-level acknowledgment from the | attempt to infer a data-level acknowledgment from the | |||
subflow ACKs. | subflow ACKs. | |||
This separates subflow- and connection-level processing | This separates subflow-level and connection-level processing | |||
at an end host.</t> | at an end host.</dd> | |||
<dt pn="section-4-2.5">Duplicate ACK:</dt> | ||||
<t hangText="Duplicate ACK:"> A duplicate ACK that includes any MPTCP | <dd pn="section-4-2.6"> A duplicate ACK that includes any MPTCP signalin | |||
signaling | g | |||
(with the exception of the DSS option) MUST NOT be treated as a sign | (with the exception of the DSS option) <bcp14>MUST NOT</bcp14> be tr | |||
al of congestion. | eated as a signal of congestion. | |||
To limit the chances of non-MPTCP-aware entities mistakenly interpre ting duplicate | To limit the chances of non-MPTCP-aware entities mistakenly interpre ting duplicate | |||
ACKs as a signal of congestion, MPTCP SHOULD NOT send more than two | ACKs as a signal of congestion, MPTCP <bcp14>SHOULD NOT</bcp14> send | |||
duplicate ACKs | more than two duplicate ACKs | |||
containing (non-DSS) MPTCP signals in a row.</t> | containing (non-DSS) MPTCP signals in a row.</dd> | |||
<dt pn="section-4-2.7">Receive Window:</dt> | ||||
<t hangText="Receive Window:">The receive window in the TCP | <dd pn="section-4-2.8">The receive window in the TCP | |||
header indicates the amount of free buffer space for the | header indicates the amount of free buffer space for the | |||
whole data-level connection (as opposed to for this | whole data-level connection (as opposed to the amount of space for t | |||
subflow) that is available at the receiver. This is the | his | |||
same semantics as regular TCP, but to maintain these | subflow) that is available at the receiver. The | |||
semantics are the same as for regular TCP, but to maintain these | ||||
semantics the receive window must be interpreted at the | semantics the receive window must be interpreted at the | |||
sender as relative to the sequence number given in the | sender as relative to the sequence number given in the | |||
DATA_ACK rather than the subflow ACK in the TCP header. | DATA_ACK rather than the subflow ACK in the TCP header. | |||
In this way, the original flow control role is preserved. | In this way, the original role of flow control is preserved. | |||
Note that some middleboxes may change the receive window, | Note that some middleboxes may change the receive window, | |||
and so a host SHOULD use the maximum value of those recently | and so a host <bcp14>SHOULD</bcp14> use the maximum value of those r ecently | |||
seen on the constituent subflows for the connection-level | seen on the constituent subflows for the connection-level | |||
receive window, and also needs to maintain a subflow-level | receive window and also needs to maintain a subflow-level | |||
window for subflow-level processing.</t> | window for subflow-level processing.</dd> | |||
<dt pn="section-4-2.9">FIN:</dt> | ||||
<t hangText="FIN:"> The FIN flag in the TCP header applies | <dd pn="section-4-2.10"> The FIN flag in the TCP header applies | |||
only to the subflow it is sent on, not to the whole | only to the subflow it is sent on -- not to the whole | |||
connection. For connection-level FIN semantics, the | connection. For connection-level FIN semantics, the | |||
DATA_FIN option is used.</t> | DATA_FIN option is used.</dd> | |||
<dt pn="section-4-2.11">RST:</dt> | ||||
<t hangText="RST:"> The RST flag in the TCP header applies | <dd pn="section-4-2.12"> The RST flag in the TCP header applies | |||
only to the subflow it is sent on, not to the whole | only to the subflow it is sent on -- not to the whole | |||
connection. The MP_FASTCLOSE option provides the fast close | connection. The MP_FASTCLOSE option provides the Fast Close | |||
functionality of a RST at the MPTCP connection level.</t> | functionality of a RST at the MPTCP connection level.</dd> | |||
<dt pn="section-4-2.13">Address List:</dt> | ||||
<t hangText="Address List:"> Address list management (i.e., | <dd pn="section-4-2.14"> Address list management (i.e., | |||
knowledge of the local and remote hosts' lists of | knowledge of the local and remote hosts' lists of | |||
available IP addresses) is handled | available IP addresses) is handled | |||
on a per-connection basis (as opposed to per subflow, per | on a per-connection basis (as opposed to per subflow, per | |||
host, or per pair of communicating hosts). This permits | host, or per pair of communicating hosts). This permits | |||
the application of per-connection local policy. Adding an | the application of per-connection local policy. Adding an | |||
address to one connection (either explicitly through an Add | address to one connection (either explicitly through an | |||
Address message, or implicitly through a Join) has no implication | ADD_ADDR message or implicitly through an MP_JOIN) has no implicatio | |||
for other connections between the same pair of hosts.</t> | ns | |||
for other connections between the same pair of hosts.</dd> | ||||
<t hangText="5-tuple:"> The 5-tuple (protocol, local | <dt pn="section-4-2.15">5-tuple:</dt> | |||
<dd pn="section-4-2.16"> The 5-tuple (protocol, local | ||||
address, local port, remote address, remote port) | address, local port, remote address, remote port) | |||
presented by kernel APIs to the application layer in a | presented by kernel APIs to the application layer in a | |||
non-multipath-aware application is that of the first | non-multipath-aware application is that of the first | |||
subflow, even if the subflow has since been closed and | subflow, even if the subflow has since been closed and | |||
removed from the connection. This decision, and other | removed from the connection. This decision, and other | |||
related API issues, are discussed in more detail in | related API issues, are discussed in more detail in | |||
<xref target="RFC6897"/>.</t> | <xref target="RFC6897" format="default" sectionFormat="of" derivedCo | |||
</list> | ntent="RFC6897"/>.</dd> | |||
</t> | </dl> | |||
</section> | </section> | |||
<section anchor="sec_security" numbered="true" toc="include" removeInRFC="fa | ||||
<section title="Security Considerations" anchor="sec_security"> | lse" pn="section-5"> | |||
<t>As identified in <xref target="RFC6181"/>, the addition of multipath ca | <name slugifiedName="name-security-considerations">Security Considerations | |||
pability to TCP will bring with it a number of new classes of threat. In order t | </name> | |||
o prevent these, <xref target="RFC6182"/> presents a set of requirements for a s | <t pn="section-5-1">As identified in <xref target="RFC6181" format="defaul | |||
ecurity solution for MPTCP. The fundamental goal is for the security of MPTCP to | t" sectionFormat="of" derivedContent="RFC6181"/>, the | |||
be "no worse" than regular TCP today, and the key security requirements are: | addition of multipath capability to TCP will bring with it a number of | |||
<list style="symbols"> | new classes of threats. In order to prevent these threats, <xref target="R | |||
<t>Provide a mechanism to confirm that the parties in a subflow handsh | FC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> presents | |||
ake are the same as in the original connection setup.</t> | a set of requirements for a security | |||
<t>Provide verification that the peer can receive traffic at a new add | solution for MPTCP. The fundamental goal is for the security of MPTCP to | |||
ress before using it as part of a connection.</t> | be "no worse" than regular TCP today. The key security requirements | |||
<t>Provide replay protection, i.e., ensure that a request to add/remov | are as follows: | |||
e a subflow is 'fresh'.</t> | </t> | |||
</list> | <ul spacing="normal" bare="false" empty="false" pn="section-5-2"> | |||
<li pn="section-5-2.1">Provide a mechanism to confirm that the parties i | ||||
In order to achieve these goals, MPTCP includes a hash-based handshake a | n a subflow | |||
lgorithm documented in Sections <xref target="sec_init" format="counter"/> and < | handshake are the same as the parties in the original connection setup.< | |||
xref target="sec_join" format="counter"/>.</t> | /li> | |||
<li pn="section-5-2.2">Provide verification that the peer can receive tr | ||||
<t>The security of the MPTCP connection hangs on the use of keys that are | affic at a new address before using it as part of a connection.</li> | |||
shared once at the start of the first subflow, and are never sent again over the | <li pn="section-5-2.3">Provide replay protection, i.e., ensure that a re | |||
network (unless used in the fast close mechanism, <xref target="sec_fastclose"/ | quest to add/remove a subflow is "fresh".</li> | |||
>). To ease demultiplexing while not giving away any cryptographic material, fu | </ul> | |||
ture subflows use a truncated cryptographic hash of this key as the connection i | <t pn="section-5-3"> | |||
dentification "token". The keys are concatenated and used as keys for creating | In order to achieve these goals, MPTCP includes a hash-based handshake | |||
Hash-based Message Authentication Codes (HMACs) used on subflow setup, in order | algorithm, as documented in Sections <xref target="sec_init" format="count | |||
to verify that the parties in the handshake are the same as in the original conn | er" sectionFormat="of" derivedContent="3.1"/> and <xref target="sec_join" format | |||
ection setup. It also provides verification that the peer can receive traffic a | ="counter" sectionFormat="of" derivedContent="3.2"/>.</t> | |||
t this new address. Replay attacks would still be possible when only keys are u | <t pn="section-5-4">The security of the MPTCP connection hangs on the use | |||
sed; therefore, the handshakes use single-use random numbers (nonces) at both en | of keys that | |||
ds -- this ensures the HMAC will never be the same on two handshakes. Guidance o | are shared once at the start of the first subflow and are never sent | |||
n generating random numbers suitable for use as keys is given in <xref target="R | again over the network (unless used in the Fast Close mechanism (<xref tar | |||
FC4086"/> and discussed in <xref target="sec_init"/>. The nonces are valid for t | get="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section | |||
he lifetime of the TCP connection attempt. HMAC is also used to secure the ADD_A | 3.5"/>)). To ease demultiplexing | |||
DDR option, due to the threats identified in <xref target="RFC7430"/>.</t> | while not giving away any cryptographic material, future subflows use a | |||
<t>The use of crypto capability bits in the initial connection handshake t | truncated cryptographic hash of this key as the connection | |||
o negotiate use of a particular algorithm allows the deployment of additional cr | identification "token". The keys are concatenated and used as keys for | |||
ypto mechanisms in the future. This negotiation would nevertheless be susceptib | creating Hash-based Message Authentication Codes (HMACs) used on subflow | |||
le to a bid-down attack by an on-path active attacker who could modify the crypt | setup, in order to verify that the parties in the handshake are the same | |||
o capability bits in the response from the receiver to use a less secure crypto | as the parties in the original connection setup. It also provides verific | |||
mechanism. The security mechanism presented in this document should therefore pr | ation that | |||
otect against all forms of flooding and hijacking attacks discussed in <xref tar | the peer can receive traffic at this new address. Replay attacks would | |||
get="RFC6181"/>.</t> | still be possible when only keys are used; therefore, the handshakes use | |||
single-use random numbers (nonces) at both ends -- this ensures that the H | ||||
<t>The version negotiation specified in <xref target="sec_init"/>, if diff | MAC will never be the same on two handshakes. Guidance on generating random numb | |||
ering MPTCP versions shared a common negotiation format, would allow an on-path | ers suitable for use as keys is given in <xref target="RFC4086" format="default" | |||
attacker to apply a theoretical bid-down attack. Since the v1 and v0 protocols h | sectionFormat="of" derivedContent="RFC4086"/> and discussed in <xref target="se | |||
ave a different handshake, such an attack would require the client to re-establi | c_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>. The | |||
sh the connection using v0, and this being supported by the server. Note that an | nonces are valid for the lifetime of the TCP connection attempt. HMAC is also us | |||
on-path attacker would have access to the raw data, negating any other TCP-leve | ed to secure the ADD_ADDR option, due to the threats identified in <xref target= | |||
l security mechanisms. | "RFC7430" format="default" sectionFormat="of" derivedContent="RFC7430"/>.</t> | |||
Also a change from RFC6824 has removed the subflow identifier from the MP_ | <t pn="section-5-5">The use of crypto capability bits in the initial conne | |||
PRIO option (<xref target="sec_policy"/>), to remove the theoretical attack wher | ction handshake | |||
e a subflow could be placed in "backup" mode by an attacker.</t> | to negotiate the use of a particular algorithm allows the deployment of ad | |||
ditional crypto mechanisms in the future. This negotiation would nevertheless b | ||||
<t>During normal operation, regular TCP protection mechanisms (such as ens | e susceptible to a bid-down attack by an on-path active attacker who could modif | |||
uring sequence numbers are in-window) will provide the same level of protection | y the crypto capability bits in the response from the receiver to use a less sec | |||
against attacks on individual TCP subflows as exists for regular TCP today. Impl | ure crypto mechanism. The security mechanism presented in this document should t | |||
ementations will introduce additional buffers compared to regular TCP, to reasse | herefore protect against all forms of flooding and hijacking attacks discussed i | |||
mble data at the connection level. The application of window sizing will minimiz | n <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC | |||
e the risk of denial-of-service attacks consuming resources.</t> | 6181"/>.</t> | |||
<t pn="section-5-6">The version negotiation specified in <xref target="sec | ||||
<t>As discussed in <xref target="sec_add_address"/>, a host may advertise | _init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>, if di | |||
its private addresses, but these might point to different hosts in the receiver' | ffering MPTCP versions shared a common | |||
s network. The MP_JOIN handshake (<xref target="sec_join"/>) will ensure that th | negotiation format, would allow an on-path attacker to apply a | |||
is does not succeed in setting up a subflow to the incorrect host. However, it c | theoretical bid-down attack. Since the v1 and v0 protocols have a | |||
ould still create unwanted TCP handshake traffic. This feature of MPTCP could be | different handshake, such an attack would require that the client | |||
a target for denial-of-service exploits, with malicious participants in MPTCP c | re-establish the connection using v0 and that the server support v0. | |||
onnections encouraging the recipient to target other hosts in the network. There | Note that an on-path attacker would have access to the raw data, negating any o | |||
fore, implementations should consider heuristics (<xref target="heuristics"/>) a | ther TCP-level security mechanisms. As also noted in <xref target="app_changelog | |||
t both the sender and receiver to reduce the impact of this.</t> | " format="default" sectionFormat="of" derivedContent="Appendix E"/>, this docume | |||
nt specifies the removal of the AddrID field <xref target="RFC6824" format="defa | ||||
<t>To further protect against malicious ADD_ADDR messages sent by an off-p | ult" sectionFormat="of" derivedContent="RFC6824"/> in the MP_PRIO option (<xref | |||
ath attacker, the ADD_ADDR includes an HMAC using the keys negotiated during the | target="sec_policy" format="default" sectionFormat="of" derivedContent="Section | |||
handshake. This effectively prevents an attacker from diverting an MPTCP connec | 3.3.8"/>). | |||
tion through an off-path ADD_ADDR injection into the stream.</t> | This change eliminates the possibility of a theoretical attack where | |||
a subflow could be placed in "backup" mode by an attacker.</t> | ||||
<t>A small security risk could theoretically exist with key reuse, but in | <t pn="section-5-7">During normal operation, regular TCP protection mechan | |||
order to accomplish a replay attack, both the sender and receiver keys, and the | isms (such as | |||
sender and receiver random numbers, in the MP_JOIN handshake (<xref target="sec_ | ensuring that sequence numbers are in-window) will provide the same | |||
join"/>) would have to match.</t> | level of protection against attacks on individual TCP subflows as the | |||
level of protection that exists for regular TCP today. Implementations wil | ||||
<t>Whilst this specification defines a "medium" security solution, meeting | l introduce additional buffers compared to regular TCP, to reassemble data at th | |||
the criteria specified at the start of this section and the threat analysis (<x | e connection level. The application of window sizing will minimize the risk of d | |||
ref target="RFC6181"/>), since attacks only ever get worse, it is likely that a | enial-of-service attacks consuming resources.</t> | |||
future version of MPTCP would need to be able to support stronger security. Ther | <t pn="section-5-8">As discussed in <xref target="sec_add_address" format= | |||
e are several ways the security of MPTCP could potentially be improved; some of | "default" sectionFormat="of" derivedContent="Section 3.4.1"/>, a host may advert | |||
these would be compatible with MPTCP as defined in this document, whilst others | ise its private addresses, but these might point to different hosts in the recei | |||
may not be. For now, the best approach is to get experience with the current app | ver's network. The MP_JOIN handshake (<xref target="sec_join" format="default" s | |||
roach, establish what might work, and check that the threat analysis is still ac | ectionFormat="of" derivedContent="Section 3.2"/>) will ensure that this does not | |||
curate.</t> | succeed in setting up a subflow to the incorrect host. However, it could still | |||
create unwanted TCP handshake traffic. This feature of MPTCP could be a target f | ||||
<t>Possible ways of improving MPTCP security could include:<list style="symbols" | or denial-of-service exploits, with malicious participants in MPTCP connections | |||
> | encouraging the recipient to target other hosts in the network. Therefore, imple | |||
<t>defining a new MPCTP cryptographic algorithm, as negotiated in MP_CAPABLE. A | mentations should consider heuristics (<xref target="heuristics" format="default | |||
sub-case could be to include an additional deployment assumption, such as statef | " sectionFormat="of" derivedContent="Section 3.9"/>) at both the sender and rece | |||
ul servers, in order to allow a more powerful algorithm to be used.</t> | iver to reduce the impact of this.</t> | |||
<t>defining how to secure data transfer with MPTCP, whilst not changing the sign | <t pn="section-5-9">To further protect against malicious ADD_ADDR messages | |||
aling part of the protocol.</t> | sent by an off-path attacker, the ADD_ADDR includes an HMAC using the keys nego | |||
<t>defining security that requires more option space, perhaps in conjunction wit | tiated during the handshake. This effectively prevents an attacker from divertin | |||
h a "long options" proposal for extending the TCP options space (such as those s | g an MPTCP connection through an off-path ADD_ADDR injection into the stream.</t | |||
urveyed in <xref target="TCPLO"/>), or perhaps building on the current approach | > | |||
with a second stage of MPTCP-option-based security.</t> | <t pn="section-5-10">A small security risk could theoretically exist with | |||
<t>revisiting the working group's decision to exclusively use TCP options for MP | key reuse, but in order to accomplish a replay attack, both the sender and recei | |||
TCP signaling, and instead look at also making use of the TCP payloads.</t> | ver keys, and the sender and receiver random numbers, in the MP_JOIN handshake ( | |||
</list></t> | <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Sect | |||
ion 3.2"/>) would have to match.</t> | ||||
<t>MPTCP has been designed with several methods available to indicate a new secu | <t pn="section-5-11">While this specification defines a "medium" security | |||
rity mechanism, including: | solution, | |||
<list style="symbols"> | meeting the criteria specified at the start of this section and in the | |||
<t>available flags in MP_CAPABLE (<xref target="tcpm_capable"/>);</t> | threat analysis document <xref target="RFC6181" format="default" sectionFo | |||
<t>available subtypes in the MPTCP option (<xref target="fig_option"/>);</t> | rmat="of" derivedContent="RFC6181"/>, since attacks | |||
<t>the version field in MP_CAPABLE (<xref target="tcpm_capable"/>);</t> | only ever get worse, it is likely that a future version of MPTCP would | |||
</list></t> | need to be able to support stronger security. | |||
There are several ways the security of MPTCP could potentially be improved; som | ||||
e of these would be compatible with MPTCP as defined in this document, while oth | ||||
ers may not be. For now, the best approach is to gain experience with the curren | ||||
t approach, establish what might work, and check that the threat analysis is sti | ||||
ll accurate.</t> | ||||
<t pn="section-5-12">Possible ways of improving MPTCP security could inclu | ||||
de:</t> | ||||
<ul spacing="normal" bare="false" empty="false" pn="section-5-13"> | ||||
<li pn="section-5-13.1">defining a new MPTCP cryptographic algorithm, as | ||||
negotiated in | ||||
MP_CAPABLE. If an implementation was being deployed in a controlled | ||||
environment where additional assumptions could be made, such as the | ||||
ability for the servers to store state during the TCP handshake, then | ||||
it may be possible to use a stronger cryptographic algorithm than | ||||
would otherwise be possible.</li> | ||||
<li pn="section-5-13.2">defining how to secure data transfer with MPTCP, | ||||
while not changing the signaling part of the protocol.</li> | ||||
<li pn="section-5-13.3">defining security that requires more option spac | ||||
e, perhaps in | ||||
conjunction with a "long options" proposal for extending the TCP | ||||
option space (such as those surveyed in <xref target="I-D.ananth-tcpm-tc | ||||
poptext" format="default" sectionFormat="of" derivedContent="TCPLO"/>), or perha | ||||
ps | ||||
building on the current approach with a second stage of | ||||
security based on MPTCP options.</li> | ||||
<li pn="section-5-13.4">revisiting the working group's decision to exclu | ||||
sively use TCP | ||||
options for MPTCP signaling and instead looking at the | ||||
possibility of using TCP payloads as well.</li> | ||||
</ul> | ||||
<t pn="section-5-14">MPTCP has been designed with several methods availabl | ||||
e to indicate a new security mechanism, including: | ||||
</t> | ||||
<ul spacing="normal" bare="false" empty="false" pn="section-5-15"> | ||||
<li pn="section-5-15.1">available flags in MP_CAPABLE (<xref target="tcp | ||||
m_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</li | ||||
> | ||||
<li pn="section-5-15.2">available subtypes in the MPTCP option (<xref ta | ||||
rget="fig_option" format="default" sectionFormat="of" derivedContent="Figure 3"/ | ||||
>).</li> | ||||
<li pn="section-5-15.3">the Version field in MP_CAPABLE (<xref target="t | ||||
cpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</ | ||||
li> | ||||
</ul> | ||||
</section> | </section> | |||
<section anchor="sec_middleboxes" numbered="true" toc="include" removeInRFC= | ||||
<section title="Interactions with Middleboxes" anchor="sec_middleboxes"> | "false" pn="section-6"> | |||
<name slugifiedName="name-interactions-with-middlebox">Interactions with M | ||||
<t>Multipath TCP was designed to be deployable in the present world. Its | iddleboxes</name> | |||
design takes into account "reasonable" | <t pn="section-6-1">Multipath TCP was designed to be deployable in the pre | |||
sent world. Its design takes into account "reasonable" | ||||
existing middlebox behavior. In this section, we outline a few representative middlebox-related failure scenarios and | existing middlebox behavior. In this section, we outline a few representative middlebox-related failure scenarios and | |||
show how Multipath TCP handles them. Next, we list the design decisions multipat | show how Multipath TCP handles them. Next, we list the design decisions | |||
h has made to accommodate the different | Multipath TCP has made to accommodate the different | |||
middleboxes.</t> | middleboxes.</t> | |||
<t pn="section-6-2">A primary concern is our use of a new TCP option. Midd | ||||
<t>A primary concern is our use of a new TCP option. Middleboxes should | leboxes should forward packets | |||
forward packets | with unknown options unchanged, yet there are some that don't. We expect these | |||
with unknown options unchanged, yet there are some that don't. These we expect w | middleboxes to strip options and pass the data, | |||
ill either strip options and pass the data, | ||||
drop packets with new options, copy the same option into multiple segments (e.g., when doing segmentation), or drop | drop packets with new options, copy the same option into multiple segments (e.g., when doing segmentation), or drop | |||
options during segment coalescing.</t> | options during segment coalescing.</t> | |||
<t pn="section-6-3">MPTCP uses a single new TCP option called "Kind", and | ||||
all message types are defined by "subtype" values (see <xref target="IANA" forma | ||||
t="default" sectionFormat="of" derivedContent="Section 7"/>). This should reduce | ||||
the chances of only some types of MPTCP options being passed; instead, the key | ||||
differing characteristics are different paths and the presence of the SYN flag.< | ||||
/t> | ||||
<t pn="section-6-4">MPTCP SYN packets on the first subflow of a connection | ||||
contain the MP_CAPABLE option (<xref target="sec_init" format="default" section | ||||
Format="of" derivedContent="Section 3.1"/>). If this is dropped, MPTCP <bcp14>SH | ||||
OULD</bcp14> fall back to regular TCP. If packets with the MP_JOIN option (<xref | ||||
target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3 | ||||
.2"/>) are dropped, the paths will simply not be used.</t> | ||||
<t pn="section-6-5">If a middlebox strips options but otherwise passes the | ||||
packets | ||||
unchanged, MPTCP will behave safely. If an MP_CAPABLE option is dropped | ||||
on either the outgoing path or the return path, the initiating host can | ||||
fall back to regular TCP, as illustrated in <xref target="fig_syn" format= | ||||
"default" sectionFormat="of" derivedContent="Figure 17"/> and discussed in <xref | ||||
target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3 | ||||
.1"/>.</t> | ||||
<figure anchor="fig_syn" align="left" suppress-title="false" pn="figure-17 | ||||
"> | ||||
<name slugifiedName="name-connection-setup-with-middl">Connection Setup | ||||
with Middleboxes That Strip Options from Packets</name> | ||||
<artwork align="left" name="" type="" alt="" pn="section-6-6.1"> | ||||
Host A Host B | ||||
| Middlebox M | | ||||
| | | | ||||
| SYN (MP_CAPABLE) | SYN | | ||||
|-------------------|---------------->| | ||||
| SYN/ACK | | ||||
|<------------------------------------| | ||||
a) MP_CAPABLE option stripped on outgoing path | ||||
<t>MPTCP uses a single new TCP option "Kind", and all message types are | Host A Host B | |||
defined by "subtype" values (see <xref target="IANA"/>). This should reduce the | | SYN (MP_CAPABLE) | | |||
chances of only some types of MPTCP options being passed, and instead the key di | |-------------------------------------->| | |||
ffering characteristics are different paths, and the presence of the SYN flag.</ | | Middlebox M | | |||
t> | | | | | |||
| SYN/ACK |SYN/ACK (MP_CAPABLE)| | ||||
<t>MPTCP SYN packets on the first subflow of a connection contain the MP | |<-----------------|--------------------| | |||
_CAPABLE option (<xref target="sec_init"/>). If this is dropped, MPTCP SHOULD fa | b) MP_CAPABLE option stripped on return path </artwork> | |||
ll back to regular TCP. If packets with the MP_JOIN option (<xref target="sec_jo | </figure> | |||
in"/>) are dropped, the paths will simply not be used.</t> | <t pn="section-6-7">Subflow SYNs contain the MP_JOIN option. If this optio | |||
n is stripped on the outgoing path, | ||||
<t>If a middlebox strips options but otherwise passes the packets unchan | the SYN will appear to be a regular SYN to Host B. Depending on whether there i | |||
ged, MPTCP will behave safely. If an MP_CAPABLE option is dropped on either the | s a listening socket on | |||
outgoing or the return path, the initiating host can fall back to regular TCP, a | the target port, Host B will reply with either a SYN/ACK or a RST (subflow conne | |||
s illustrated in <xref target="fig_syn"/> and discussed in <xref target="sec_ini | ction fails). When Host A | |||
t"/>.</t> | receives the SYN/ACK, it sends a RST because the SYN/ACK does not contain the MP | |||
_JOIN option and its token. | ||||
<t>Subflow SYNs contain the MP_JOIN option. If this option is stripped | Either way, the subflow setup fails but otherwise does not affect the MPTCP conn | |||
on the outgoing path, | ection as a whole.</t> | |||
the SYN will appear to be a regular SYN to Host B. Depending on whether th | <t pn="section-6-8">We now examine data flow with MPTCP, assuming that the | |||
ere is a listening socket on | flow is | |||
the target port, Host B will reply either with SYN/ACK or RST (subflow connectio | correctly set up, which implies that the options in the SYN | |||
n fails). When Host A | ||||
receives the SYN/ACK it sends a RST because the SYN/ACK does not contain the MP_ | ||||
JOIN option and its token. | ||||
Either way, the subflow setup fails, but otherwise does not affect the MPTCP con | ||||
nection as a whole.</t> | ||||
<figure align="center" anchor="fig_syn" title="Connection Setup with Mid | ||||
dleboxes that Strip Options from Packets"> | ||||
<artwork align="left"><![CDATA[ | ||||
Host A Host B | ||||
| Middlebox M | | ||||
| | | | ||||
| SYN(MP_CAPABLE) | SYN | | ||||
|-------------------|---------------->| | ||||
| SYN/ACK | | ||||
|<------------------------------------| | ||||
a) MP_CAPABLE option stripped on outgoing path | ||||
Host A Host B | ||||
| SYN(MP_CAPABLE) | | ||||
|------------------------------------>| | ||||
| Middlebox M | | ||||
| | | | ||||
| SYN/ACK |SYN/ACK(MP_CAPABLE)| | ||||
|<----------------|-------------------| | ||||
b) MP_CAPABLE option stripped on return path | ||||
]]></artwork> | ||||
</figure> | ||||
<t>We now examine data flow with MPTCP, assuming the flow is correctly s | ||||
et up, which implies the options in the SYN | ||||
packets were allowed through by the relevant middleboxes. If options are allowed through and there is no resegmentation or | packets were allowed through by the relevant middleboxes. If options are allowed through and there is no resegmentation or | |||
coalescing to TCP segments, Multipath TCP flows can proceed without problems.</t> | coalescing to TCP segments, Multipath TCP flows can proceed without problems.</t> | |||
<t pn="section-6-9">The case when options get stripped on data packets is | ||||
<t>The case when options get stripped on data packets has been discussed | discussed | |||
in the Fallback section. | in <xref target="sec_fallback" format="default" sectionFormat="of" derived | |||
If only some MPTCP options are stripped, behavior is not deterministic. | Content="Section 3.7"/>. | |||
If some data sequence mappings are lost, the connection can continue so long as | If only some MPTCP options are stripped, behavior is not deterministic. | |||
mappings exist for the subflow-level data (e.g., if multiple maps have been sent | If some Data Sequence Mappings are lost, the connection can continue so long as | |||
that reinforce each other). If some subflow-level space is left unmapped, howev | mappings exist for the subflow-level data (e.g., if multiple maps have been sent | |||
er, the subflow is treated as broken and is closed, through the process describe | that reinforce each other). If some subflow-level space is left unmapped, howev | |||
d in <xref target="sec_fallback"/>. MPTCP should survive with a loss of some Dat | er, the subflow is treated as broken and is closed, using the process described | |||
a ACKs, but performance will degrade as the fraction of stripped options increas | in <xref target="sec_fallback" format="default" sectionFormat="of" derivedConten | |||
es. | t="Section 3.7"/>. MPTCP should survive with a loss of some Data ACKs, but perfo | |||
rmance will degrade as the fraction of stripped options increases. | ||||
We do not expect such cases to appear in practice, though: most | We do not expect such cases to appear in practice, though: most | |||
middleboxes will either strip all options or let them all through.</t> | middleboxes will either strip all options or let them all through.</t> | |||
<t pn="section-6-10">We end this section with a list of middlebox classes, | ||||
<t>We end this section with a list of middlebox classes, their behavior, | their behavior, and the elements in the MPTCP design | |||
and the elements in the MPTCP design | ||||
that allow operation through such middleboxes. Issues surrounding dropping packets with options | that allow operation through such middleboxes. Issues surrounding dropping packets with options | |||
or stripping options were discussed above, and are not included here: | or stripping options were discussed above and are not included here: | |||
<list style="symbols"> | </t> | |||
<t>NATs <xref target="RFC3022"/> (Network Address (and Port) Translato | <ul spacing="normal" bare="false" empty="false" pn="section-6-11"> | |||
rs) change the source address (and often source port) of packets. This means tha | <li pn="section-6-11.1">NATs (Network Address (and port) Translators) <x | |||
t a host will not know its | ref target="RFC3022" format="default" sectionFormat="of" derivedContent="RFC3022 | |||
"/> change the source address (and | ||||
often the source port) of packets. This means that a host will not know | ||||
its | ||||
public-facing address for signaling in MPTCP. Therefore, MPTCP permits implicit address addition via the MP_JOIN option, | public-facing address for signaling in MPTCP. Therefore, MPTCP permits implicit address addition via the MP_JOIN option, | |||
and the handshake mechanism ensures that connection attempts to private addr | and the handshake mechanism ensures that connection attempts to private addr | |||
esses <xref target="RFC1918"/>, since they are authenticated, will only set up s | esses <xref target="RFC1918" format="default" sectionFormat="of" derivedContent= | |||
ubflows to the correct hosts. | "RFC1918"/>, since they are authenticated, will only set up subflows to the corr | |||
Explicit address removal is undertaken by an Address ID to allow no knowledg | ect hosts. | |||
e of the source address.</t> | Explicit address removal is undertaken by an Address ID to allow no knowledg | |||
e of the source address.</li> | ||||
<t>Performance Enhancing Proxies (PEPs) <xref target="RFC3135"/> might | <li pn="section-6-11.2">Performance Enhancing Proxies (PEPs) <xref targe | |||
proactively ACK data to increase performance. MPTCP, however, relies on accurat | t="RFC3135" format="default" sectionFormat="of" derivedContent="RFC3135"/> might | |||
e congestion control signals from the end host, and non-MPTCP-aware PEPs will no | proactively ACK data to increase performance. MPTCP, however, relies on accurat | |||
t be able to provide such signals. MPTCP will, therefore, fall back to single-pa | e congestion control signals from the end host, and non‑MPTCP-aware PEPs will no | |||
th TCP, or close the problematic subflow (see <xref target="sec_fallback"/>).</t | t be able to provide such signals. MPTCP will, therefore, fall back to single-pa | |||
> | th TCP or close the problematic subflow (see <xref target="sec_fallback" format= | |||
"default" sectionFormat="of" derivedContent="Section 3.7"/>).</li> | ||||
<t>Traffic Normalizers <xref target="norm"/> may not allow holes in se | <li pn="section-6-11.3">Traffic normalizers <xref target="norm" format=" | |||
quence numbers, and may cache packets and retransmit the same data. | default" sectionFormat="of" derivedContent="norm"/> may not | |||
MPTCP looks like standard TCP on the wire, and will not retransmit different dat | allow holes in sequence numbers, and they may cache packets and retransm | |||
a on the same subflow sequence number. In the event of a retransmission, the sam | it the same data. | |||
e data will be retransmitted on the original TCP subflow even if it is additiona | MPTCP looks like standard TCP on the wire and will not retransmit different data | |||
lly retransmitted at the connection level on a different subflow.</t> | on the same subflow sequence number. In the event of a retransmission, the same | |||
data will be retransmitted on the original TCP subflow even if it is additional | ||||
<t>Firewalls <xref target="RFC2979"/> might perform initial sequence n | ly retransmitted at the connection level on a different subflow.</li> | |||
umber randomization on TCP connections. MPTCP uses relative | <li pn="section-6-11.4">Firewalls <xref target="RFC2979" format="default | |||
sequence numbers in data sequence mapping to cope with this. Like NATs, firewall | " sectionFormat="of" derivedContent="RFC2979"/> might perform | |||
s will not permit many incoming connections, so | Initial Sequence Number (ISN) randomization on TCP connections. MPTCP us | |||
es relative | ||||
sequence numbers in Data Sequence Mappings to cope with this. Like NATs, firewal | ||||
ls will not permit many incoming connections, so | ||||
MPTCP supports address signaling (ADD_ADDR) so that a multiaddressed host can invite its peer behind the firewall/NAT to connect | MPTCP supports address signaling (ADD_ADDR) so that a multiaddressed host can invite its peer behind the firewall/NAT to connect | |||
out to its additional interface.</t> | out to its additional interface.</li> | |||
<li pn="section-6-11.5">Intrusion Detection Systems / Intrusion Preventi | ||||
<t>Intrusion Detection/Prevention Systems (IDS/IPS) observe packet str | on Systems (IDSs/IPSs) observe packet streams for patterns and content that cou | |||
eams for patterns and content that could threaten a network. MPTCP may require t | ld threaten a network. MPTCP may require the | |||
he | instrumentation of additional paths, and an MPTCP-aware IDS or IPS would need to | |||
instrumentation of additional paths, and an MPTCP-aware IDS/IPS would need to re | read MPTCP tokens to correlate data from multiple subflows to maintain comparab | |||
ad MPTCP tokens to correlate data from multiple subflows to maintain comparable | le visibility into all of the traffic between devices. Without such changes, an | |||
visibility into all of the traffic between devices. Without such changes, an IDS | IDS would get an incomplete view of the traffic, increasing the risk of missing | |||
would get an incomplete view of the traffic, increasing the risk of missing tra | traffic of interest (false negatives) and increasing the chances of erroneously | |||
ffic of interest (false negatives), and increasing the chances of erroneously id | identifying a subflow as a risk due to only seeing partial data (false positives | |||
entifying a subflow as a risk due to only seeing partial data (false positives). | ).</li> | |||
</t> | <li pn="section-6-11.6">Application-level middleboxes such as content-aw | |||
are firewalls may | ||||
<t>Application-level middleboxes such as content-aware firewalls may a | alter the payload within a subflow -- for example, rewriting URIs in | |||
lter the payload within a subflow, such as rewriting URIs in HTTP traffic. MPTCP | HTTP traffic. MPTCP will detect such changes using the checksum | |||
will detect these using the checksum | and close the affected subflow(s), if there are other subflows that can be used. | |||
and close the affected subflow(s), if there are other subflows that can be used. | If all subflows are affected, MPTCP | |||
If all subflows are affected, multipath | will fall back to TCP, allowing such middleboxes to change the payload. MPTCP-aw | |||
will fall back to TCP, allowing such middleboxes to change the payload. MPTCP-aw | are middleboxes should be able to adjust the payload and MPTCP metadata in order | |||
are middleboxes should be able to adjust the payload and MPTCP metadata in order | not to break the connection.</li> | |||
not to break the connection.</t> | </ul> | |||
</list> | <t pn="section-6-12"> | |||
In addition, all classes of middleboxes may affect TCP traffic in the following ways: | In addition, all classes of middleboxes may affect TCP traffic in the following ways: | |||
<list style="symbols"> | ||||
<t>TCP options may be removed, or packets with unknown options dropped | ||||
, by many classes of middleboxes. It is intended | ||||
that the initial SYN exchange, with a TCP option, will be sufficient to identify | ||||
the path capabilities. If such a packet does | ||||
not get through, MPTCP will end up falling back to regular TCP.</t> | ||||
<t>Segmentation/Coalescing (e.g., TCP segmentation offloading) might c | ||||
opy options between packets and might | ||||
strip some options. MPTCP's data sequence mapping includes the relative subflow | ||||
sequence number instead of using the sequence | ||||
number in the segment. In this way, the mapping is independent of the packets th | ||||
at carry it.</t> | ||||
<t>The receive window may be shrunk by some middleboxes at the subflow | ||||
level. MPTCP will use the maximum window at data level, but will also obey | ||||
subflow-specific windows.</t> | ||||
</list> | ||||
</t> | </t> | |||
<ul spacing="normal" bare="false" empty="false" pn="section-6-13"> | ||||
</section> | <li pn="section-6-13.1">TCP options may be removed, or packets with unkn | |||
own options dropped, by many classes of middleboxes. It is intended | ||||
<section anchor="Acknowledgments" title="Acknowledgments"> | that the initial SYN exchange, with a TCP option, will be sufficient to identify | |||
<!-- <t>The authors were originally supported by Trilogy (http://www.trilo | the path's capabilities. If such a packet does | |||
gy-project.org), a research project (ICT-216372) partially funded by the Europea | not get through, MPTCP will end up falling back to regular TCP.</li> | |||
n Community under its Seventh Framework Program.</t> | <li pn="section-6-13.2">Segmentation/coalescing (e.g., TCP segmentation | |||
<t>Alan Ford was originally supported by Roke Manor Research and later Cis | offloading) might copy options between packets and might | |||
co Systems.</t> --> | strip some options. MPTCP's Data Sequence Mapping includes the relative subflow | |||
<t>The authors gratefully acknowledge significant input into this document | sequence number instead of using the sequence | |||
from Sébastien Barré and Andrew McDonald.</t> | number in the segment. In this way, the mapping is independent of the packets th | |||
<t>The authors also wish to acknowledge reviews and contributions from Ilj | at carry it.</li> | |||
itsch van Beijnum, Lars Eggert, Marcelo Bagnulo, Robert Hancock, Pasi Sarolahti, | <li pn="section-6-13.3">The receive window may be shrunk by some middleb | |||
Toby Moncaster, Philip Eardley, Sergio Lembo, Lawrence Conroy, Yoshifumi Nishid | oxes at the | |||
a, Bob Briscoe, Stein Gjessing, Andrew McGregor, Georg Hampel, Anumita Biswas, W | subflow level. MPTCP will use the maximum window at the data level but w | |||
es Eddy, Alexey Melnikov, Francis Dupont, Adrian Farrel, Barry Leiba, Robert Spa | ill also obey | |||
rks, Sean Turner, Stephen Farrell, Martin Stiemerling, Gregory Detal, Fabien Duc | subflow-specific windows.</li> | |||
hene, Xavier de Foy, Rahul Jadhav, Klemens Schragel, Mirja Kuehlewind, Sheng Jia | </ul> | |||
ng, Alissa Cooper, Ines Robles, Roman Danyliw, Adam Roach, Barry Leiba, Alexey M | ||||
elnikov, Eric Vyncke, and Ben Kaduk.</t> | ||||
</section> | ||||
<section anchor="IANA" title="IANA Considerations"> | ||||
<t>This document obsoletes RFC6824 and as such IANA is requested to update | ||||
the TCP option space registry to point to this document for Multipath TCP, as f | ||||
ollows:</t> | ||||
<texttable anchor="table_tcpo" title="TCP Option Kind Numbers"> | ||||
<ttcol align="center">Kind</ttcol> | ||||
<ttcol align="center">Length</ttcol> | ||||
<ttcol align="center">Meaning</ttcol> | ||||
<ttcol align="center">Reference</ttcol> | ||||
<c>30</c> | ||||
<c>N</c> | ||||
<c>Multipath TCP (MPTCP)</c> | ||||
<c>This document</c> | ||||
</texttable> | ||||
<section anchor="IANA_subtypes" title="MPTCP Option Subtypes"> | ||||
<t>The 4-bit MPTCP subtype sub-registry ("MPTCP Option Subtypes" under the | ||||
"Transmission Control Protocol (TCP) Parameters" registry) was defined in RFC68 | ||||
24. Since RFC6824 was an Experimental not Standards Track RFC, and since no furt | ||||
her entries have occurred beyond those pointing to RFC6824, IANA is requested to | ||||
replace the existing registry with <xref target="table_iana"/> and with the fol | ||||
lowing explanatory note.</t> | ||||
<t>Note: This registry specifies the MPTCP Option Subtypes for MPTCP v1, w | ||||
hich obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please refe | ||||
r to RFC6824.</t> | ||||
<texttable anchor="table_iana" title="MPTCP Option Subtypes"> | ||||
<ttcol align="center">Value</ttcol> | ||||
<ttcol align="center">Symbol</ttcol> | ||||
<ttcol align="center">Name</ttcol> | ||||
<ttcol align="center">Reference</ttcol> | ||||
<c>0x0</c> | ||||
<c>MP_CAPABLE</c> | ||||
<c>Multipath Capable</c> | ||||
<c>This document, <xref target="sec_init"/></c> | ||||
<c>0x1</c> | ||||
<c>MP_JOIN</c> | ||||
<c>Join Connection</c> | ||||
<c>This document, <xref target="sec_join"/></c> | ||||
<c>0x2</c> | ||||
<c>DSS</c> | ||||
<c>Data Sequence Signal (Data ACK and data sequence mapping)</c> | ||||
<c>This document, <xref target="sec_generalop"/></c> | ||||
<c>0x3</c> | ||||
<c>ADD_ADDR</c> | ||||
<c>Add Address</c> | ||||
<c>This document, <xref target="sec_add_address"/></c> | ||||
<c>0x4</c> | ||||
<c>REMOVE_ADDR</c> | ||||
<c>Remove Address</c> | ||||
<c>This document, <xref target="sec_remove_addr"/></c> | ||||
<c>0x5</c> | ||||
<c>MP_PRIO</c> | ||||
<c>Change Subflow Priority</c> | ||||
<c>This document, <xref target="sec_policy"/></c> | ||||
<c>0x6</c> | ||||
<c>MP_FAIL</c> | ||||
<c>Fallback</c> | ||||
<c>This document, <xref target="sec_fallback"/></c> | ||||
<c>0x7</c> | ||||
<c>MP_FASTCLOSE</c> | ||||
<c>Fast Close</c> | ||||
<c>This document, <xref target="sec_fastclose"/></c> | ||||
<c>0x8</c> | ||||
<c>MP_TCPRST</c> | ||||
<c>Subflow Reset</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0xf</c> | ||||
<c>MP_EXPERIMENTAL</c> | ||||
<c>Reserved for private experiments</c> | ||||
<c></c> | ||||
</texttable> | ||||
<t>Values 0x9 through 0xe are currently unassigned. Option 0xf is reserved | ||||
for use by private experiments. Its use may be formalized in a future specifica | ||||
tion. Future assignments in this registry are to be defined by Standards Action | ||||
as defined by <xref target="RFC8126"/>. Assignments consist of the MPTCP subtyp | ||||
e's symbolic name and its associated value, and a reference to its specification | ||||
.</t> | ||||
</section> | </section> | |||
<section anchor="IANA" numbered="true" toc="include" removeInRFC="false" pn= | ||||
<section anchor="IANA_handshake" title="MPTCP Handshake Algorithms"> | "section-7"> | |||
<name slugifiedName="name-iana-considerations">IANA Considerations</name> | ||||
<t>The "MPTCP Handshake Algorithms" sub-registry under the "Transmission C | <t pn="section-7-1">This document obsoletes <xref target="RFC6824" format= | |||
ontrol Protocol (TCP) Parameters" registry was defined in RFC6824. Since RFC6824 | "default" sectionFormat="of" derivedContent="RFC6824"/>. As such, IANA has updat | |||
was an Experimental not Standards Track RFC, and since no further entries have | ed | |||
occurred beyond those pointing to RFC6824, IANA is requested to replace the exis | several registries to point to this document. In addition, this document | |||
ting registry with <xref target="table_crypto"/> and with the following explanat | creates one new registry. These topics are described in the following sub | |||
ory note.</t> | sections.</t> | |||
<section anchor="IANA-TCP-Option-Kind" numbered="true" toc="include" remov | ||||
<t>Note: This registry specifies the MPTCP Handshake Algorithms for MPTCP | eInRFC="false" pn="section-7.1"> | |||
v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please | <name slugifiedName="name-tcp-option-kind-numbers">TCP Option Kind Numbe | |||
refer to RFC6824.</t> | rs</name> | |||
<t pn="section-7.1-1">IANA has | ||||
<texttable anchor="table_crypto" title="MPTCP Handshake Algorithms"> | updated the "TCP Option Kind Numbers" registry to point to this document | |||
<ttcol align="center">Flag Bit</ttcol> | for Multipath TCP, as shown in <xref target="table_tcpo" format="default" | |||
<ttcol align="center">Meaning</ttcol> | sectionFormat="of" derivedContent="Table 1"/>:</t> | |||
<ttcol align="center">Reference</ttcol> | <table anchor="table_tcpo" align="center" pn="table-1"> | |||
<name slugifiedName="name-tcp-option-kind-numbers-2">TCP Option Kind N | ||||
<c>A</c> | umbers</name> | |||
<c>Checksum required</c> | <thead> | |||
<c>This document, <xref target="sec_init"/></c> | <tr> | |||
<th align="center" colspan="1" rowspan="1">Kind</th> | ||||
<c>B</c> | <th align="center" colspan="1" rowspan="1">Length</th> | |||
<c>Extensibility</c> | <th align="center" colspan="1" rowspan="1">Meaning</th> | |||
<c>This document, <xref target="sec_init"/></c> | <th align="center" colspan="1" rowspan="1">Reference</th> | |||
</tr> | ||||
<c>C</c> | </thead> | |||
<c>Do not attempt to establish new subflows to the source address.</c> | <tbody> | |||
<c>This document, <xref target="sec_init"/></c> | <tr> | |||
<td align="center" colspan="1" rowspan="1">30</td> | ||||
<c>D-G</c> | <td align="center" colspan="1" rowspan="1">N</td> | |||
<c>Unassigned</c> | <td align="center" colspan="1" rowspan="1">Multipath TCP (MPTCP)</ | |||
<c></c> | td> | |||
<td align="center" colspan="1" rowspan="1">RFC 8684</td> | ||||
<c>H</c> | </tr> | |||
<c>HMAC-SHA256</c> | </tbody> | |||
<c>This document, <xref target="sec_join"/></c> | </table> | |||
</texttable> | </section> | |||
<section anchor="IANA_subtypes" numbered="true" toc="include" removeInRFC= | ||||
<t>Note that the meanings of bits D through H can be dependent upon bit B, | "false" pn="section-7.2"> | |||
depending on how Extensibility is defined in future specifications; see | <name slugifiedName="name-mptcp-option-subtypes">MPTCP Option Subtypes</ | |||
<xref target="sec_init"/> for more information.</t> | name> | |||
<t pn="section-7.2-1">The 4-bit MPTCP subtype in the "MPTCP Option Subty | ||||
<t>Future assignments in this registry are also | pes" | |||
to be defined by Standards Action as defined by <xref target="RFC8126"/>. | subregistry under the "Transmission Control Protocol (TCP) Parameters" | |||
registry was defined in <xref target="RFC6824" format="default" sectionF | ||||
ormat="of" derivedContent="RFC6824"/>. Since <xref target="RFC6824" format="defa | ||||
ult" sectionFormat="of" derivedContent="RFC6824"/> is an | ||||
Experimental RFC and not a Standards Track RFC, and since no further | ||||
entries have occurred beyond those pointing to <xref target="RFC6824" fo | ||||
rmat="default" sectionFormat="of" derivedContent="RFC6824"/>, IANA has | ||||
replaced the existing registry with the contents of | ||||
<xref target="table_iana" format="default" sectionFormat="of" derivedCon | ||||
tent="Table 2"/> and with the following | ||||
explanatory note.</t> | ||||
<t pn="section-7.2-2">Note: This registry specifies the MPTCP Option Sub | ||||
types for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 | ||||
subtypes, please refer to <xref target="RFC6824" format="default" sectionFormat= | ||||
"of" derivedContent="RFC6824"/>.</t> | ||||
<table anchor="table_iana" align="center" pn="table-2"> | ||||
<name slugifiedName="name-mptcp-option-subtypes-2">MPTCP Option Subtyp | ||||
es</name> | ||||
<thead> | ||||
<tr> | ||||
<th align="center" colspan="1" rowspan="1">Value</th> | ||||
<th align="center" colspan="1" rowspan="1">Symbol</th> | ||||
<th align="center" colspan="1" rowspan="1">Name</th> | ||||
<th align="center" colspan="1" rowspan="1">Reference</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x0</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_CAPABLE</td> | ||||
<td align="center" colspan="1" rowspan="1">Multipath Capable</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x1</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_JOIN</td> | ||||
<td align="center" colspan="1" rowspan="1">Join Connection</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x2</td> | ||||
<td align="center" colspan="1" rowspan="1">DSS</td> | ||||
<td align="center" colspan="1" rowspan="1">Data Sequence Signal (D | ||||
ata ACK and Data Sequence Mapping)</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3" | ||||
/></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x3</td> | ||||
<td align="center" colspan="1" rowspan="1">ADD_ADDR</td> | ||||
<td align="center" colspan="1" rowspan="1">Add Address</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3. | ||||
4.1"/></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x4</td> | ||||
<td align="center" colspan="1" rowspan="1">REMOVE_ADDR</td> | ||||
<td align="center" colspan="1" rowspan="1">Remove Address</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3. | ||||
4.2"/></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x5</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_PRIO</td> | ||||
<td align="center" colspan="1" rowspan="1">Change Subflow Priority | ||||
</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/ | ||||
></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x6</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_FAIL</td> | ||||
<td align="center" colspan="1" rowspan="1">Fallback</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/ | ||||
></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x7</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_FASTCLOSE</td> | ||||
<td align="center" colspan="1" rowspan="1">Fast Close</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5" | ||||
/></td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x8</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_TCPRST</td> | ||||
<td align="center" colspan="1" rowspan="1">Subflow Reset</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0xf</td> | ||||
<td align="center" colspan="1" rowspan="1">MP_EXPERIMENTAL</td> | ||||
<td align="center" colspan="1" rowspan="1">Reserved for Private Us | ||||
e</td> | ||||
<td align="center" colspan="1" rowspan="1"/> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t pn="section-7.2-4">Values 0x9 through 0xe are currently unassigned. O | ||||
ption 0xf is reserved for use by private experiments. Its use may be formalized | ||||
in a future specification. Future assignments in this registry are to be defined | ||||
by Standards Action as defined by <xref target="RFC8126" format="default" secti | ||||
onFormat="of" derivedContent="RFC8126"/>. Assignments consist of the MPTCP subt | ||||
ype's symbolic name, its associated value, and a reference to its specification. | ||||
</t> | ||||
</section> | ||||
<section anchor="IANA_handshake" numbered="true" toc="include" removeInRFC | ||||
="false" pn="section-7.3"> | ||||
<name slugifiedName="name-mptcp-handshake-algorithms">MPTCP Handshake Al | ||||
gorithms</name> | ||||
<t pn="section-7.3-1">The "MPTCP Handshake Algorithms" subregistry under | ||||
the | ||||
"Transmission Control Protocol (TCP) Parameters" registry was defined | ||||
in <xref target="RFC6824" format="default" sectionFormat="of" derivedCon | ||||
tent="RFC6824"/>. Since <xref target="RFC6824" format="default" sectionFormat="o | ||||
f" derivedContent="RFC6824"/> is an Experimental RFC and not | ||||
a Standards Track RFC, and since no further entries have occurred | ||||
beyond those pointing to <xref target="RFC6824" format="default" section | ||||
Format="of" derivedContent="RFC6824"/>, IANA has replaced | ||||
the existing registry with the contents of | ||||
<xref target="table_crypto" format="default" sectionFormat="of" derivedContent= | ||||
"Table 3"/> and with the following explanatory note.</t> | ||||
<t pn="section-7.3-2">Note: This registry specifies the MPTCP Handshake | ||||
Algorithms for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTC | ||||
P v0 subtypes, please refer to <xref target="RFC6824" format="default" sectionFo | ||||
rmat="of" derivedContent="RFC6824"/>.</t> | ||||
<table anchor="table_crypto" align="center" pn="table-3"> | ||||
<name slugifiedName="name-mptcp-handshake-algorithms-2">MPTCP Handshak | ||||
e Algorithms</name> | ||||
<thead> | ||||
<tr> | ||||
<th align="center" colspan="1" rowspan="1">Flag Bit</th> | ||||
<th align="center" colspan="1" rowspan="1">Meaning</th> | ||||
<th align="center" colspan="1" rowspan="1">Reference</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">A</td> | ||||
<td align="center" colspan="1" rowspan="1">Checksum required</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">B</td> | ||||
<td align="center" colspan="1" rowspan="1">Extensibility</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">C</td> | ||||
<td align="center" colspan="1" rowspan="1">Do not attempt to estab | ||||
lish new subflows to the source address.</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t | ||||
d> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">D-G</td> | ||||
<td align="center" colspan="1" rowspan="1">Unassigned</td> | ||||
<td align="center" colspan="1" rowspan="1"/> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">H</td> | ||||
<td align="center" colspan="1" rowspan="1">HMAC-SHA256</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></t | ||||
d> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t pn="section-7.3-4">Note that the meanings of bits "D" through "H" can | ||||
be dependent upon bit "B", | ||||
depending on how the Extensibility parameter is defined in future specific | ||||
ations; see | ||||
<xref target="sec_init" format="default" sectionFormat="of" derivedContent | ||||
="Section 3.1"/> for more information.</t> | ||||
<t pn="section-7.3-5">Future assignments in this registry are also | ||||
to be defined by Standards Action as defined by <xref target="RFC8126" for | ||||
mat="default" sectionFormat="of" derivedContent="RFC8126"/>. | ||||
Assignments consist of the value of the flags, a symbolic name for the algorithm, | Assignments consist of the value of the flags, a symbolic name for the algorithm, | |||
and a reference to its specification.</t> | and a reference to its specification.</t> | |||
</section> | ||||
<section anchor="IANA_rst" title="MP_TCPRST Reason Codes"> | ||||
<t>IANA is requested to create a further sub-registry, "MPTCP MP_TCPRST Re | ||||
ason Codes" under the "Transmission Control Protocol (TCP) Parameters" registry, | ||||
based on the reason code in MP_TCPRST (<xref target="sec_reset"/>) message. Ini | ||||
tial values for this registry are given in <xref target="table_rstcodes"/>; futu | ||||
re assignments are to be defined by Specification Required as defined by <xref t | ||||
arget="RFC8126"/>. Assignments consist of the value of the code, a short descrip | ||||
tion of its meaning, and a reference to its specification. The maximum value is | ||||
0xff.</t> | ||||
<t>As guidance to the Designated Expert <xref target="RFC8126"/>, assignme | ||||
nts should not normally be refused unless codepoint space is becoming scarce, pr | ||||
oviding that there is a clear distinction from other, already-existing codes, an | ||||
d also providing there is sufficient guidance for implementors both sending and | ||||
receiving these codes.</t> | ||||
<texttable anchor="table_rstcodes" title="MPTCP MP_TCPRST Reason Codes"> | ||||
<ttcol align="center">Code</ttcol> | ||||
<ttcol align="center">Meaning</ttcol> | ||||
<ttcol align="center">Reference</ttcol> | ||||
<c>0x00</c> | ||||
<c>Unspecified TCP error</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x01</c> | ||||
<c>MPTCP specific error</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x02</c> | ||||
<c>Lack of resources</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x03</c> | ||||
<c>Administratively prohibited</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x04</c> | ||||
<c>Too much outstanding data</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x05</c> | ||||
<c>Unacceptable performance</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
<c>0x06</c> | ||||
<c>Middlebox interference</c> | ||||
<c>This document, <xref target="sec_reset"/></c> | ||||
</texttable> | ||||
</section> | </section> | |||
<section anchor="IANA_rst" numbered="true" toc="include" removeInRFC="fals | ||||
e" pn="section-7.4"> | ||||
<name slugifiedName="name-mp_tcprst-reason-codes">MP_TCPRST Reason Codes | ||||
</name> | ||||
<t pn="section-7.4-1">IANA has created a further subregistry, "MPTCP MP_ | ||||
TCPRST | ||||
Reason Codes" under the "Transmission Control Protocol (TCP) | ||||
Parameters" registry, based on the reason code in the MP_TCPRST (<xref t | ||||
arget="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3. | ||||
6"/>) message. Initial values for this registry are given in <xref target="table | ||||
_rstcodes" format="default" sectionFormat="of" derivedContent="Table 4"/>; futur | ||||
e assignments are to be defined by Specification Required as defined by <xref ta | ||||
rget="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>. A | ||||
ssignments consist of the value of the code, a short description of its meaning, | ||||
and a reference to its specification. The maximum value is 0xff.</t> | ||||
<table anchor="table_rstcodes" align="center" pn="table-4"> | ||||
<name slugifiedName="name-mptcp-mp_tcprst-reason-code">MPTCP MP_TCPRST | ||||
Reason Codes</name> | ||||
<thead> | ||||
<tr> | ||||
<th align="center" colspan="1" rowspan="1">Code</th> | ||||
<th align="center" colspan="1" rowspan="1">Meaning</th> | ||||
<th align="center" colspan="1" rowspan="1">Reference</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x00</td> | ||||
<td align="center" colspan="1" rowspan="1">Unspecified error</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x01</td> | ||||
<td align="center" colspan="1" rowspan="1">MPTCP-specific error</t | ||||
d> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x02</td> | ||||
<td align="center" colspan="1" rowspan="1">Lack of resources</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x03</td> | ||||
<td align="center" colspan="1" rowspan="1">Administratively prohib | ||||
ited</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x04</td> | ||||
<td align="center" colspan="1" rowspan="1">Too much outstanding da | ||||
ta</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x05</td> | ||||
<td align="center" colspan="1" rowspan="1">Unacceptable performanc | ||||
e</td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center" colspan="1" rowspan="1">0x06</td> | ||||
<td align="center" colspan="1" rowspan="1">Middlebox interference< | ||||
/td> | ||||
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target= | ||||
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></ | ||||
td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
<t pn="section-7.4-3">As guidance to the designated expert <xref target= | ||||
"RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>, assign | ||||
ments should not normally be refused unless | ||||
codepoint space is becoming scarce, provided that there is a clear | ||||
distinction from other, already-existing codes and also provided that th | ||||
ere is sufficient guidance for implementers both sending and receiving these cod | ||||
es.</t> | ||||
</section> | ||||
</section> | </section> | |||
</middle> | </middle> | |||
<!-- *****BACK MATTER ***** --> | ||||
<back> | <back> | |||
<displayreference target="I-D.ananth-tcpm-tcpoptext" to="TCPLO"/> | ||||
<references title="Normative References"> | <references pn="section-8"> | |||
&RFC0793; | <name slugifiedName="name-references">References</name> | |||
&RFC2104; | <references pn="section-8.1"> | |||
&RFC2119; | <name slugifiedName="name-normative-references">Normative References</na | |||
&RFC5961; | me> | |||
&RFC6234; | <reference anchor="RFC0793" target="https://www.rfc-editor.org/info/rfc7 | |||
&RFC8174; | 93" quoteTitle="true" derivedAnchor="RFC0793"> | |||
<front> | ||||
<title>Transmission Control Protocol</title> | ||||
<author initials="J." surname="Postel" fullname="J. Postel"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1981" month="September"/> | ||||
</front> | ||||
<seriesInfo name="STD" value="7"/> | ||||
<seriesInfo name="RFC" value="793"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC0793"/> | ||||
</reference> | ||||
<reference anchor="RFC2104" target="https://www.rfc-editor.org/info/rfc2 | ||||
104" quoteTitle="true" derivedAnchor="RFC2104"> | ||||
<front> | ||||
<title>HMAC: Keyed-Hashing for Message Authentication</title> | ||||
<author initials="H." surname="Krawczyk" fullname="H. Krawczyk"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Bellare" fullname="M. Bellare"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="R." surname="Canetti" fullname="R. Canetti"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1997" month="February"/> | ||||
<abstract> | ||||
<t>This document describes HMAC, a mechanism for message authentic | ||||
ation using cryptographic hash functions. HMAC can be used with any iterative cr | ||||
yptographic hash function, e.g., MD5, SHA-1, in combination with a secret shared | ||||
key. The cryptographic strength of HMAC depends on the properties of the under | ||||
lying hash function. This memo provides information for the Internet community. | ||||
This memo does not specify an Internet standard of any kind.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="2104"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2104"/> | ||||
</reference> | ||||
<reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2 | ||||
119" quoteTitle="true" derivedAnchor="RFC2119"> | ||||
<front> | ||||
<title>Key words for use in RFCs to Indicate Requirement Levels</tit | ||||
le> | ||||
<author initials="S." surname="Bradner" fullname="S. Bradner"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1997" month="March"/> | ||||
<abstract> | ||||
<t>In many standards track documents several words are used to sig | ||||
nify the requirements in the specification. These words are often capitalized. | ||||
This document defines these words as they should be interpreted in IETF document | ||||
s. This document specifies an Internet Best Current Practices for the Internet | ||||
Community, and requests discussion and suggestions for improvements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="14"/> | ||||
<seriesInfo name="RFC" value="2119"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2119"/> | ||||
</reference> | ||||
<reference anchor="RFC5961" target="https://www.rfc-editor.org/info/rfc5 | ||||
961" quoteTitle="true" derivedAnchor="RFC5961"> | ||||
<front> | ||||
<title>Improving TCP's Robustness to Blind In-Window Attacks</title> | ||||
<author initials="A." surname="Ramaiah" fullname="A. Ramaiah"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="R." surname="Stewart" fullname="R. Stewart"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Dalal" fullname="M. Dalal"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2010" month="August"/> | ||||
<abstract> | ||||
<t>TCP has historically been considered to be protected against sp | ||||
oofed off-path packet injection attacks by relying on the fact that it is diffic | ||||
ult to guess the 4-tuple (the source and destination IP addresses and the source | ||||
and destination ports) in combination with the 32-bit sequence number(s). A co | ||||
mbination of increasing window sizes and applications using longer-term connecti | ||||
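ons (e.g., H-323 or Border Gateway Protocol (BGP)) have left modern TCP | ||||
implementations more vulnerable to these types of spoofed packet injection | ||||
attacks. [STANDARDS-TRACK]</t> | ||||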
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="5961"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5961"/> | ||||
</reference> | ||||
<reference anchor="RFC6234" target="https://www.rfc-editor.org/info/rfc6 | ||||
234" quoteTitle="true" derivedAnchor="RFC6234"> | ||||
<front> | ||||
<title>US Secure Hash Algorithms (SHA and SHA-based HMAC and HKDF)</ | ||||
title> | ||||
<author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3 | ||||
rd"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="T." surname="Hansen" fullname="T. Hansen"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2011" month="May"/> | ||||
<abstract> | ||||
<t>Federal Information Processing Standard, FIPS</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6234"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6234"/> | ||||
</reference> | ||||
<reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8 | ||||
174" quoteTitle="true" derivedAnchor="RFC8174"> | ||||
<front> | ||||
<title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</ti | ||||
tle> | ||||
<author initials="B." surname="Leiba" fullname="B. Leiba"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2017" month="May"/> | ||||
<abstract> | ||||
<t>RFC 2119 specifies common key words that may be used in protoco | ||||
l specifications. This document aims to reduce the ambiguity by clarifying tha | ||||
t only UPPERCASE usage of the key words have the defined special meanings.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="14"/> | ||||
<seriesInfo name="RFC" value="8174"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8174"/> | ||||
</reference> | ||||
</references> | ||||
<references pn="section-8.2"> | ||||
<name slugifiedName="name-informative-references">Informative References | ||||
</name> | ||||
<reference anchor="deployments" target="https://www.ietfjournal.org/mult | ||||
ipath-tcp-deployments/" quoteTitle="true" derivedAnchor="deployments"> | ||||
<front> | ||||
<title abbrev="MPTCP Deployments">Multipath TCP Deployments</title> | ||||
<seriesInfo name="IETF Journal" value="2016"/> | ||||
<author initials="O." surname="Bonaventure" fullname="Olivier Bonave | ||||
nture"> | ||||
<organization showOnFrontPage="true">Universite Catholique de Louv | ||||
ain</organization> | ||||
</author> | ||||
<author initials="S." surname="Seo" fullname="SungHoon Seo"/> | ||||
<date month="November" year="2016"/> | ||||
</front> | ||||
</reference> | ||||
<reference anchor="howhard" target="https://www.usenix.org/conference/ns | ||||
di12/technical-sessions/presentation/raiciu" quoteTitle="true" derivedAnchor="ho | ||||
whard"> | ||||
<front> | ||||
<title abbrev="How Hard Can It Be? Designing and Implementing a Depl | ||||
oyable Multipath TCP">How Hard Can It Be? Designing and Implementing a Deployabl | ||||
e Multipath TCP</title> | ||||
<seriesInfo name="Usenix Symposium on Networked Systems Design and I | ||||
mplementation" value="2012"/> | ||||
<author initials="C." surname="Raiciu" fullname="Costin Raiciu"> | ||||
<organization showOnFrontPage="true">Universitatea Politehnica Buc | ||||
uresti</organization> | ||||
</author> | ||||
<author initials="C." surname="Paasch" fullname="Christoph Paasch"> | ||||
<organization showOnFrontPage="true">Universite Catholique de Louv | ||||
ain</organization> | ||||
</author> | ||||
<author initials="S." surname="Barre" fullname="Sebastien Barre"> | ||||
<organization showOnFrontPage="true">Universite Catholique de Louv | ||||
ain</organization> | ||||
</author> | ||||
<author initials="A." surname="Ford" fullname="Alan Ford"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Honda" fullname="Michio Honda"> | ||||
<organization showOnFrontPage="true">Keio University</organization | ||||
> | ||||
</author> | ||||
<author initials="F." surname="Duchene" fullname="Fabien Duchene"> | ||||
<organization showOnFrontPage="true">Universite Catholique de Louv | ||||
ain</organization> | ||||
</author> | ||||
<author initials="O." surname="Bonaventure" fullname="Olivier Bonave | ||||
nture"> | ||||
<organization showOnFrontPage="true">Universite Catholique de Louv | ||||
ain</organization> | ||||
</author> | ||||
<author initials="M." surname="Handley" fullname="Mark Handley"> | ||||
<organization showOnFrontPage="true">University College London</or | ||||
ganization> | ||||
</author> | ||||
<date month="April" year="2012"/> | ||||
</front> | ||||
</reference> | ||||
<reference anchor="norm" target="https://www.usenix.org/legacy/events/se | ||||
c01/full_papers/handley/handley.pdf" quoteTitle="true" derivedAnchor="norm"> | ||||
<front> | ||||
<title abbrev="Network Intrusion Detection: Evasion, Traffic Normali | ||||
zation, and End-to-End Protocol Semantics">Network Intrusion Detection: Evasion, | ||||
Traffic Normalization, and End-to-End Protocol Semantics</title> | ||||
<seriesInfo name="Usenix Security Symposium" value="2001"/> | ||||
<author initials="M." surname="Handley" fullname="Mark Handley"> | ||||
<organization showOnFrontPage="true">ACIRI</organization> | ||||
</author> | ||||
<author initials="V." surname="Paxson" fullname="Vern Paxson"> | ||||
<organization showOnFrontPage="true">ACIRI</organization> | ||||
</author> | ||||
<author initials="C." surname="Kreibich" fullname="Christian Kreibic | ||||
h"> | ||||
<organization showOnFrontPage="true">Technische Universitat Munche | ||||
n</organization> | ||||
</author> | ||||
<date month="August" year="2001"/> | ||||
</front> | ||||
</reference> | ||||
<reference anchor="RFC1122" target="https://www.rfc-editor.org/info/rfc1 | ||||
122" quoteTitle="true" derivedAnchor="RFC1122"> | ||||
<front> | ||||
<title>Requirements for Internet Hosts - Communication Layers</title | ||||
> | ||||
<author initials="R." surname="Braden" fullname="R. Braden" role="ed | ||||
itor"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1989" month="October"/> | ||||
<abstract> | ||||
<t>This RFC is an official specification for the Internet communit | ||||
y. It incorporates by reference, amends, corrects, and supplements the primary | ||||
protocol standards documents relating to hosts. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="STD" value="3"/> | ||||
<seriesInfo name="RFC" value="1122"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC1122"/> | ||||
</reference> | ||||
<reference anchor="RFC1918" target="https://www.rfc-editor.org/info/rfc1 | ||||
918" quoteTitle="true" derivedAnchor="RFC1918"> | ||||
<front> | ||||
<title>Address Allocation for Private Internets</title> | ||||
<author initials="Y." surname="Rekhter" fullname="Y. Rekhter"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="B." surname="Moskowitz" fullname="B. Moskowitz"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="D." surname="Karrenberg" fullname="D. Karrenberg"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="G. J." surname="de Groot" fullname="G. J. de Groot | ||||
"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="E." surname="Lear" fullname="E. Lear"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1996" month="February"/> | ||||
<abstract> | ||||
<t>This document describes address allocation for private internet | ||||
s. This document specifies an Internet Best Current Practices for the Internet | ||||
Community, and requests discussion and suggestions for improvements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="5"/> | ||||
<seriesInfo name="RFC" value="1918"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC1918"/> | ||||
</reference> | ||||
<reference anchor="RFC2018" target="https://www.rfc-editor.org/info/rfc2 | ||||
018" quoteTitle="true" derivedAnchor="RFC2018"> | ||||
<front> | ||||
<title>TCP Selective Acknowledgment Options</title> | ||||
<author initials="M." surname="Mathis" fullname="M. Mathis"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="J." surname="Mahdavi" fullname="J. Mahdavi"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="S." surname="Floyd" fullname="S. Floyd"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="A." surname="Romanow" fullname="A. Romanow"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="1996" month="October"/> | ||||
<abstract> | ||||
<t>This memo proposes an implementation of SACK and discusses its | ||||
performance and related issues. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="2018"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2018"/> | ||||
</reference> | ||||
<reference anchor="RFC2979" target="https://www.rfc-editor.org/info/rfc2 | ||||
979" quoteTitle="true" derivedAnchor="RFC2979"> | ||||
<front> | ||||
<title>Behavior of and Requirements for Internet Firewalls</title> | ||||
<author initials="N." surname="Freed" fullname="N. Freed"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2000" month="October"/> | ||||
<abstract> | ||||
<t>This memo defines behavioral characteristics of and interoperab | ||||
ility requirements for Internet firewalls. This memo provides information for t | ||||
he Internet community.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="2979"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2979"/> | ||||
</reference> | ||||
<reference anchor="RFC2992" target="https://www.rfc-editor.org/info/rfc2 | ||||
992" quoteTitle="true" derivedAnchor="RFC2992"> | ||||
<front> | ||||
<title>Analysis of an Equal-Cost Multi-Path Algorithm</title> | ||||
<author initials="C." surname="Hopps" fullname="C. Hopps"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2000" month="November"/> | ||||
<abstract> | ||||
<t>Equal-cost multi-path (ECMP) is a routing technique for routing | ||||
packets along multiple paths of equal cost. The forwarding engine identifies p | ||||
aths by next-hop. When forwarding a packet the router must decide which next-ho | ||||
p (path) to use. This document gives an analysis of one method for making that | ||||
decision. The analysis includes the performance of the algorithm and the disrup | ||||
tion caused by changes to the set of next-hops. This memo provides information | ||||
for the Internet community.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="2992"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC2992"/> | ||||
</reference> | ||||
<reference anchor="RFC3022" target="https://www.rfc-editor.org/info/rfc3 | ||||
022" quoteTitle="true" derivedAnchor="RFC3022"> | ||||
<front> | ||||
<title>Traditional IP Network Address Translator (Traditional NAT)</ | ||||
title> | ||||
<author initials="P." surname="Srisuresh" fullname="P. Srisuresh"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="K." surname="Egevang" fullname="K. Egevang"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2001" month="January"/> | ||||
<abstract> | ||||
<t>The NAT operation described in this document extends address tr | ||||
anslation introduced in RFC 1631 and includes a new type of network address and | ||||
TCP/UDP port translation. In addition, this document corrects the Checksum adju | ||||
stment algorithm published in RFC 1631 and attempts to discuss NAT operation and | ||||
limitations in detail. This memo provides information for the Internet communi | ||||
ty.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="3022"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC3022"/> | ||||
</reference> | ||||
<reference anchor="RFC3135" target="https://www.rfc-editor.org/info/rfc3 | ||||
135" quoteTitle="true" derivedAnchor="RFC3135"> | ||||
<front> | ||||
<title>Performance Enhancing Proxies Intended to Mitigate Link-Relat | ||||
ed Degradations</title> | ||||
<author initials="J." surname="Border" fullname="J. Border"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Kojo" fullname="M. Kojo"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="J." surname="Griner" fullname="J. Griner"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="G." surname="Montenegro" fullname="G. Montenegro"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="Z." surname="Shelby" fullname="Z. Shelby"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2001" month="June"/> | ||||
<abstract> | ||||
<t>This document is a survey of Performance Enhancing Proxies (PEP | ||||
s) often employed to improve degraded TCP performance caused by characteristics | ||||
of specific link environments, for example, in satellite, wireless WAN, and wire | ||||
less LAN environments. This memo provides information for the Internet communit | ||||
y.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="3135"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC3135"/> | ||||
</reference> | ||||
<reference anchor="RFC4086" target="https://www.rfc-editor.org/info/rfc4 | ||||
086" quoteTitle="true" derivedAnchor="RFC4086"> | ||||
<front> | ||||
<title>Randomness Requirements for Security</title> | ||||
<author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3 | ||||
rd"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="J." surname="Schiller" fullname="J. Schiller"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="S." surname="Crocker" fullname="S. Crocker"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2005" month="June"/> | ||||
<abstract> | ||||
<t>Security systems are built on strong cryptographic algorithms t | ||||
hat foil pattern analysis attempts. However, the security of these systems is d | ||||
ependent on generating secret quantities for passwords, cryptographic keys, and | ||||
similar quantities. The use of pseudo-random processes to generate secret quant | ||||
ities can result in pseudo-security. A sophisticated attacker may find it easier | ||||
to reproduce the environment that produced the secret quantities and to search | ||||
the resulting small set of possibilities than to locate the quantities in the wh | ||||
ole of the potential number space.</t> | ||||
<t>Choosing random quantities to foil a resourceful and motivated | ||||
adversary is surprisingly difficult. This document points out many pitfalls in | ||||
using poor entropy sources or traditional pseudo-random number generation techni | ||||
ques for generating such quantities. It recommends the use of truly random hard | ||||
ware techniques and shows that the existing hardware on many systems can be used | ||||
for this purpose. It provides suggestions to ameliorate the problem when a hard | ||||
ware solution is not available, and it gives examples of how large such quantiti | ||||
es need to be for some applications. This document specifies an Internet Best C | ||||
urrent Practices for the Internet Community, and requests discussion and suggest | ||||
ions for improvements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="106"/> | ||||
<seriesInfo name="RFC" value="4086"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC4086"/> | ||||
</reference> | ||||
<reference anchor="RFC4987" target="https://www.rfc-editor.org/info/rfc4 | ||||
987" quoteTitle="true" derivedAnchor="RFC4987"> | ||||
<front> | ||||
<title>TCP SYN Flooding Attacks and Common Mitigations</title> | ||||
<author initials="W." surname="Eddy" fullname="W. Eddy"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2007" month="August"/> | ||||
<abstract> | ||||
<t>This document describes TCP SYN flooding attacks, which have be | ||||
en well-known to the community for several years. Various countermeasures again | ||||
st these attacks, and the trade-offs of each, are described. This document arch | ||||
ives explanations of the attack and common defense techniques for the benefit of | ||||
TCP implementers and administrators of TCP servers or networks, but does not ma | ||||
ke any standards-level recommendations. This memo provides information for the | ||||
Internet community.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="4987"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC4987"/> | ||||
</reference> | ||||
<reference anchor="RFC5681" target="https://www.rfc-editor.org/info/rfc5 | ||||
681" quoteTitle="true" derivedAnchor="RFC5681"> | ||||
<front> | ||||
<title>TCP Congestion Control</title> | ||||
<author initials="M." surname="Allman" fullname="M. Allman"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="V." surname="Paxson" fullname="V. Paxson"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="E." surname="Blanton" fullname="E. Blanton"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2009" month="September"/> | ||||
<abstract> | ||||
<t>This document defines TCP's four intertwined congestion control | ||||
algorithms: slow start, congestion avoidance, fast retransmit, and fast recover | ||||
y. In addition, the document specifies how TCP should begin transmission after | ||||
a relatively long idle period, as well as discussing various acknowledgment gene | ||||
ration methods. This document obsoletes RFC 2581. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="5681"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC5681"/> | ||||
</reference> | ||||
<reference anchor="RFC6181" target="https://www.rfc-editor.org/info/rfc6 | ||||
181" quoteTitle="true" derivedAnchor="RFC6181"> | ||||
<front> | ||||
<title>Threat Analysis for TCP Extensions for Multipath Operation wi | ||||
th Multiple Addresses</title> | ||||
<author initials="M." surname="Bagnulo" fullname="M. Bagnulo"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2011" month="March"/> | ||||
<abstract> | ||||
<t>Multipath TCP (MPTCP for short) describes the extensions propos | ||||
ed for TCP so that endpoints of a given TCP connection can use multiple paths to | ||||
exchange data. Such extensions enable the exchange of segments using different | ||||
source-destination address pairs, resulting in the capability of using multiple | ||||
paths in a significant number of scenarios. Some level of multihoming and mobi | ||||
lity support can be achieved through these extensions. However, the support for | ||||
multiple IP addresses per endpoint may have implications on the security of the | ||||
resulting MPTCP. This note includes a threat analysis for MPTCP. This document | ||||
is not an Internet Standards Track specification; it is published for informati | ||||
onal purposes.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6181"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6181"/> | ||||
</reference> | ||||
<reference anchor="RFC6182" target="https://www.rfc-editor.org/info/rfc6 | ||||
182" quoteTitle="true" derivedAnchor="RFC6182"> | ||||
<front> | ||||
<title>Architectural Guidelines for Multipath TCP Development</title | ||||
> | ||||
<author initials="A." surname="Ford" fullname="A. Ford"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="C." surname="Raiciu" fullname="C. Raiciu"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Handley" fullname="M. Handley"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="S." surname="Barre" fullname="S. Barre"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="J." surname="Iyengar" fullname="J. Iyengar"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2011" month="March"/> | ||||
<abstract> | ||||
<t>Hosts are often connected by multiple paths, but TCP restricts | ||||
communications to a single path per transport connection. Resource usage within | ||||
the network would be more efficient were these multiple paths able to be used c | ||||
oncurrently. This should enhance user experience through improved resilience to | ||||
network failure and higher throughput.</t> | ||||
<t>This document outlines architectural guidelines for the develop | ||||
ment of a Multipath Transport Protocol, with references to how these architectur | ||||
al components come together in the development of a Multipath TCP (MPTCP). This | ||||
document lists certain high-level design decisions that provide foundations for | ||||
the design of the MPTCP protocol, based upon these architectural requirements. | ||||
This document is not an Internet Standards Track specification; it is publishe | ||||
d for informational purposes.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6182"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6182"/> | ||||
</reference> | ||||
<reference anchor="RFC6356" target="https://www.rfc-editor.org/info/rfc6 | ||||
356" quoteTitle="true" derivedAnchor="RFC6356"> | ||||
<front> | ||||
<title>Coupled Congestion Control for Multipath Transport Protocols< | ||||
/title> | ||||
<author initials="C." surname="Raiciu" fullname="C. Raiciu"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Handley" fullname="M. Handley"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="D." surname="Wischik" fullname="D. Wischik"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2011" month="October"/> | ||||
<abstract> | ||||
<t>Often endpoints are connected by multiple paths, but communicat | ||||
ions are usually restricted to a single path per connection. Resource usage wit | ||||
hin the network would be more efficient were it possible for these multiple path | ||||
s to be used concurrently. Multipath TCP is a proposal to achieve multipath tra | ||||
nsport in TCP.</t> | ||||
<t>New congestion control algorithms are needed for multipath tran | ||||
sport protocols such as Multipath TCP, as single path algorithms have a series o | ||||
f issues in the multipath context. One of the prominent problems is that runnin | ||||
g existing algorithms such as standard TCP independently on each path would give | ||||
the multipath flow more than its fair share at a bottleneck link traversed by m | ||||
ore than one of its subflows. Further, it is desirable that a source with multi | ||||
ple paths available will transfer more traffic using the least congested of the | ||||
paths, achieving a property called "resource pooling" where a bundle of links ef | ||||
fectively behaves like one shared link with bigger capacity. This would increas | ||||
e the overall efficiency of the network and also its robustness to failure.</t> | ||||
<t>This document presents a congestion control algorithm that coup | ||||
les the congestion control algorithms running on different subflows by linking t | ||||
heir increase functions, and dynamically controls the overall aggressiveness of | ||||
the multipath flow. The result is a practical algorithm that is fair to TCP at | ||||
bottlenecks while moving traffic away from congested links. This document defin | ||||
es an Experimental Protocol for the Internet community.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6356"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6356"/> | ||||
</reference> | ||||
<reference anchor="RFC6528" target="https://www.rfc-editor.org/info/rfc6 | ||||
528" quoteTitle="true" derivedAnchor="RFC6528"> | ||||
<front> | ||||
<title>Defending against Sequence Number Attacks</title> | ||||
<author initials="F." surname="Gont" fullname="F. Gont"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="S." surname="Bellovin" fullname="S. Bellovin"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2012" month="February"/> | ||||
<abstract> | ||||
<t>This document specifies an algorithm for the generation of TCP | ||||
Initial Sequence Numbers (ISNs), such that the chances of an off-path attacker g | ||||
uessing the sequence numbers in use by a target connection are reduced. This do | ||||
cument revises (and formally obsoletes) RFC 1948, and takes the ISN generation a | ||||
lgorithm originally proposed in that document to Standards Track, formally updat | ||||
ing RFC 793. [STANDARDS-TRACK]</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6528"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6528"/> | ||||
</reference> | ||||
<reference anchor="RFC6824" target="https://www.rfc-editor.org/info/rfc6 | ||||
824" quoteTitle="true" derivedAnchor="RFC6824"> | ||||
<front> | ||||
<title>TCP Extensions for Multipath Operation with Multiple Addresse | ||||
s</title> | ||||
<author initials="A." surname="Ford" fullname="A. Ford"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="C." surname="Raiciu" fullname="C. Raiciu"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="M." surname="Handley" fullname="M. Handley"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure | ||||
"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2013" month="January"/> | ||||
<abstract> | ||||
<t>TCP/IP communication is currently restricted to a single path p | ||||
er connection, yet multiple paths often exist between peers. The simultaneous u | ||||
se of these multiple paths for a TCP/IP session would improve resource usage wit | ||||
hin the network and, thus, improve user experience through higher throughput and | ||||
improved resilience to network failure.</t> | ||||
<t>Multipath TCP provides the ability to simultaneously use multip | ||||
le paths between peers. This document presents a set of extensions to tradition | ||||
al TCP to support multipath operation. The protocol offers the same type of ser | ||||
vice to applications as TCP (i.e., reliable bytestream), and it provides the com | ||||
ponents necessary to establish and use multiple TCP flows across potentially dis | ||||
joint paths. This document defines an Experimental Protocol for the Internet c | ||||
ommunity.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6824"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6824"/> | ||||
</reference> | ||||
<reference anchor="RFC6897" target="https://www.rfc-editor.org/info/rfc6 | ||||
897" quoteTitle="true" derivedAnchor="RFC6897"> | ||||
<front> | ||||
<title>Multipath TCP (MPTCP) Application Interface Considerations</t | ||||
itle> | ||||
<author initials="M." surname="Scharf" fullname="M. Scharf"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="A." surname="Ford" fullname="A. Ford"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2013" month="March"/> | ||||
<abstract> | ||||
<t>Multipath TCP (MPTCP) adds the capability of using multiple pat | ||||
hs to a regular TCP session. Even though it is designed to be totally backward | ||||
compatible to applications, the data transport differs compared to regular TCP, | ||||
and there are several additional degrees of freedom that applications may wish t | ||||
o exploit. This document summarizes the impact that MPTCP may have on applicati | ||||
ons, such as changes in performance. Furthermore, it discusses compatibility is | ||||
sues of MPTCP in combination with non-MPTCP-aware applications. Finally, the doc | ||||
ument describes a basic application interface that is a simple extension of TCP' | ||||
s interface for MPTCP-aware applications.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="6897"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC6897"/> | ||||
</reference> | ||||
<reference anchor="RFC7323" target="https://www.rfc-editor.org/info/rfc7 | ||||
323" quoteTitle="true" derivedAnchor="RFC7323"> | ||||
<front> | ||||
<title>TCP Extensions for High Performance</title> | ||||
<author initials="D." surname="Borman" fullname="D. Borman"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="B." surname="Braden" fullname="B. Braden"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="V." surname="Jacobson" fullname="V. Jacobson"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="R." surname="Scheffenegger" fullname="R. Scheffene | ||||
gger" role="editor"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2014" month="September"/> | ||||
<abstract> | ||||
<t>This document specifies a set of TCP extensions to improve perf | ||||
ormance over paths with a large bandwidth * delay product and to provide reliabl | ||||
e operation over very high-speed paths. It defines the TCP Window Scale (WS) op | ||||
tion and the TCP Timestamps (TS) option and their semantics. The Window Scale o | ||||
ption is used to support larger receive windows, while the Timestamps option can | ||||
be used for at least two distinct mechanisms, Protection Against Wrapped Sequen | ||||
ces (PAWS) and Round-Trip Time Measurement (RTTM), that are also described herei | ||||
n.</t> | ||||
<t>This document obsoletes RFC 1323 and describes changes from it. | ||||
</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="7323"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC7323"/> | ||||
</reference> | ||||
<reference anchor="RFC7413" target="https://www.rfc-editor.org/info/rfc7 | ||||
413" quoteTitle="true" derivedAnchor="RFC7413"> | ||||
<front> | ||||
<title>TCP Fast Open</title> | ||||
<author initials="Y." surname="Cheng" fullname="Y. Cheng"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="J." surname="Chu" fullname="J. Chu"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="S." surname="Radhakrishnan" fullname="S. Radhakris | ||||
hnan"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="A." surname="Jain" fullname="A. Jain"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2014" month="December"/> | ||||
<abstract> | ||||
<t>This document describes an experimental TCP mechanism called TC | ||||
P Fast Open (TFO). TFO allows data to be carried in the SYN and SYN-ACK packets | ||||
and consumed by the receiving end during the initial connection handshake, and | ||||
saves up to one full round-trip time (RTT) compared to the standard TCP, which r | ||||
equires a three-way handshake (3WHS) to complete before data can be exchanged. | ||||
However, TFO deviates from the standard TCP semantics, since the data in the SYN | ||||
could be replayed to an application in some rare circumstances. Applications s | ||||
hould not use TFO unless they can tolerate this issue, as detailed in the Applic | ||||
ability section.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="7413"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC7413"/> | ||||
</reference> | ||||
<reference anchor="RFC7430" target="https://www.rfc-editor.org/info/rfc7 | ||||
430" quoteTitle="true" derivedAnchor="RFC7430"> | ||||
<front> | ||||
<title>Analysis of Residual Threats and Possible Fixes for Multipath | ||||
TCP (MPTCP)</title> | ||||
<author initials="M." surname="Bagnulo" fullname="M. Bagnulo"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="C." surname="Paasch" fullname="C. Paasch"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="F." surname="Gont" fullname="F. Gont"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure | ||||
"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="C." surname="Raiciu" fullname="C. Raiciu"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2015" month="July"/> | ||||
<abstract> | ||||
<t>This document analyzes the residual threats for Multipath TCP ( | ||||
MPTCP) and explores possible solutions to address them.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="7430"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC7430"/> | ||||
</reference> | ||||
<reference anchor="RFC8041" target="https://www.rfc-editor.org/info/rfc8 | ||||
041" quoteTitle="true" derivedAnchor="RFC8041"> | ||||
<front> | ||||
<title>Use Cases and Operational Experience with Multipath TCP</titl | ||||
e> | ||||
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure | ||||
"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="C." surname="Paasch" fullname="C. Paasch"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="G." surname="Detal" fullname="G. Detal"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2017" month="January"/> | ||||
<abstract> | ||||
<t>This document discusses both use cases and operational experien | ||||
ce with Multipath TCP (MPTCP) in real networks. It lists several prominent use | ||||
cases where Multipath TCP has been considered and is being used. It also gives | ||||
insight to some heuristics and decisions that have helped to realize these use c | ||||
ases and suggests possible improvements.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="RFC" value="8041"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8041"/> | ||||
</reference> | ||||
<reference anchor="RFC8126" target="https://www.rfc-editor.org/info/rfc8 | ||||
126" quoteTitle="true" derivedAnchor="RFC8126"> | ||||
<front> | ||||
<title>Guidelines for Writing an IANA Considerations Section in RFCs | ||||
</title> | ||||
<author initials="M." surname="Cotton" fullname="M. Cotton"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="B." surname="Leiba" fullname="B. Leiba"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<author initials="T." surname="Narten" fullname="T. Narten"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date year="2017" month="June"/> | ||||
<abstract> | ||||
<t>Many protocols make use of points of extensibility that use con | ||||
stants to identify various protocol parameters. To ensure that the values in th | ||||
ese fields do not have conflicting uses and to promote interoperability, their a | ||||
llocations are often coordinated by a central record keeper. For IETF protocols | ||||
, that role is filled by the Internet Assigned Numbers Authority (IANA).</t> | ||||
<t>To make assignments in a given registry prudently, guidance des | ||||
cribing the conditions under which new values should be assigned, as well as whe | ||||
n and how modifications to existing values can be made, is needed. This documen | ||||
t defines a framework for the documentation of these guidelines by specification | ||||
authors, in order to assure that the provided guidance for the IANA Considerati | ||||
ons is clear and addresses the various issues that are likely in the operation o | ||||
f a registry.</t> | ||||
<t>This is the third edition of this document; it obsoletes RFC 52 | ||||
26.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="BCP" value="26"/> | ||||
<seriesInfo name="RFC" value="8126"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC8126"/> | ||||
</reference> | ||||
<reference anchor="I-D.ananth-tcpm-tcpoptext" quoteTitle="true" target=" | ||||
https://tools.ietf.org/html/draft-ananth-tcpm-tcpoptext-00" derivedAnchor="TCPLO | ||||
"> | ||||
<front> | ||||
<title>TCP option space extension</title> | ||||
<author initials="A" surname="Ramaiah" fullname="Anantha Ramaiah"> | ||||
<organization showOnFrontPage="true"/> | ||||
</author> | ||||
<date month="March" day="26" year="2012"/> | ||||
<abstract> | ||||
<t>The document goals are as follows: Firstly, this document summa | ||||
rizes the motivations for extending TCP option space. Secondly, It tries to sum | ||||
marize the various known issues that needs to be taken into account while extend | ||||
ing the TCP option space. Thirdly, it briefly provides a short summary of the v | ||||
arious TCP option space proposals that has been proposed so far. Some additiona | ||||
l proposals which includes variations to the existing proposals are also present | ||||
ed. The goal of this document is to rejuvenate the discussions on this topic and | ||||
eventually to converge on a scheme for extending TCP option space.</t> | ||||
</abstract> | ||||
</front> | ||||
<seriesInfo name="Internet-Draft" value="draft-ananth-tcpm-tcpoptext-0 | ||||
0"/> | ||||
<format type="TXT" target="http://www.ietf.org/internet-drafts/draft-a | ||||
nanth-tcpm-tcpoptext-00.txt"/> | ||||
<refcontent>Work in Progress</refcontent> | ||||
</reference> | ||||
</references> | ||||
</references> | </references> | |||
<section anchor="app_options" numbered="true" toc="include" removeInRFC="fal | ||||
se" pn="section-appendix.a"> | ||||
<name slugifiedName="name-notes-on-use-of-tcp-options">Notes on Use of TCP | ||||
Options</name> | ||||
<t pn="section-appendix.a-1">The TCP option space is limited due to the le | ||||
ngth of the Data Offset field in the TCP header (4 bits), which defines the TCP | ||||
header length in 32-bit words. With the standard TCP header being 20 bytes, this | ||||
leaves a maximum of 40 bytes for options, and many of these may already be used | ||||
by options such as timestamp and SACK.</t> | ||||
<t pn="section-appendix.a-2">We performed a brief study on the commonly us | ||||
ed TCP options in SYN, | ||||
data, and pure ACK packets and found that there is enough room | ||||
to fit all the options discussed in this document.</t> | ||||
<t pn="section-appendix.a-3">SYN packets typically include the following o | ||||
ptions: Maximum Segment Size (MSS) (4 bytes), | ||||
window scale (3 bytes), SACK permitted (2 bytes), and timestamp | ||||
(10 bytes). The sum of these options is 19 bytes. Some operating | ||||
systems appear to pad each option up to a word boundary, thus using 24 | ||||
bytes (a brief survey suggests that Windows XP and Mac OS X do this, where | ||||
as Linux does not). | ||||
<references title="Informative References"> | Optimistically, therefore, we have 21 bytes available, or 16 if options ha | |||
&RFC1122; | ve to be | |||
&RFC7323; | word-aligned. In either case, however, the SYN versions of | |||
&RFC1918; | MP_CAPABLE (12 bytes) and MP_JOIN (12 or 16 bytes) will fit in this remain | |||
&RFC2018; | ing space.</t> | |||
&RFC5681; | <t pn="section-appendix.a-4">Note that due to the use of a 64-bit data-lev | |||
&RFC2979; | el sequence space, it is | |||
&RFC2992; | feasible that MPTCP will not require the timestamp option for | |||
&RFC3022; | protection against wrapped sequence numbers (per the Protection | |||
&RFC3135; | Against Wrapped Sequences (PAWS) mechanism, as described in <xref target=" | |||
&RFC4086; | RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>), since | |||
&RFC4987; | the data-level sequence space has far less | |||
&RFC8126; | chance of wrapping. Confirmation of the validity of this optimization is | |||
&RFC6181; | left for further study.</t> | |||
&RFC6356; | <t pn="section-appendix.a-5">TCP data packets typically carry timestamp op | |||
&RFC6897; | tions in every packet, | |||
&RFC6182; | taking 10 bytes (or 12, with padding). That leaves 30 bytes (or 28, if | |||
&RFC6528; | word-aligned). The DSS option varies in length, depending on (1) whether t | |||
&RFC7413; | he | |||
&RFC7430; | Data Sequence Mapping, DATA_ACK, or both are included, (2) whether the | |||
&RFC8041; | sequence numbers in use are 4 or 8 octets, and (3) whether the | |||
checksum is present. The maximum size of the DSS option is 28 bytes, so ev | ||||
<!-- &TCPLO; draft-ananth-tcpm-tcpoptext-00; Expired--> | en that will fit in the available space. But unless a connection is both bidirec | |||
tional and high-bandwidth, it is unlikely that all that option space will be req | ||||
<reference anchor='TCPLO'> | uired on each DSS option.</t> | |||
<front> | <t pn="section-appendix.a-6">Within the DSS option, it is not necessary to | |||
<title>TCP option space extension</title> | include the Data Sequence Mapping and DATA_ACK in each packet, and in many case | |||
s it may be possible to alternate their presence (so long as the mapping covers | ||||
<author initials='A' surname='Ramaiah' fullname='Anantha Ramaiah'> | the data being sent in the subsequent packet). It would also be possible to alte | |||
<organization /> | rnate between 4-byte and 8-byte sequence numbers in each option.</t> | |||
</author> | <t pn="section-appendix.a-7">On subflow and connection setup, an MPTCP opt | |||
ion is also set on the third packet (an ACK). These are 20 bytes (for MP_CAPABLE | ||||
<date month='March' day='26' year='2012' /> | ) and 24 bytes (for MP_JOIN), both of which will fit in the available option spa | |||
ce.</t> | ||||
<abstract><t>The document goals are as follows: Firstly, this document summarize | <t pn="section-appendix.a-8">Pure ACKs in TCP typically contain only times | |||
s the motivations for extending TCP option space. Secondly, It tries to summari | tamps (10 bytes). Here, Multipath TCP typically | |||
ze the various known issues that needs to be taken into account while extending | ||||
the TCP option space. Thirdly, it briefly provides a short summary of the vario | ||||
us TCP option space proposals that has been proposed so far. Some additional pr | ||||
oposals which includes variations to the existing proposals are also presented. | ||||
The goal of this document is to rejuvenate the discussions on this topic and eve | ||||
ntually to converge on a scheme for extending TCP option space.</t></abstract> | ||||
</front> | ||||
<seriesInfo name='Work in' value='Progress' /> | ||||
</reference> | ||||
<reference anchor='norm' target="http://www.usenix.org/events/sec01/full_papers/ | ||||
handley/handley.pdf"><front><title abbrev="Network Intrusion Detection: Evasion, | ||||
Traffic Normalization, and End-to-End Protocol Semantics ">Network Intrusion De | ||||
tection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics</titl | ||||
e><author initials='M.' surname='Handley' fullname='Mark Handley'><organization> | ||||
ACIRI</organization></author><author initials='V.' surname='Paxson' fullname='Ve | ||||
rn Paxson'><organization>ACIRI</organization></author><author initials='C.' surn | ||||
ame='Kreibich' fullname='Christian Kreibich'><organization>Technische Universita | ||||
t Munchen</organization></author><date year="2001"/></front><seriesInfo name="Us | ||||
enix Security" value="2001"/></reference> | ||||
<reference anchor='howhard' target="https://www.usenix.org/conference/nsdi12/how | ||||
-hard-can-it-be-designing-and-implementing-deployable-multipath-tcp"> | ||||
<front><title abbrev="How Hard Can It Be? Designing and Implementing a Deployabl | ||||
e Multipath TCP">How Hard Can It Be? Designing and Implementing a Deployable Mul | ||||
tipath TCP</title> | ||||
<author initials='C.' surname='Raiciu' fullname='Costin Raiciu'><organization>Un | ||||
iversitatea Politehnica Bucuresti</organization></author> | ||||
<author initials='C.' surname='Paasch' fullname='Christoph Paasch'><organization | ||||
>Universite Catholique de Louvain</organization></author> | ||||
<author initials='S.' surname='Barre' fullname='Sebastien Barre'><organization>U | ||||
niversite Catholique de Louvain</organization></author> | ||||
<author initials='A.' surname='Ford' fullname='Alan Ford'><organization/></autho | ||||
r> | ||||
<author initials='M.' surname='Honda' fullname='Michio Honda'><organization>Keio | ||||
University</organization></author> | ||||
<author initials='F.' surname='Duchene' fullname='Fabien Duchene'><organization> | ||||
Universite Catholique de Louvain</organization></author> | ||||
<author initials='O.' surname='Bonaventure' fullname='Olivier Bonaventure'><orga | ||||
nization>Universite Catholique de Louvain</organization></author> | ||||
<author initials='M.' surname='Handley' fullname='Mark Handley'><organization>Un | ||||
iversity College London</organization></author> | ||||
<date year="2012" /> | ||||
</front> | ||||
<seriesInfo name="Usenix Symposium on Networked Systems Design and Implementatio | ||||
n" value="2012"/> | ||||
</reference> | ||||
<reference anchor='deployments' target="https://www.ietfjournal.org/multipath-tc | ||||
p-deployments/"><front><title abbrev="MPTCP Deployments">Multipath TCP Deploymen | ||||
ts</title><author initials='O.' surname='Bonaventure' fullname='Olivier Bonavent | ||||
ure'><organization>Universite Catholique de Louvain</organization></author><auth | ||||
or initials='S.' surname='Seo' fullname='SungHoon Seo'></author><date day="1" mo | ||||
nth="November" year="2016"/></front><seriesInfo name="IETF Journal" value="2016" | ||||
/></reference> | ||||
</references> | ||||
<section title="Notes on Use of TCP Options" anchor="app_options"> | ||||
<t>The TCP option space is limited due to the length of the Data Offset fi | ||||
eld in the TCP header (4 bits), which defines the TCP header length in 32-bit wo | ||||
rds. With the standard TCP header being 20 bytes, this leaves a maximum of 40 by | ||||
tes for options, and many of these may already be used by options such as timest | ||||
amp and SACK.</t> | ||||
<t>We have performed a brief study on the commonly used TCP options in SYN | ||||
, data, and pure ACK packets, and found that there is enough room to fit all the | ||||
options we propose using in this document.</t> | ||||
<t>SYN packets typically include Maximum Segment Size (MSS) (4 bytes), win | ||||
dow scale (3 bytes), SACK permitted (2 bytes), and timestamp (10 bytes) options. | ||||
Together these sum to 19 bytes. Some operating systems appear to pad each optio | ||||
n up to a word boundary, thus using 24 bytes (a brief survey suggests Windows XP | ||||
and Mac OS X do this, whereas Linux does not). | ||||
Optimistically, therefore, we have 21 bytes spare, or 16 if it has to be w | ||||
ord-aligned. In either case, however, the SYN versions of Multipath Capable (12 | ||||
bytes) and Join (12 or 16 bytes) options will fit in this remaining space.</t> | ||||
<t>Note that due to the use of a 64-bit data-level sequence space, it is f | ||||
easible that MPTCP will not require the timestamp option for protection against | ||||
wrapped sequence numbers (PAWS <xref target="RFC7323"/>), since the data-level s | ||||
equence space has far less chance of wrapping. Confirmation of the validity of t | ||||
his optimisation is for further study.</t> | ||||
<t>TCP data packets typically carry timestamp options in every packet, tak | ||||
ing 10 bytes (or 12 with padding). That leaves 30 bytes (or 28, if word-aligned) | ||||
. The Data Sequence Signal (DSS) option varies in length depending on whether th | ||||
e data sequence mapping and DATA_ACK are included, and whether the sequence numb | ||||
ers in use are 4 or 8 octets. The maximum size of the DSS option is 28 bytes, so | ||||
even that will fit in the available space. But unless a connection is both bidi | ||||
rectional and high-bandwidth, it is unlikely that all that option space will be | ||||
required on each DSS option.</t> | ||||
<t>Within the DSS option, it is not necessary to include the data sequence | ||||
mapping and DATA_ACK in each packet, and in many cases it may be possible to al | ||||
ternate their presence (so long as the mapping covers the data being sent in the | ||||
following packet). It would also be possible to alternate between 4- and 8-byte | ||||
sequence numbers in each option.</t> | ||||
<t>On subflow and connection setup, an MPTCP option is also set on the thi | ||||
rd packet (an ACK). These are 20 bytes (for Multipath Capable) and 24 bytes (for | ||||
Join), both of which will fit in the available option space.</t> | ||||
<t>Pure ACKs in TCP typically contain only timestamps (10 bytes). Here, Mu | ||||
ltipath TCP typically | ||||
needs to encode only the DATA_ACK (maximum of 12 bytes). Occasionally, ACKs will contain SACK information. Depending | needs to encode only the DATA_ACK (maximum of 12 bytes). Occasionally, ACKs will contain SACK information. Depending | |||
on the number of lost packets, SACK may utilize the entire option space. If a DATA_ACK had to be | on the number of lost packets, SACK may utilize the entire option space. If a DATA_ACK had to be | |||
included, then it is probably necessary to reduce the number of SACK blocks to accommodate the | included, then it is probably necessary to reduce the number of SACK blocks to accommodate the | |||
DATA_ACK. However, the presence of the DATA_ACK is unlikely to be necessary in a case where SACK is | DATA_ACK. However, the presence of the DATA_ACK is unlikely to be necessary in a case where SACK is | |||
in use, since until at least some of the SACK blocks have been retransmitted, the cumulative | in use, since until at least some of the SACK blocks have been retransmitted, the cumulative | |||
data-level ACK will not be moving forward (or if it does, due to retransmissions on another path, | data-level ACK will not be moving forward (or if it does, due to retransmissions on another path, | |||
then that path can also be used to transmit the new DATA_ACK).</t> | then that path can also be used to transmit the new DATA_ACK).</t> | |||
<t pn="section-appendix.a-9">The ADD_ADDR option can be between 16 and 30 | ||||
<t>The ADD_ADDR option can be between 16 and 30 bytes, depending on whethe | bytes, depending on | |||
r IPv4 or IPv6 is used, and whether or not the port number is present. It is unl | (1) whether IPv4 or IPv6 is used and (2) whether or not the port number is | |||
ikely that such signaling would fit in a data packet (although if there is space | present. It is unlikely that such signaling would fit in a data packet | |||
, it is fine to include it). It is recommended to use duplicate ACKs with no oth | (although if there is space, it is fine to include it). It is | |||
er payload or options in order to transmit these rare signals. Note this is the | recommended that duplicate ACKs not be used with any other payload or opti | |||
reason for mandating that duplicate ACKs with MPTCP options are not taken as a s | ons, in | |||
ignal of congestion.</t> | order to transmit these rare signals. Note that this is the reason for | |||
mandating that duplicate ACKs with MPTCP options not be taken as a signal | ||||
of congestion.</t> | ||||
</section> | </section> | |||
<section anchor="app_tfo" numbered="true" toc="include" removeInRFC="false" | ||||
<section title="TCP Fast Open and MPTCP" anchor="app_tfo"> | pn="section-appendix.b"> | |||
<t>TCP Fast Open (TFO) is an experimental TCP extension, described in | <name slugifiedName="name-tcp-fast-open-and-mptcp">TCP Fast Open and MPTCP | |||
<xref target="RFC7413"/>, which has been introduced to allow sending data | </name> | |||
<t pn="section-appendix.b-1">TCP Fast Open (TFO) is an experimental TCP ex | ||||
tension, described in | ||||
<xref target="RFC7413" format="default" sectionFormat="of" derivedContent= | ||||
"RFC7413"/>, which has been introduced to | ||||
allow the sending of data | ||||
one RTT earlier than with regular TCP. This is | one RTT earlier than with regular TCP. This is | |||
considered a valuable gain as very short connections are very common, | considered a valuable gain, as very short connections are very common, | |||
especially for HTTP request/response schemes. It achieves this by sending | especially for HTTP request/response schemes. It achieves this by sending | |||
the SYN-segment together with the application's data and allowing the list | the SYN segment together with the application's data and allowing the list | |||
ener to reply | ener to reply | |||
immediately with data after the SYN/ACK. <xref target="RFC7413"/> secures | immediately with data after the SYN/ACK. <xref target="RFC7413" format="de | |||
this mechanism, by using a new TCP option that includes a cookie which | fault" sectionFormat="of" derivedContent="RFC7413"/> secures | |||
this mechanism by using a new TCP option that includes a cookie that | ||||
is negotiated in a preceding connection.</t> | is negotiated in a preceding connection.</t> | |||
<t pn="section-appendix.b-2">When using TFO in conjunction with MPTCP, the | ||||
re are two key | ||||
points to take into account, as detailed below.</t> | ||||
<section anchor="tfocookie" numbered="true" toc="include" removeInRFC="fal | ||||
se" pn="section-b.1"> | ||||
<name slugifiedName="name-tfo-cookie-request-with-mpt">TFO Cookie Reques | ||||
t with MPTCP</name> | ||||
<t pn="section-b.1-1">When a TFO initiator first connects to a listener, | ||||
it cannot immediately | ||||
include data in the SYN for security reasons <xref target="RFC7413" fo | ||||
rmat="default" sectionFormat="of" derivedContent="RFC7413"/>. | ||||
Instead, it requests a cookie that will be used in subsequent | ||||
connections. This is done with the TCP cookie request/response options | ||||
, | ||||
of 2 bytes and 6-18 bytes, respectively (depending on the chosen cooki | ||||
e length).</t> | ||||
<t pn="section-b.1-2">TFO and MPTCP can be combined, provided that the t | ||||
otal length of all the | ||||
options does not exceed the maximum 40 bytes possible in TCP: | ||||
<t>When using TCP Fast Open in conjunction with MPTCP, there are two key | </t> | |||
points to take into account, detailed hereafter.</t> | <ul spacing="normal" bare="false" empty="false" pn="section-b.1-3"> | |||
<li pn="section-b.1-3.1">In the SYN: MPTCP uses a 4-byte MP_CAPABLE op | ||||
<section title="TFO cookie request with MPTCP" anchor="tfocookie"> | tion. The sum | |||
<t>When a TFO initiator first connects to a listener, it cannot immedia | of the MPTCP and TFO options is 6 bytes. With typical TCP options usin | |||
tely | g up | |||
include data in the SYN for security reasons <xref target="RFC7413"/>. | to 19 bytes in the SYN (24 bytes if options are padded at a word bound | |||
Instead, it requests a cookie that will be used in subsequent | ary), | |||
connections. This is done with the TCP cookie request/response options, | there is enough space to combine the MP_CAPABLE with the TFO cookie re | |||
of respectively 2 bytes and 6-18 bytes (depending on the chosen cookie | quest.</li> | |||
length).</t> | <li pn="section-b.1-3.2">In the SYN + ACK: MPTCP uses a 12-byte MP_CAP | |||
ABLE option, but | ||||
<t>TFO and MPTCP can be combined provided that the total length of all | now the TFO option can be as long as 18 bytes. Since the maximum optio | |||
the | n length | |||
options does not exceed the maximum 40 bytes possible in TCP: | may be exceeded, it is up to the listener to avoid this problem by usi | |||
ng a | ||||
<list style="symbols"> | shorter cookie. | |||
<t>In the SYN: MPTCP uses a 4-bytes long MP_CAPABLE option. The MPTCP | As an example, if we consider that 19 bytes are used for classical | |||
and TFO options sum up to 6 bytes. With typical TCP-options using up | TCP options, the maximum possible cookie length would be | |||
to 19 bytes in the SYN (24 bytes if options are padded at a word bounda | 7 bytes. Note that, for the SYN packet, the same limitation applies to | |||
ry), | subsequent | |||
there is enough space to combine the MP_CAPABLE with the TFO Cookie Req | connections (because the initiator then echoes | |||
uest.</t> | the cookie back to the listener). Finally, if the security impact of r | |||
educing | ||||
<t>In the SYN+ACK: MPTCP uses a 12-bytes long MP_CAPABLE option, but | the cookie size is not deemed acceptable, the listener can reduce the | |||
now TFO can be as long as 18 bytes. Since the maximum option length | amount of space used by other TCP options by omitting the TCP timestam | |||
may be exceeded, it is up to the listener to solve this by using a | ps (as | |||
shorter cookie. | outlined in <xref target="app_options" format="default" sectionFormat= | |||
As an example, if we consider that 19 bytes are used for classical | "of" derivedContent="Appendix A"/>).</li> | |||
TCP options, the maximum possible cookie length would be | </ul> | |||
of 7 bytes. Note that the same limitation applies to subsequent | </section> | |||
connections, for the SYN packet (because the initiator then echoes back | <section anchor="tfodata" numbered="true" toc="include" removeInRFC="false | |||
the cookie to the listener). Finally, if the security impact of reducin | " pn="section-b.2"> | |||
g | <name slugifiedName="name-data-sequence-mapping-under">Data Sequence Map | |||
the cookie size is not deemed acceptable, the listener can reduce the | ping under TFO</name> | |||
amount of other TCP-options by omitting the TCP timestamps (as | <t pn="section-b.2-1">In the TCP establishment phase, MPTCP uses a key e | |||
outlined in <xref target="app_options"/>).</t> | xchange that is | |||
</list></t> | used to generate the Initial Data Sequence Numbers (IDSNs). In particu | |||
</section> | lar, | |||
the SYN with MP_CAPABLE occupies the first octet of data sequence | ||||
<section title="Data sequence mapping under TFO" anchor="tfodata"> | space. With TFO, one way to handle the data sent together with the SYN | |||
<t>MPTCP uses, in the TCP establishment phase, a key exchange that is | would be to consider an implicit DSS mapping that covers that SYN segm | |||
used to generate the Initial Data Sequence Numbers (IDSNs). In particul | ent | |||
ar, | (since there is not enough space in the SYN to include a DSS option). | |||
the SYN with MP_CAPABLE occupies the first octet of the data sequence | The problem with that approach is that if a middlebox modifies the TFO | |||
space. With TFO, one way to handle the data sent together with the SYN | data, this will not be noticed by MPTCP because of the absence of a | |||
would be to consider an implicit DSS mapping that covers that SYN segme | DSS checksum. For example, a TCP‑aware (but not MPTCP-aware) middlebox | |||
nt | could | |||
(since there is not enough space in the SYN to include a DSS option). | insert bytes at the beginning of the stream and adapt the TCP checksum | |||
The problem with that approach is that if a middlebox modifies the TFO | and sequence numbers accordingly. With an implicit mapping, this infor | |||
data, this will not be noticed by MPTCP because of the absence of a | mation would | |||
DSS-checksum. For example, a TCP (but not MPTCP)-aware middlebox could | give to the initiator and listener a different view of the DSS | |||
insert bytes at the beginning of the stream and adapt the TCP checksum | mapping; there would be no | |||
and sequence numbers accordingly. With an implicit mapping, this would | way to detect this inconsistency, because the DSS checksum is not pres | |||
give to initiator and listener a different view on the DSS-mapping, wit | ent.</t> | |||
h no | <t pn="section-b.2-2">To solve this issue, the TFO data must not be cons | |||
way to detect this inconsistency as the DSS checksum is not present.</t | idered part of the | |||
> | data sequence number space: the SYN with MP_CAPABLE still occupies | |||
the first octet of data sequence space, but then the first non-TFO | ||||
<t>To solve this, the TFO data must not be considered part of the | data byte occupies the second octet. This guarantees that, if the | |||
Data Sequence Number space: the SYN with MP_CAPABLE still occupies | use of the DSS checksum is negotiated, all data in the data sequence | |||
the first octet of data sequence space, but then the first non-TFO | number space is checksummed. We also note that this does not entail | |||
data byte occupies the second octet. This guarantees that, if the | a loss of functionality, because TFO data is only ever sent on the | |||
use of DSS-checksum is negotiated, all data in the data sequence | initial subflow, before any attempt to create additional subflows.</t> | |||
number space is checksummed. We also note that this does not entail | </section> | |||
a loss of functionality, because TFO-data is only ever sent on the | <section anchor="tfoexamples" numbered="true" toc="include" removeInRFC="f | |||
initial subflow before any attempt to create additional subflows.</t> | alse" pn="section-b.3"> | |||
</section> | <name slugifiedName="name-connection-establishment-ex">Connection Establ | |||
ishment Examples</name> | ||||
<section title="Connection establishment examples" anchor="tfoexamples"> | <t pn="section-b.3-1">A few examples of possible "TFO + MPTCP" | |||
<t>The following shows a few examples of possible TFO+MPTCP | establishment scenarios are shown below.</t> | |||
establishment scenarios.</t> | <t pn="section-b.3-2">Before an initiator can send data together with th | |||
e SYN, it must request | ||||
<t>Before an initiator can send data together with the SYN, it must re | a cookie from the listener, as shown in <xref target="fig_tfocookie" f | |||
quest | ormat="default" sectionFormat="of" derivedContent="Figure 18"/>. (Note: The se | |||
a cookie to the listener, as shown in <xref target="fig_tfocookie"/>. | quence number | |||
This is done by simply combining the TFO and MPTCP options.</t> | and length are annotated in <xref target="fig_tfocookie" format="default" sectio | |||
nFormat="of" derivedContent="Figure 18"/> as | ||||
<figure align="center" anchor="fig_tfocookie" title="Cookie request - | Seq(Length) (e.g., "S. 0(0)") and used as such in the subsequent figures | |||
sequence number and length are annotated as Seq(Length) and used hereafter in th | (e.g., "S 0(20)" in <xref target="fig_tfodata" format="default" section | |||
e figures."> | Format="of" derivedContent="Figure 19"/>).) This is done by simply combining the | |||
<artwork align="left"><![CDATA[ | TFO and MPTCP options.</t> | |||
initiator listener | <figure anchor="fig_tfocookie" align="left" suppress-title="false" pn="f | |||
| | | igure-18"> | |||
| S Seq=0(Length=0) <MP_CAPABLE>, <TFO cookie request> | | <name slugifiedName="name-cookie-request">Cookie Request</name> | |||
| -----------------------------------------------------------> | | <artwork align="left" name="" type="" alt="" pn="section-b.3-3.1"> | |||
| | | initiator listener | |||
| S. 0(0) ack 1 <MP_CAPABLE>, <TFO cookie> | | | | | |||
| <----------------------------------------------------------- | | | S Seq=0(Length=0) <MP_CAPABLE>, <TFO cookie request> | | |||
| | | | --------------------------------------------------------> | | |||
| . 0(0) ack 1 <MP_CAPABLE> | | | | | |||
| -----------------------------------------------------------> | | | S. 0(0) ack 1 <MP_CAPABLE>, <TFO cookie> | | |||
| | | | <-------------------------------------------------------- | | |||
]]></artwork> | | | | |||
</figure> | | . 0(0) ack 1 <MP_CAPABLE> | | |||
| --------------------------------------------------------> | | ||||
<t>Once this is done, the received cookie can be used for TFO, as show | | | </artwork> | |||
n | </figure> | |||
in <xref target="fig_tfodata"/>. In this example, the initiator first | <t pn="section-b.3-4">Once this is done, the received cookie can be used | |||
sends 20 bytes in the SYN. The listener immediately replies with 100 by | for TFO, as shown | |||
tes | in <xref target="fig_tfodata" format="default" sectionFormat="of" deri | |||
following the SYN-ACK upon which the initiator replies with 20 more byt | vedContent="Figure 19"/>. In this example, the initiator first | |||
es. | sends 20 bytes in the SYN. The listener immediately replies with 100 b | |||
Note that the last segment in the figure | ytes | |||
following the SYN-ACK, to which the initiator replies with 20 more byt | ||||
es. | ||||
Note that the last segment in the figure | ||||
has a TCP sequence number of 21, while the DSS subflow sequence | has a TCP sequence number of 21, while the DSS subflow sequence | |||
number is 1 (because the TFO data is not part of the data sequence | number is 1 (because the TFO data is not part of the data sequence | |||
number space, as explained in Section <xref target="tfodata"/>).</t> | number space, as explained in <xref target="tfodata" format="default" | |||
sectionFormat="of" derivedContent="Appendix B.2"/>).</t> | |||
<figure align="center" anchor="fig_tfodata" title="The listener support | <figure anchor="fig_tfodata" align="left" suppress-title="false" pn="fig | |||
s TFO"> | ure-19"> | |||
<artwork align="left"><![CDATA[ | <name slugifiedName="name-the-listener-supports-tfo">The Listener Supp | |||
initiator listener | orts TFO</name> | |||
| | | <artwork align="left" name="" type="" alt="" pn="section-b.3-5.1"> | |||
| S 0(20) <MP_CAPABLE>, <TFO cookie> | | initiator listener | |||
| -----------------------------------------------------------> | | | | | |||
| | | | S 0(20) <MP_CAPABLE>, <TFO cookie> | | |||
| S. 0(0) ack 21 <MP_CAPABLE> | | | --------------------------------------------------------> | | |||
| <----------------------------------------------------------- | | | | | |||
| | | | S. 0(0) ack 21 <MP_CAPABLE> | | |||
| . 1(100) ack 21 <DSS ack=1 seq=1 ssn=1 dlen=100> | | | <-------------------------------------------------------- | | |||
| <----------------------------------------------------------- | | | | | |||
| | | | . 1(100) ack 21 <DSS ack=1 seq=1 ssn=1 dlen=100> | | |||
| . 21(0) ack 1 <MP_CAPABLE> | | | <-------------------------------------------------------- | | |||
| -----------------------------------------------------------> | | | | | |||
| | | | . 21(0) ack 1 <MP_CAPABLE> | | |||
| . 21(20) ack 101 <DSS ack=101 seq=1 ssn=1 dlen=20> | | | --------------------------------------------------------> | | |||
| -----------------------------------------------------------> | | | | | |||
| | | | . 21(20) ack 101 <DSS ack=101 seq=1 ssn=1 dlen=20> | | |||
]]></artwork> | | --------------------------------------------------------> | | |||
</figure> | | | </artwork> | |||
</figure> | ||||
<t>In <xref target="fig_tfofallback"/>, the listener does not support | <t pn="section-b.3-6">In <xref target="fig_tfofallback" format="default" | |||
TFO. The initiator detects | sectionFormat="of" derivedContent="Figure 20"/>, the listener does not support | |||
that no state is created in the listener (as no data is acked), and no | TFO. The initiator detects | |||
w | that no state is created in the listener (as no data is ACKed) and now | |||
sends the MP_CAPABLE in the third ack, in order for the listener to | sends the MP_CAPABLE in the third packet, in order for the listener to | |||
build its MPTCP context at then end of the establishment. Now, the | build its MPTCP context at the end of the establishment. Now, the | |||
tfo data, retransmitted, becomes part of the data sequence mapping | TFO data, when retransmitted, becomes part of the Data Sequence Mappin | |||
because it is effectively sent (in fact re-sent) after the | g | |||
because it is effectively sent (in fact re‑sent) after the | ||||
establishment.</t> | establishment.</t> | |||
<figure anchor="fig_tfofallback" align="left" suppress-title="false" pn= | ||||
<figure align="center" anchor="fig_tfofallback" title="The listener doe | "figure-20"> | |||
s not support TFO"> | <name slugifiedName="name-the-listener-does-not-suppo">The Listener Do | |||
<artwork align="left"><![CDATA[ | es Not Support TFO</name> | |||
initiator listener | <artwork align="left" name="" type="" alt="" pn="section-b.3-7.1"> | |||
| | | initiator listener | |||
| S 0(20) <MP_CAPABLE>, <TFO cookie> | | | | | |||
| -----------------------------------------------------------> | | | S 0(20) <MP_CAPABLE>, <TFO cookie> | | |||
| | | | --------------------------------------------------------> | | |||
| S. 0(0) ack 1 <MP_CAPABLE> | | | | | |||
| <----------------------------------------------------------- | | | S. 0(0) ack 1 <MP_CAPABLE> | | |||
| | | | <-------------------------------------------------------- | | |||
| . 1(0) ack 1 <MP_CAPABLE> | | | | | |||
| -----------------------------------------------------------> | | | . 1(0) ack 1 <MP_CAPABLE> | | |||
| | | | --------------------------------------------------------> | | |||
| . 1(20) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=20> | | | | | |||
| -----------------------------------------------------------> | | | . 1(20) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=20> | | |||
| | | | --------------------------------------------------------> | | |||
| . 0(0) ack 21 <DSS ack=21 seq=1 ssn=1 dlen=0> | | | | | |||
| <----------------------------------------------------------- | | | . 0(0) ack 21 <DSS ack=21 seq=1 ssn=1 dlen=0> | | |||
| | | | <-------------------------------------------------------- | | |||
]]></artwork> | | | </artwork> | |||
</figure> | </figure> | |||
<t pn="section-b.3-8">It is also possible that the listener acknowledges | ||||
<t>It is also possible that the listener acknowledges only part of the | only part of the TFO | |||
TFO | data, as illustrated in <xref target="fig_tfopartial" format="default" | |||
data, as illustrated in <xref target="fig_tfopartial"/>. The | sectionFormat="of" derivedContent="Figure 21"/>. The | |||
initiator will simply retransmit the missing data together with a DSS-m | initiator will simply retransmit the missing data together with a | |||
apping.</t> | DSS mapping.</t> | |||
<figure anchor="fig_tfopartial" align="left" suppress-title="false" pn=" | ||||
<figure align="center" anchor="fig_tfopartial" title="Partial data ackn | figure-21"> | |||
owledgement"> | <name slugifiedName="name-partial-data-acknowledgment">Partial Data Ac | |||
<artwork align="left"><![CDATA[ | knowledgment</name> | |||
initiator listener | <artwork align="left" name="" type="" alt="" pn="section-b.3-9.1"> | |||
| | | initiator listener | |||
| S 0(1000) <MP_CAPABLE>, <TFO cookie> | | | | | |||
| -----------------------------------------------------------> | | | S 0(1000) <MP_CAPABLE>, <TFO cookie> | | |||
| | | | --------------------------------------------------------> | | |||
| S. 0(0) ack 501 <MP_CAPABLE> | | | | | |||
| <----------------------------------------------------------- | | | S. 0(0) ack 501 <MP_CAPABLE> | | |||
| | | | <-------------------------------------------------------- | | |||
| . 501(0) ack 1 <MP_CAPABLE> | | | | | |||
| -----------------------------------------------------------> | | | . 501(0) ack 1 <MP_CAPABLE> | | |||
| | | | --------------------------------------------------------> | | |||
| . 501(500) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=500> | | | | | |||
| -----------------------------------------------------------> | | | . 501(500) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=500> | | |||
| | | | --------------------------------------------------------> | | |||
]]></artwork> | | | </artwork> | |||
</figure> | </figure> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="app_tcb" numbered="true" toc="include" removeInRFC="false" | ||||
<section title="Control Blocks" anchor="app_tcb"> | pn="section-appendix.c"> | |||
<t>Conceptually, an MPTCP connection can be represented as an MPTCP protocol con | <name slugifiedName="name-control-blocks">Control Blocks</name> | |||
trol | <t pn="section-appendix.c-1">Conceptually, an MPTCP connection can be repr | |||
esented as an MPTCP protocol control | ||||
block (PCB) that contains several variables that track the progress and the | block (PCB) that contains several variables that track the progress and the | |||
state of the MPTCP connection and a set of linked TCP control blocks | state of the MPTCP connection and a set of linked TCP control blocks | |||
that correspond to the subflows that have been established.</t> | that correspond to the subflows that have been established.</t> | |||
<t pn="section-appendix.c-2">RFC 793 <xref target="RFC0793" format="defaul | ||||
<t>RFC 793 <xref target="RFC0793"/> specifies several state variables. Whenever | t" sectionFormat="of" derivedContent="RFC0793"/> specifies several state variabl | |||
possible, we reuse | es. Whenever possible, we reuse | |||
the same terminology as RFC 793 to describe the state variables that are | the same terminology as RFC 793 to describe the state variables that are | |||
maintained by MPTCP.</t> | maintained by MPTCP.</t> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-c.1 | ||||
<section title="MPTCP Control Block"> | "> | |||
<t>The MPTCP control block contains the following variable per connection.</t> | <name slugifiedName="name-mptcp-control-block">MPTCP Control Block</name | |||
> | ||||
<section title="Authentication and Metadata"> | <t pn="section-c.1-1">The MPTCP control block contains the following var | |||
<t><list style="hanging"> | iables per connection.</t> | |||
<t hangText="Local.Token (32 bits):"> This is the token chosen by the local host | <section numbered="true" toc="include" removeInRFC="false" pn="section-c | |||
on | .1.1"> | |||
<name slugifiedName="name-authentication-and-metadata">Authentication | ||||
and Metadata</name> | ||||
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.1-1"> | ||||
<dt pn="section-c.1.1-1.1">Local.Token (32 bits):</dt> | ||||
<dd pn="section-c.1.1-1.2"> This is the token chosen by the local ho | ||||
st on | ||||
this MPTCP connection. The token must be unique among all established | this MPTCP connection. The token must be unique among all established | |||
MPTCP connections, and is generated from the local key.</t> | MPTCP connections and is generated from the local key.</dd> | |||
<t hangText="Local.Key (64 bits):"> This is the key sent by the local host on th | <dt pn="section-c.1.1-1.3">Local.Key (64 bits):</dt> | |||
is | <dd pn="section-c.1.1-1.4"> This is the key sent by the local host o | |||
MPTCP connection.</t> | n this | |||
<t hangText="Remote.Token (32 bits):"> This is the token chosen by the remote ho | MPTCP connection.</dd> | |||
st on | <dt pn="section-c.1.1-1.5">Remote.Token (32 bits):</dt> | |||
this MPTCP connection, generated from the remote key.</t> | <dd pn="section-c.1.1-1.6"> This is the token chosen by the remote h | |||
<t hangText="Remote.Key (64 bits):"> This is the key chosen by the remote host o | ost on | |||
n | this MPTCP connection, generated from the remote key.</dd> | |||
this MPTCP connection</t> | <dt pn="section-c.1.1-1.7">Remote.Key (64 bits):</dt> | |||
<t hangText="MPTCP.Checksum (flag):"> This flag is set to true if at least one o | <dd pn="section-c.1.1-1.8"> This is the key chosen by the remote hos | |||
f the | t on | |||
hosts has set the A bit in the MP_CAPABLE options exchanged during connection es | this MPTCP connection.</dd> | |||
tablishment, | <dt pn="section-c.1.1-1.9">MPTCP.Checksum (flag):</dt> | |||
and is set to false otherwise. If this flag is set, the checksum must be comput | <dd pn="section-c.1.1-1.10"> This flag is set to true if at least on | |||
ed in | e of the | |||
all DSS options.</t> | hosts has set the "A" bit in the MP_CAPABLE options exchanged during | |||
</list></t> | connection establishment; otherwise, | |||
</section> | it is set to false. If this flag is set, the checksum must be computed in | |||
all DSS options.</dd> | ||||
<section title="Sending Side"> | </dl> | |||
<t><list style="hanging"> | </section> | |||
<t hangText="SND.UNA (64 bits):"> This is the data sequence number of the next b | <section numbered="true" toc="include" removeInRFC="false" pn="section-c | |||
yte to be | .1.2"> | |||
<name slugifiedName="name-sending-side">Sending Side</name> | ||||
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.2-1"> | ||||
<dt pn="section-c.1.2-1.1">SND.UNA (64 bits):</dt> | ||||
<dd pn="section-c.1.2-1.2"> This is the data sequence number of the | ||||
next byte to be | ||||
acknowledged, at the MPTCP connection level. This variable is updated | acknowledged, at the MPTCP connection level. This variable is updated | |||
upon reception of a DSS option containing a DATA_ACK.</t> | upon reception of a DSS option containing a DATA_ACK.</dd> | |||
<t hangText="SND.NXT (64 bits):"> This is the data sequence number of the next b | <dt pn="section-c.1.2-1.3">SND.NXT (64 bits):</dt> | |||
yte to be | <dd pn="section-c.1.2-1.4"> This is the data sequence number of the | |||
sent. SND.NXT is used to determine the value of the DSN in the DSS option.</t> | next byte to be | |||
<t hangText="SND.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the s | sent. SND.NXT is used to determine the value of the DSN in the DSS option.</dd> | |||
ending window. MPTCP | <dt pn="section-c.1.2-1.5">SND.WND (32 bits):</dt> | |||
maintains the sending window at the MPTCP connection level and the same | <dd pn="section-c.1.2-1.6"> This is the send window. 32 bits if the | |||
window is shared by all subflows. All subflows use the MPTCP connection | features in RFC | |||
level SND.WND to compute the SEG.WND value that is sent in each | 7323 are used; 16 bits otherwise. MPTCP maintains the send window at | |||
transmitted segment.</t> | the MPTCP connection level, and the same | |||
</list></t> | window is shared by all subflows. All subflows use the MPTCP connection-level | |||
</section> | SND.WND to compute the SEG.WND value that is sent in each | |||
transmitted segment.</dd> | ||||
<section title="Receiving Side"> | </dl> | |||
<t><list style="hanging"> | </section> | |||
<t hangText="RCV.NXT (64 bits):"> This is the data sequence number of the next b | <section numbered="true" toc="include" removeInRFC="false" pn="section-c | |||
yte that | .1.3"> | |||
<name slugifiedName="name-receiving-side">Receiving Side</name> | ||||
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.3-1"> | ||||
<dt pn="section-c.1.3-1.1">RCV.NXT (64 bits):</dt> | ||||
<dd pn="section-c.1.3-1.2"> This is the data sequence number of the | ||||
next byte that | ||||
is expected on the MPTCP connection. This state variable is modified | is expected on the MPTCP connection. This state variable is modified | |||
upon reception of in-order data. The value of RCV.NXT is used to specify | upon reception of in-order data. The value of RCV.NXT is used to specify | |||
the DATA_ACK that is sent in the DSS option on all subflows.</t> | the DATA_ACK that is sent in the DSS option on all subflows.</dd> | |||
<t hangText="RCV.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the c | <dt pn="section-c.1.3-1.3">RCV.WND (32 bits):</dt> | |||
onnection-level | <dd pn="section-c.1.3-1.4"> This is the connection-level receive win | |||
receive window, which is the maximum of the RCV.WND on all the subflows.</t> | dow, which is the | |||
</list></t> | maximum of the RCV.WND on all the subflows. 32 bits if the features | |||
</section> | in RFC 7323 are used; 16 bits otherwise.</dd> | |||
</section> | </dl> | |||
</section> | ||||
<section title="TCP Control Blocks"> | </section> | |||
<t>The MPTCP control block also contains a list of the TCP control blocks | <section numbered="true" toc="include" removeInRFC="false" pn="section-c.2 | |||
"> | ||||
<name slugifiedName="name-tcp-control-blocks">TCP Control Blocks</name> | ||||
<t pn="section-c.2-1">The MPTCP control block also contains a list of th | ||||
e TCP control blocks | ||||
that are associated with the MPTCP connection.</t> | that are associated with the MPTCP connection.</t> | |||
<t pn="section-c.2-2">Note that the TCP control block on the TCP subflow | ||||
<t>Note that the TCP control block on the TCP subflows does not contain the | s does not contain the | |||
RCV.WND and SND.WND state variables as these are maintained at the MPTCP | RCV.WND and SND.WND state variables, as these are maintained at the MPTCP | |||
connection level and not at the subflow level.</t> | connection level and not at the subflow level.</t> | |||
<t pn="section-c.2-3">Inside each TCP control block, the following state | ||||
<t>Inside each TCP control block, the following state variables are defined.</t> | variables are defined.</t> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-c | ||||
<section title="Sending Side"> | .2.1"> | |||
<t><list style="hanging"> | <name slugifiedName="name-sending-side-2">Sending Side</name> | |||
<t hangText="SND.UNA (32 bits):"> This is the sequence number of the next byte t | <dl newline="false" spacing="normal" indent="3" pn="section-c.2.1-1"> | |||
o be | <dt pn="section-c.2.1-1.1">SND.UNA (32 bits):</dt> | |||
<dd pn="section-c.2.1-1.2"> This is the sequence number of the next | ||||
byte to be | ||||
acknowledged on the subflow. This variable is updated upon reception of | acknowledged on the subflow. This variable is updated upon reception of | |||
each TCP acknowledgment on the subflow.</t> | each TCP acknowledgment on the subflow.</dd> | |||
<t hangText="SND.NXT (32 bits):"> This is the sequence number of the next byte t | <dt pn="section-c.2.1-1.3">SND.NXT (32 bits):</dt> | |||
o be | <dd pn="section-c.2.1-1.4"> This is the sequence number of the next | |||
byte to be | ||||
sent on the subflow. SND.NXT is used to set the value of SEG.SEQ upon | sent on the subflow. SND.NXT is used to set the value of SEG.SEQ upon | |||
transmission of the next segment.</t> | transmission of the next segment.</dd> | |||
</list></t> | </dl> | |||
</section> | </section> | |||
<section numbered="true" toc="include" removeInRFC="false" pn="section-c | ||||
<section title="Receiving Side"> | .2.2"> | |||
<t><list style="hanging"> | <name slugifiedName="name-receiving-side-2">Receiving Side</name> | |||
<t hangText="RCV.NXT (32 bits):"> This is the sequence number of the next byte t | <dl newline="false" spacing="normal" indent="3" pn="section-c.2.2-1"> | |||
hat | <dt pn="section-c.2.2-1.1">RCV.NXT (32 bits):</dt> | |||
<dd pn="section-c.2.2-1.2"> This is the sequence number of the next | ||||
byte that | ||||
is expected on the subflow. This state variable is modified upon | is expected on the subflow. This state variable is modified upon | |||
reception of in-order segments. The value of RCV.NXT is copied to the | reception of in-order segments. The value of RCV.NXT is copied to the | |||
SEG.ACK field of the next segments transmitted on the subflow.</t> | SEG.ACK field of the next segments transmitted on the subflow.</dd> | |||
<t hangText="RCV.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the | <dt pn="section-c.2.2-1.3">RCV.WND (32 bits):</dt> | |||
subflow-level receive window that is updated with the window field from the | <dd pn="section-c.2.2-1.4">This is the subflow-level receive window | |||
segments received on this subflow.</t> | that is updated with | |||
</list></t> | the window field from the segments received on this subflow. 32 | |||
</section> | bits if the features in RFC 7323 are used; 16 bits otherwise.</dd> | |||
</section> | </dl> | |||
</section> | ||||
</section> | </section> | |||
</section> | ||||
<section title="Finite State Machine" anchor="app_fsm"> | <section anchor="app_fsm" numbered="true" toc="include" removeInRFC="false" | |||
<t>The diagram in <xref target="fig_fsm"/> shows the Finite State Machine | pn="section-appendix.d"> | |||
for connection-level closure. This illustrates how the DATA_FIN connection-leve | <name slugifiedName="name-finite-state-machine">Finite State Machine</name | |||
l signal (indicated in the diagram as the DFIN flag on a DATA_ACK) interacts wit | > | |||
h subflow-level FINs, and permits "break-before-make" handover between subflows. | <t pn="section-appendix.d-1">The diagram in <xref target="fig_fsm" format= | |||
</t> | "default" sectionFormat="of" derivedContent="Figure 22"/> shows the | |||
Finite State Machine for connection-level closure. This illustrates how | ||||
<figure align="center" anchor="fig_fsm" title="Finite State Machine for Co | the DATA_FIN connection-level signal (indicated in the diagram as the | |||
nnection Closure"> | DFIN flag on a DATA_ACK) (1) interacts with subflow-level FINs and (2) per | |||
<artwork align="left"><![CDATA[ | mits break-before-make handover between subflows.</t> | |||
+---------+ | <figure anchor="fig_fsm" align="left" suppress-title="false" pn="figure-22 | |||
| M_ESTAB | | "> | |||
+---------+ | <name slugifiedName="name-finite-state-machine-for-co">Finite State Mach | |||
M_CLOSE | | rcv DATA_FIN | ine for Connection Closure</name> | |||
------- | | ------- | <artwork align="left" name="" type="" alt="" pn="section-appendix.d-2.1" | |||
+---------+ snd DATA_FIN / \ snd DATA_ACK[DFIN] +---------+ | > | |||
| M_FIN |<----------------- ------------------->| M_CLOSE | | +---------+ | |||
| WAIT-1 |--------------------------- | WAIT | | | M_ESTAB | | |||
+---------+ rcv DATA_FIN \ +---------+ | +---------+ | |||
| rcv DATA_ACK[DFIN] ------- | M_CLOSE | | M_CLOSE | | rcv DATA_FIN | |||
| -------------- snd DATA_ACK | ------- | | ------- | | ------- | |||
| CLOSE all subflows | snd DATA_FIN | | +---------+ snd DATA_FIN / \ snd DATA_ACK[DFIN] +-------+ | |||
V V V | | M_FIN |<----------------- ------------------->|M_CLOSE| | |||
+-----------+ +-----------+ +-----------+ | | WAIT-1 |--------------------------- | WAIT | | |||
|M_FINWAIT-2| | M_CLOSING | | M_LAST-ACK| | +---------+ rcv DATA_FIN \ +-------+ | |||
+-----------+ +-----------+ +-----------+ | | rcv DATA_ACK[DFIN] ------- | M_CLOSE | | |||
| rcv DATA_ACK[DFIN] | rcv DATA_ACK[DFIN] | | | -------------- snd DATA_ACK | ------- | | |||
| rcv DATA_FIN -------------- | -------------- | | | CLOSE all subflows | snd DATA_FIN | | |||
| ------- CLOSE all subflows | CLOSE all subflows | | V V V | |||
| snd DATA_ACK[DFIN] V delete MPTCP PCB V | +-----------+ +-----------+ +----------+ | |||
\ +-----------+ +---------+ | |M_FINWAIT-2| | M_CLOSING | |M_LAST-ACK| | |||
------------------------>|M_TIME WAIT|----------------->| M_CLOSED| | +-----------+ +-----------+ +----------+ | |||
+-----------+ +---------+ | | rcv DATA_ACK[DFIN] | rcv DATA_ACK[DFIN] | | |||
All subflows in CLOSED | | rcv DATA_FIN -------------- | -------------- | | |||
------------ | | ------- CLOSE all subflows | CLOSE all subflows | | |||
delete MPTCP PCB | | snd DATA_ACK[DFIN] V delete MPTCP PCB V | |||
]]></artwork> | \ +-----------+ +--------+ | |||
------------------------>|M_TIME WAIT|---------------->|M_CLOSED| | ||||
+-----------+ +--------+ | ||||
All subflows in CLOSED | ||||
------------ | ||||
delete MPTCP PCB </artwork> | ||||
</figure> | </figure> | |||
</section> | </section> | |||
<section anchor="app_changelog" numbered="true" toc="include" removeInRFC="f | ||||
alse" pn="section-appendix.e"> | ||||
<name slugifiedName="name-changes-from-rfc-6824">Changes from RFC 6824</na | ||||
me> | ||||
<t pn="section-appendix.e-1">This appendix lists the key technical changes | ||||
between <xref target="RFC6824" format="default" sectionFormat="of" derivedConte | ||||
nt="RFC6824"/>, | ||||
which specifies MPTCP v0; and this document, which obsoletes <xref target= | ||||
"RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> and spe | ||||
cifies MPTCP v1. Note that this specification is not backward compatible with <x | ||||
ref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824 | ||||
"/>. | ||||
<section title="Changes from RFC6824" anchor="app_changelog"> | </t> | |||
<t>This section lists the key technical changes between RFC6824, specifyin | <ul spacing="normal" bare="false" empty="false" pn="section-appendix.e-2"> | |||
g MPTCP v0, and this document, which obsoletes RFC6824 and specifies MPTCP v1. N | <li pn="section-appendix.e-2.1">This document incorporates lessons learn | |||
ote that this specification is not backwards compatible with RFC6824. | ed from the various implementations, deployments, and experiments gathered in th | |||
e documents "Use Cases and Operational Experience with Multipath TCP" <xref targ | ||||
<list style="symbols"> | et="RFC8041" format="default" sectionFormat="of" derivedContent="RFC8041"/> and | |||
<t>The document incorporates lessons learnt from the various implementat | the IETF Journal article "Multipath TCP Deployments" <xref target="deployments" | |||
ions, deployments and experiments gathered in the documents "Use Cases and Opera | format="default" sectionFormat="of" derivedContent="deployments"/>.</li> | |||
tional Experience with Multipath TCP" <xref target="RFC8041"/> and the IETF Jour | <li pn="section-appendix.e-2.2">Connection initiation, through the excha | |||
nal article "Multipath TCP Deployments" <xref target="deployments"/>.</t> | nge of the MP_CAPABLE | |||
<t>Connection initiation, through the exchange of the MP_CAPABLE MPTCP o | MPTCP option, is different from <xref target="RFC6824" format="default" | |||
ption, is different from RFC6824. The SYN no longer includes the initiator's key | sectionFormat="of" derivedContent="RFC6824"/>. The SYN no longer | |||
, allowing the MP_CAPABLE option on the SYN to be shorter in length, and to avoi | includes the initiator's key, to allow the MP_CAPABLE option on the SYN | |||
d duplicating the sending of keying material.</t> | to be shorter in length and to avoid duplicating the sending of keying material. | |||
<t>This also ensures reliable delivery of the key on the MP_CAPABLE opti | </li> | |||
on by allowing its transmission to be combined with data and thus using TCP's in | <li pn="section-appendix.e-2.3">This also ensures reliable delivery of t | |||
-built reliability mechanism. If the initiator does not immediately have data to | he key on the MP_CAPABLE | |||
send, the MP_CAPABLE option with the keys will be repeated on the first data pa | option by allowing its transmission to be combined with data and thus | |||
cket. If the other end is first to send, then the presence of the DSS option imp | using TCP's built-in reliability mechanism. If the initiator does not | |||
licitly confirms the receipt of the MP_CAPABLE.</t> | immediately have data to send, the MP_CAPABLE option with the keys | |||
<t>In the Flags field of MP_CAPABLE, C is now assigned to mean that the | will be repeated on the first data packet. If the other end is the first | |||
sender of this option will not accept additional MPTCP subflows to the source ad | to send, then the presence of the DSS option implicitly confirms the receipt of | |||
dress and port. This is an efficiency improvement, for example where the sender | the MP_CAPABLE.</li> | |||
is behind a strict NAT.</t> | <li pn="section-appendix.e-2.4">In the Flags field of MP_CAPABLE, "C" is | |||
<t>In the Flags field of MP_CAPABLE, H now indicates the use of HMAC-SHA | now assigned to mean that | |||
256 (rather than HMAC-SHA1).</t> | the sender of this option will not accept additional MPTCP subflows to | |||
<t>Connection initiation also defines the procedure for version negotiat | the source address and port. This improves efficiency -- for example, | |||
ion, for implementations that support both v0 (RFC6824) and v1 (this document).< | in cases where the sender is behind a strict NAT.</li> | |||
/t> | <li pn="section-appendix.e-2.5">In the Flags field of MP_CAPABLE, "H" no | |||
<t>The HMAC-SHA256 (rather than HMAC-SHA1) algorithm is used, as the alg | w indicates the use of HMAC-SHA256 (rather than HMAC-SHA1).</li> | |||
orithm provides better security. It is used to generate the token in the MP_JOIN | <li pn="section-appendix.e-2.6">Connection initiation also defines the p | |||
and ADD_ADDR messages, and to set the initial data sequence number.</t> | rocedure for version negotiation, for implementations that support both v0 <xref | |||
<t>A new subflow-level option exists to signal reasons for sending a RST | target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> | |||
on a subflow (MP_TCPRST <xref target="sec_reset"/>), which can help an implemen | and v1 (this document).</li> | |||
tation decide whether to attempt later re-connection.</t> | <li pn="section-appendix.e-2.7">The HMAC-SHA256 (rather than HMAC-SHA1) | |||
<t>The MP_PRIO option (<xref target="sec_policy"/>), which is used to si | algorithm is used, as it provides better security. It is used to generate the to | |||
gnal a change of priority for a subflow, no longer includes the AddrID field. It | ken in the MP_JOIN and ADD_ADDR messages and to set the IDSN.</li> | |||
s purpose was to allow the changed priority to be applied on a subflow other tha | <li pn="section-appendix.e-2.8">A new subflow-level option exists to sig | |||
n the one it was sent on. However, it has been realised that this could be used | nal reasons for sending a | |||
by a man-in-the-middle to divert all traffic on to its own path, and MP_PRIO doe | RST on a subflow (MP_TCPRST (<xref target="sec_reset" format="default" s | |||
s not include a token or other security mechanism.</t> | ectionFormat="of" derivedContent="Section 3.6"/>)); this can help an implementat | |||
<t>The ADD_ADDR option (<xref target="sec_add_address"/>), which is used | ion decide whether to attempt later reconnection.</li> | |||
to inform the other host about another potential address, is different in sever | <li pn="section-appendix.e-2.9">The MP_PRIO option (<xref target="sec_po | |||
al ways. It now includes an HMAC of the added address, for enhanced security. In | licy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>), | |||
addition, reliability for the ADD_ADDR option has been added: the IPVer field i | which is used to signal a change of priority for a subflow, no longer | |||
s replaced with a flag field, and one flag is assigned (E) which is used as an ' | includes the AddrID field. Its purpose was to allow the changed | |||
Echo' so a host can indicate that it has received the option.</t> | priority to be applied on a subflow other than the one it was sent | |||
<t>An additional way of performing a Fast Close is described, by sending | on. However, it was determined that this could be used by a | |||
a MP_FASTCLOSE option on a RST on all subflows. This allows the host to tear do | man-in-the-middle to divert all traffic onto its own path, and MP_PRIO | |||
wn the subflows and the connection immediately.</t> | does not include a token or other type of security mechanism.</li> | |||
<t>In the IANA registry a new MPTCP subtype option, MP_EXPERIMENTAL, is | <li pn="section-appendix.e-2.10">The ADD_ADDR option (<xref target="sec_ | |||
reserved for private experiments. However, the document doesn't define how to us | add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/ | |||
e the subtype option.</t> | >), which is used to inform the other host about another potential address, is d | |||
<t>A new Appendix discusses the usage of both the MPTCP and TCP Fast Ope | ifferent in several ways. It now includes an HMAC of the added address, for enha | |||
n on the same packet (<xref target="app_tfo"/>).</t> | nced security. In addition, reliability for the ADD_ADDR option has been added: | |||
</list></t> | the IPVer field is replaced with a flag field, and one flag is assigned ("E") th | |||
at is used as an "echo" so a host can indicate that it has received the option.< | ||||
/li> | ||||
<li pn="section-appendix.e-2.11">This document describes an additional w | ||||
ay of performing a Fast | ||||
Close -- by sending an MP_FASTCLOSE option on a RST on all subflows. Thi | ||||
s allows the host to tear down the subflows and the connection immediately.</li> | ||||
<li pn="section-appendix.e-2.12">IANA has reserved the MPTCP option subt | ||||
ype of value 0xf for | ||||
Private Use (<xref target="IANA_subtypes" format="default" sectionFormat | ||||
="of" derivedContent="Section 7.2"/>). This document doesn't define how to use t | ||||
hat value.</li> | ||||
<li pn="section-appendix.e-2.13">This document adds a new appendix (<xre | ||||
f target="app_tfo" format="default" sectionFormat="of" derivedContent="Appendix | ||||
B"/>), which discusses the usage of both MPTCP options | ||||
and TFO options on the same packet.</li> | ||||
</ul> | ||||
</section> | ||||
<section anchor="Acknowledgments" numbered="false" toc="include" removeInRFC | ||||
="false" pn="section-appendix.f"> | ||||
<name slugifiedName="name-acknowledgments">Acknowledgments</name> | ||||
<t pn="section-appendix.f-1">The authors gratefully acknowledge significan | ||||
t input into this | ||||
document from <contact fullname="Sebastien Barre"/> and <contact fullname= | ||||
"Andrew McDonald"/>.</t> | ||||
<t pn="section-appendix.f-2">The authors also wish to acknowledge reviews | ||||
and contributions from | ||||
<contact fullname="Iljitsch van Beijnum"/>, <contact fullname="Lars | ||||
Eggert"/>, <contact fullname="Marcelo Bagnulo"/>, <contact fullname="Robert Hanc | ||||
ock"/>, <contact fullname="Pasi Sarolahti"/>, | ||||
<contact fullname="Toby Moncaster"/>, <contact fullname="Philip Eard | ||||
ley"/>, <contact fullname="Sergio Lembo"/>, <contact fullname="Lawrence Conroy"/ | ||||
>, <contact fullname="Yoshifumi Nishida"/>, | ||||
<contact fullname="Bob Briscoe"/>, <contact fullname="Stein Gjessing"/>, | ||||
<contact fullname="Andrew McGregor"/>, <contact fullname="Georg Hamp | ||||
el"/>, <contact fullname="Anumita Biswas"/>, <contact fullname="Wes Eddy"/ | ||||
>, <contact fullname="Alexey Melnikov"/>, <contact fullname="Francis Dupont"/>, | ||||
<contact fullname="Adrian Farrel"/>, | ||||
<contact fullname="Barry Leiba"/>, <contact fullname="Robert Sparks"/>, | ||||
<contact fullname="Sean Turner"/>, <contact fullname="Stephen Farrel | ||||
l"/>, <contact fullname="Martin Stiemerling"/>, <contact fullname="Gregory Detal | ||||
"/>, <contact fullname="Fabien Duchene"/>, | ||||
<contact fullname="Xavier de Foy"/>, <contact fullname="Rahul Jadhav"/>, | ||||
<contact fullname="Klemens Schragel"/>, <contact fullname="Mirja Küh | ||||
lewind"/>, <contact fullname="Sheng Jiang"/>, <contact fullname="Alissa Cooper"/ | ||||
>, <contact fullname="Ines Robles"/>, <contact fullname="Roman Danyliw"/>, <cont | ||||
act fullname="Adam Roach"/>, | ||||
<contact fullname="Eric Vyncke"/>, and <contact fullname="Ben Kaduk"/>.</t | ||||
> | ||||
</section> | ||||
<section anchor="authors-addresses" numbered="false" removeInRFC="false" toc | ||||
="include" pn="section-appendix.g"> | ||||
<name slugifiedName="name-authors-addresses">Authors' Addresses</name> | ||||
<author fullname="Alan Ford" initials="A." surname="Ford"> | ||||
<organization showOnFrontPage="true">Pexip</organization> | ||||
<address> | ||||
<email>alan.ford@gmail.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Costin Raiciu" initials="C." surname="Raiciu"> | ||||
<organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true | ||||
">University Politehnica of Bucharest</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Splaiul Independentei 313</street> | ||||
<city>Bucharest</city> | ||||
<country>Romania</country> | ||||
</postal> | ||||
<email>costin.raiciu@cs.pub.ro</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Mark Handley" initials="M." surname="Handley"> | ||||
<organization abbrev="U. College London" showOnFrontPage="true">Universi | ||||
ty College London</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Gower Street</street> | ||||
<city>London</city> | ||||
<code>WC1E 6BT</code> | ||||
<country>United Kingdom</country> | ||||
</postal> | ||||
<email>m.handley@cs.ucl.ac.uk</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure" | ||||
> | ||||
<organization abbrev="U. catholique de Louvain" ascii="Universite cathol | ||||
ique de Louvain" showOnFrontPage="true">Université catholique de Louvain</orga | ||||
nization> | ||||
<address> | ||||
<postal> | ||||
<street>Pl. Ste Barbe, 2</street> | ||||
<code>1348</code> | ||||
<city>Louvain-la-Neuve</city> | ||||
<country>Belgium</country> | ||||
</postal> | ||||
<email>olivier.bonaventure@uclouvain.be</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Christoph Paasch" initials="C." surname="Paasch"> | ||||
<organization abbrev="Apple, Inc." showOnFrontPage="true">Apple, Inc.</o | ||||
rganization> | ||||
<address> | ||||
<postal> | ||||
<street/> | ||||
<city>Cupertino</city> | ||||
<region>CA</region> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>cpaasch@apple.com</email> | ||||
</address> | ||||
</author> | ||||
</section> | </section> | |||
</back> | </back> | |||
</rfc> | </rfc> | |||
End of changes. 319 change blocks. | ||||
3231 lines changed or deleted | 5040 lines changed or added | |||
This html diff was produced by rfcdiff 1.45. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ |