rfc8684xml2.original.xml   rfc8684.xml 
<?xml version="1.0" encoding="US-ASCII"?> <?xml version='1.0' encoding='utf-8'?>
<!-- Convert to HTML and Text with xml2rfc: http://xml2rfc.ietf.org. --> <rfc xmlns:xi="http://www.w3.org/2001/XInclude" version="3" category="std" conse
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ nsus="true" docName="draft-ietf-mptcp-rfc6824bis-18" indexInclude="true" ipr="tr
<!ENTITY RFC5533 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. ust200902" number="8684" obsoletes="6824" prepTime="2020-03-30T17:51:35" scripts
RFC.5533.xml"> ="Common,Latin" sortRefs="true" submissionType="IETF" symRefs="true" tocDepth="3
<!ENTITY RFC5062 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. " tocInclude="true" xml:lang="en">
RFC.5062.xml"> <link href="https://datatracker.ietf.org/doc/draft-ietf-mptcp-rfc6824bis-18" r
<!ENTITY RFC5061 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. el="prev"/>
RFC.5061.xml"> <link href="https://dx.doi.org/10.17487/rfc8684" rel="alternate"/>
<!ENTITY RFC4960 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference. <link href="urn:issn:2070-1721" rel="alternate"/>
RFC.4960.xml">
<!ENTITY RFC4987 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.4987.xml">
<!ENTITY RFC6234 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6234.xml">
<!ENTITY RFC4086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.4086.xml">
<!ENTITY RFC5681 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.5681.xml">
<!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.2119.xml">
<!ENTITY RFC2992 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.2992.xml">
<!ENTITY RFC2979 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.2979.xml">
<!ENTITY RFC2104 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.2104.xml">
<!ENTITY RFC2018 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.2018.xml">
<!ENTITY RFC1918 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.1918.xml">
<!ENTITY RFC0793 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.0793.xml">
<!ENTITY RFC7323 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.7323.xml">
<!ENTITY RFC1122 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.1122.xml">
<!ENTITY RFC3135 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.3135.xml">
<!ENTITY RFC3022 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.3022.xml">
<!ENTITY RFC6181 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6181.xml">
<!ENTITY RFC6182 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6182.xml">
<!ENTITY RFC6356 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6356.xml">
<!ENTITY RFC6555 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6555.xml">
<!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.8126.xml">
<!ENTITY RFC6897 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6897.xml">
<!ENTITY RFC6528 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.6528.xml">
<!ENTITY RFC5961 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.5961.xml">
<!ENTITY RFC7413 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.7413.xml">
<!ENTITY RFC7430 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.7430.xml">
<!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.8174.xml">
<!ENTITY RFC8041 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.
RFC.8041.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="no" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc rfcedstyle="yes"?>
<rfc category="std" docName="draft-ietf-mptcp-rfc6824bis-18" ipr="trust200902" o
bsoletes="6824">
<front> <front>
<title abbrev="Multipath TCP">TCP Extensions for Multipath Operation with Multiple Addresses</title> <title abbrev="Multipath TCP">TCP Extensions for Multipath Operation with Multiple Addresses</title>
<seriesInfo name="RFC" value="8684" stream="IETF"/>
<author fullname="Alan Ford" initials="A." surname="Ford"> <author fullname="Alan Ford" initials="A." surname="Ford">
<organization>Pexip</organization> <organization showOnFrontPage="true">Pexip</organization>
<address> <address>
<!-- <postal>
<street>Beech Court</street>
<city>Hurst</city>
<region>Berkshire</region>
<code>RG10 0RQ</code>
<country>UK</country>
</postal> -->
<email>alan.ford@gmail.com</email> <email>alan.ford@gmail.com</email>
</address> </address>
</author> </author>
<author fullname="Costin Raiciu" initials="C." surname="Raiciu"> <author fullname="Costin Raiciu" initials="C." surname="Raiciu">
<organization abbrev="U. Politechnica of Bucharest">University Politehnica of Bucharest</organization> <organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true"> University Politehnica of Bucharest</organization>
<address> <address>
<postal> <postal>
<street>Splaiul Independentei 313</street> <street>Splaiul Independentei 313</street>
<city>Bucharest</city> <city>Bucharest</city>
<country>Romania</country> <country>Romania</country>
</postal> </postal>
<email>costin.raiciu@cs.pub.ro</email> <email>costin.raiciu@cs.pub.ro</email>
</address> </address>
</author> </author>
<author fullname="Mark Handley" initials="M." surname="Handley"> <author fullname="Mark Handley" initials="M." surname="Handley">
<organization abbrev="U. College London">University College London</organization> <organization abbrev="U. College London" showOnFrontPage="true">University College London</organization>
<address> <address>
<postal> <postal>
<street>Gower Street</street> <street>Gower Street</street>
<city>London</city> <city>London</city>
<code>WC1E 6BT</code> <code>WC1E 6BT</code>
<country>UK</country> <country>United Kingdom</country>
</postal> </postal>
<email>m.handley@cs.ucl.ac.uk</email> <email>m.handley@cs.ucl.ac.uk</email>
</address> </address>
</author> </author>
<author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"> <author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure">
<organization abbrev="U. catholique de Louvain">Universit&eacute; catholique de Louvain</organization> <organization abbrev="U. catholique de Louvain" ascii="Universite catholique de Louvain" showOnFrontPage="true">Université catholique de Louvain</organization>
<address> <address>
<postal> <postal>
<street>Pl. Ste Barbe, 2</street> <street>Pl. Ste Barbe, 2</street>
<code>1348</code> <code>1348</code>
<city>Louvain-la-Neuve</city> <city>Louvain-la-Neuve</city>
<country>Belgium</country> <country>Belgium</country>
</postal> </postal>
<email>olivier.bonaventure@uclouvain.be</email> <email>olivier.bonaventure@uclouvain.be</email>
</address> </address>
</author> </author>
skipping to change at line 101 skipping to change at line 50
<address> <address>
<postal> <postal>
<street>Pl. Ste Barbe, 2</street> <street>Pl. Ste Barbe, 2</street>
<code>1348</code> <code>1348</code>
<city>Louvain-la-Neuve</city> <city>Louvain-la-Neuve</city>
<country>Belgium</country> <country>Belgium</country>
</postal> </postal>
<email>olivier.bonaventure@uclouvain.be</email> <email>olivier.bonaventure@uclouvain.be</email>
</address> </address>
</author> </author>
<author fullname="Christoph Paasch" initials="C." surname="Paasch"> <author fullname="Christoph Paasch" initials="C." surname="Paasch">
<organization abbrev="Apple, Inc.">Apple, Inc.</organization> <organization abbrev="Apple, Inc." showOnFrontPage="true">Apple, Inc.</organization>
<address> <address>
<postal> <postal>
<street></street> <street/>
<city>Cupertino</city> <city>Cupertino</city>
<country>US</country> <region>CA</region>
<country>United States of America</country>
</postal> </postal>
<email>cpaasch@apple.com</email> <email>cpaasch@apple.com</email>
</address> </address>
</author> </author>
<date month="03" year="2020"/>
<date year="2019" /> <keyword>tcp</keyword>
<keyword>extensions</keyword>
<area>General</area> <keyword>multipath</keyword>
<workgroup>Internet Engineering Task Force</workgroup> <keyword>multihomed</keyword>
<keyword>tcp extensions multipath multihomed subflow</keyword> <keyword>subflow</keyword>
<abstract pn="section-abstract">
<abstract> <t pn="section-abstract-1">TCP/IP communication is currently restricted to
<t>TCP/IP communication is currently restricted to a single path per conne a single path per connection, yet multiple paths often exist between peers. The
ction, yet multiple paths often exist between peers. The simultaneous use of the simultaneous use of these multiple paths for a TCP/IP session would improve res
se multiple paths for a TCP/IP session would improve resource usage within the n ource usage within the network and thus improve user experience through higher t
etwork and, thus, improve user experience through higher throughput and improved hroughput and improved resilience to network failure.</t>
resilience to network failure.</t> <t pn="section-abstract-2">Multipath TCP provides the ability to simultane
ously use multiple
<t>Multipath TCP provides the ability to simultaneously use multiple paths paths between peers. This document presents a set of extensions to
between peers. This document presents a set of extensions to traditional TCP to traditional TCP to support multipath operation. The protocol offers the
support multipath operation. The protocol offers the same type of service to ap same type of service to applications as TCP (i.e., a reliable bytestream),
plications as TCP (i.e., reliable bytestream), and it provides the components ne and it provides the components necessary to establish and use multiple TCP flow
cessary to establish and use multiple TCP flows across potentially disjoint path s across potentially disjoint paths.</t>
s.</t> <t pn="section-abstract-3">This document specifies v1 of Multipath TCP, ob
soleting v0 as
<t>This document specifies v1 of Multipath TCP, obsoleting v0 as specified specified in RFC 6824, through clarifications and modifications primarily
in RFC6824, through clarifications and modifications primarily driven by deploy driven by deployment experience.</t>
ment experience.</t>
</abstract> </abstract>
<boilerplate>
<section anchor="status-of-memo" numbered="false" removeInRFC="false" toc=
"exclude" pn="section-boilerplate.1">
<name slugifiedName="name-status-of-this-memo">Status of This Memo</name
>
<t pn="section-boilerplate.1-1">
This is an Internet Standards Track document.
</t>
<t pn="section-boilerplate.1-2">
This document is a product of the Internet Engineering Task Force
(IETF). It represents the consensus of the IETF community. It has
received public review and has been approved for publication by
the Internet Engineering Steering Group (IESG). Further
information on Internet Standards is available in Section 2 of
RFC 7841.
</t>
<t pn="section-boilerplate.1-3">
Information about the current status of this document, any
errata, and how to provide feedback on it may be obtained at
<eref target="https://www.rfc-editor.org/info/rfc8684" brackets="non
e"/>.
</t>
</section>
<section anchor="copyright" numbered="false" removeInRFC="false" toc="excl
ude" pn="section-boilerplate.2">
<name slugifiedName="name-copyright-notice">Copyright Notice</name>
<t pn="section-boilerplate.2-1">
Copyright (c) 2020 IETF Trust and the persons identified as the
document authors. All rights reserved.
</t>
<t pn="section-boilerplate.2-2">
This document is subject to BCP 78 and the IETF Trust's Legal
Provisions Relating to IETF Documents
(<eref target="https://trustee.ietf.org/license-info" brackets="none
"/>) in effect on the date of
publication of this document. Please review these documents
carefully, as they describe your rights and restrictions with
respect to this document. Code Components extracted from this
document must include Simplified BSD License text as described in
Section 4.e of the Trust Legal Provisions and are provided without
warranty as described in the Simplified BSD License.
</t>
</section>
</boilerplate>
<toc>
<section anchor="toc" numbered="false" removeInRFC="false" toc="exclude" p
n="section-toc.1">
<name slugifiedName="name-table-of-contents">Table of Contents</name>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="section-to
c.1-1">
<li pn="section-toc.1-1.1">
<t keepWithNext="true" pn="section-toc.1-1.1.1"><xref derivedContent
="1" format="counter" sectionFormat="of" target="section-1"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-introduction">Introductio
n</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.1.2">
<li pn="section-toc.1-1.1.2.1">
<t keepWithNext="true" pn="section-toc.1-1.1.2.1.1"><xref derive
dContent="1.1" format="counter" sectionFormat="of" target="section-1.1"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-design-assump
tions">Design Assumptions</xref></t>
</li>
<li pn="section-toc.1-1.1.2.2">
<t keepWithNext="true" pn="section-toc.1-1.1.2.2.1"><xref derive
dContent="1.2" format="counter" sectionFormat="of" target="section-1.2"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-multipath-tcp
-in-the-networ">Multipath TCP in the Networking Stack</xref></t>
</li>
<li pn="section-toc.1-1.1.2.3">
<t keepWithNext="true" pn="section-toc.1-1.1.2.3.1"><xref derive
dContent="1.3" format="counter" sectionFormat="of" target="section-1.3"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-terminology">
Terminology</xref></t>
</li>
<li pn="section-toc.1-1.1.2.4">
<t keepWithNext="true" pn="section-toc.1-1.1.2.4.1"><xref derive
dContent="1.4" format="counter" sectionFormat="of" target="section-1.4"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-concept
">MPTCP Concept</xref></t>
</li>
<li pn="section-toc.1-1.1.2.5">
<t keepWithNext="true" pn="section-toc.1-1.1.2.5.1"><xref derive
dContent="1.5" format="counter" sectionFormat="of" target="section-1.5"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-requirements-
language">Requirements Language</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.2">
<t keepWithNext="true" pn="section-toc.1-1.2.1"><xref derivedContent
="2" format="counter" sectionFormat="of" target="section-2"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-operation-overview">Opera
tion Overview</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.2.2">
<li pn="section-toc.1-1.2.2.1">
<t keepWithNext="true" pn="section-toc.1-1.2.2.1.1"><xref derive
dContent="2.1" format="counter" sectionFormat="of" target="section-2.1"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-initiating-an
-mptcp-connect">Initiating an MPTCP Connection</xref></t>
</li>
<li pn="section-toc.1-1.2.2.2">
<t keepWithNext="true" pn="section-toc.1-1.2.2.2.1"><xref derive
dContent="2.2" format="counter" sectionFormat="of" target="section-2.2"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-associating-a
-new-subflow-w">Associating a New Subflow with an Existing MPTCP Connection</xre
f></t>
</li>
<li pn="section-toc.1-1.2.2.3">
<t keepWithNext="true" pn="section-toc.1-1.2.2.3.1"><xref derive
dContent="2.3" format="counter" sectionFormat="of" target="section-2.3"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-informing-the
-other-host-ab">Informing the Other Host about Another Potential Address</xref><
/t>
</li>
<li pn="section-toc.1-1.2.2.4">
<t keepWithNext="true" pn="section-toc.1-1.2.2.4.1"><xref derive
dContent="2.4" format="counter" sectionFormat="of" target="section-2.4"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-data-transfer
-using-mptcp">Data Transfer Using MPTCP</xref></t>
</li>
<li pn="section-toc.1-1.2.2.5">
<t keepWithNext="true" pn="section-toc.1-1.2.2.5.1"><xref derive
dContent="2.5" format="counter" sectionFormat="of" target="section-2.5"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-requesting-a-
change-in-a-pa">Requesting a Change in a Path's Priority</xref></t>
</li>
<li pn="section-toc.1-1.2.2.6">
<t keepWithNext="true" pn="section-toc.1-1.2.2.6.1"><xref derive
dContent="2.6" format="counter" sectionFormat="of" target="section-2.6"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-closing-an-mp
tcp-connection">Closing an MPTCP Connection</xref></t>
</li>
<li pn="section-toc.1-1.2.2.7">
<t keepWithNext="true" pn="section-toc.1-1.2.2.7.1"><xref derive
dContent="2.7" format="counter" sectionFormat="of" target="section-2.7"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-notable-featu
res">Notable Features</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.3">
<t keepWithNext="true" pn="section-toc.1-1.3.1"><xref derivedContent
="3" format="counter" sectionFormat="of" target="section-3"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-mptcp-operations-an-overv
ie">MPTCP Operations: An Overview</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.3.2">
<li pn="section-toc.1-1.3.2.1">
<t keepWithNext="true" pn="section-toc.1-1.3.2.1.1"><xref derive
dContent="3.1" format="counter" sectionFormat="of" target="section-3.1"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-connection-in
itiation">Connection Initiation</xref></t>
</li>
<li pn="section-toc.1-1.3.2.2">
<t keepWithNext="true" pn="section-toc.1-1.3.2.2.1"><xref derive
dContent="3.2" format="counter" sectionFormat="of" target="section-3.2"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-starting-a-ne
w-subflow">Starting a New Subflow</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.1"><xref derive
dContent="3.3" format="counter" sectionFormat="of" target="section-3.3"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-operati
on-and-data-tr">MPTCP Operation and Data Transfer</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se
ction-toc.1-1.3.2.3.2">
<li pn="section-toc.1-1.3.2.3.2.1">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.1.1"><xre
f derivedContent="3.3.1" format="counter" sectionFormat="of" target="section-3.3
.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-d
ata-sequence-mapping">Data Sequence Mapping</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.2">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.2.1"><xre
f derivedContent="3.3.2" format="counter" sectionFormat="of" target="section-3.3
.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-d
ata-acknowledgments">Data Acknowledgments</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.3">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.3.1"><xre
f derivedContent="3.3.3" format="counter" sectionFormat="of" target="section-3.3
.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-c
losing-a-connection">Closing a Connection</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.4">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.4.1"><xre
f derivedContent="3.3.4" format="counter" sectionFormat="of" target="section-3.3
.4"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-r
eceiver-considerations">Receiver Considerations</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.5">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.5.1"><xre
f derivedContent="3.3.5" format="counter" sectionFormat="of" target="section-3.3
.5"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-s
ender-considerations">Sender Considerations</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.6">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.6.1"><xre
f derivedContent="3.3.6" format="counter" sectionFormat="of" target="section-3.3
.6"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-r
eliability-and-retransmiss">Reliability and Retransmissions</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.7">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.7.1"><xre
f derivedContent="3.3.7" format="counter" sectionFormat="of" target="section-3.3
.7"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-c
ongestion-control-consider">Congestion Control Considerations</xref></t>
</li>
<li pn="section-toc.1-1.3.2.3.2.8">
<t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.8.1"><xre
f derivedContent="3.3.8" format="counter" sectionFormat="of" target="section-3.3
.8"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-s
ubflow-policy">Subflow Policy</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.3.2.4">
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.1"><xref derive
dContent="3.4" format="counter" sectionFormat="of" target="section-3.4"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-address-knowl
edge-exchange-">Address Knowledge Exchange (Path Management)</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se
ction-toc.1-1.3.2.4.2">
<li pn="section-toc.1-1.3.2.4.2.1">
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.1.1"><xre
f derivedContent="3.4.1" format="counter" sectionFormat="of" target="section-3.4
.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-a
ddress-advertisement">Address Advertisement</xref></t>
</li>
<li pn="section-toc.1-1.3.2.4.2.2">
<t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.2.1"><xre
f derivedContent="3.4.2" format="counter" sectionFormat="of" target="section-3.4
.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-r
emove-address">Remove Address</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.3.2.5">
<t keepWithNext="true" pn="section-toc.1-1.3.2.5.1"><xref derive
dContent="3.5" format="counter" sectionFormat="of" target="section-3.5"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-fast-close">F
ast Close</xref></t>
</li>
<li pn="section-toc.1-1.3.2.6">
<t keepWithNext="true" pn="section-toc.1-1.3.2.6.1"><xref derive
dContent="3.6" format="counter" sectionFormat="of" target="section-3.6"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-subflow-reset
">Subflow Reset</xref></t>
</li>
<li pn="section-toc.1-1.3.2.7">
<t keepWithNext="true" pn="section-toc.1-1.3.2.7.1"><xref derive
dContent="3.7" format="counter" sectionFormat="of" target="section-3.7"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-fallback">Fal
lback</xref></t>
</li>
<li pn="section-toc.1-1.3.2.8">
<t keepWithNext="true" pn="section-toc.1-1.3.2.8.1"><xref derive
dContent="3.8" format="counter" sectionFormat="of" target="section-3.8"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-error-handlin
g">Error Handling</xref></t>
</li>
<li pn="section-toc.1-1.3.2.9">
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.1"><xref derive
dContent="3.9" format="counter" sectionFormat="of" target="section-3.9"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-heuristics">H
euristics</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se
ction-toc.1-1.3.2.9.2">
<li pn="section-toc.1-1.3.2.9.2.1">
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.1.1"><xre
f derivedContent="3.9.1" format="counter" sectionFormat="of" target="section-3.9
.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-p
ort-usage">Port Usage</xref></t>
</li>
<li pn="section-toc.1-1.3.2.9.2.2">
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.2.1"><xre
f derivedContent="3.9.2" format="counter" sectionFormat="of" target="section-3.9
.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-d
elayed-subflow-start-and-s">Delayed Subflow Start and Subflow Symmetry</xref></t
>
</li>
<li pn="section-toc.1-1.3.2.9.2.3">
<t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.3.1"><xre
f derivedContent="3.9.3" format="counter" sectionFormat="of" target="section-3.9
.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-f
ailure-handling">Failure Handling</xref></t>
</li>
</ul>
</li>
</ul>
</li>
<li pn="section-toc.1-1.4">
<t keepWithNext="true" pn="section-toc.1-1.4.1"><xref derivedContent
="4" format="counter" sectionFormat="of" target="section-4"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-semantic-issues">Semantic
Issues</xref></t>
</li>
<li pn="section-toc.1-1.5">
<t keepWithNext="true" pn="section-toc.1-1.5.1"><xref derivedContent
="5" format="counter" sectionFormat="of" target="section-5"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-security-considerations">
Security Considerations</xref></t>
</li>
<li pn="section-toc.1-1.6">
<t keepWithNext="true" pn="section-toc.1-1.6.1"><xref derivedContent
="6" format="counter" sectionFormat="of" target="section-6"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-interactions-with-middleb
ox">Interactions with Middleboxes</xref></t>
</li>
<li pn="section-toc.1-1.7">
<t keepWithNext="true" pn="section-toc.1-1.7.1"><xref derivedContent
="7" format="counter" sectionFormat="of" target="section-7"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-iana-considerations">IANA
Considerations</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.7.2">
<li pn="section-toc.1-1.7.2.1">
<t keepWithNext="true" pn="section-toc.1-1.7.2.1.1"><xref derive
dContent="7.1" format="counter" sectionFormat="of" target="section-7.1"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-tcp-option-ki
nd-numbers">TCP Option Kind Numbers</xref></t>
</li>
<li pn="section-toc.1-1.7.2.2">
<t keepWithNext="true" pn="section-toc.1-1.7.2.2.1"><xref derive
dContent="7.2" format="counter" sectionFormat="of" target="section-7.2"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-option-
subtypes">MPTCP Option Subtypes</xref></t>
</li>
<li pn="section-toc.1-1.7.2.3">
<t keepWithNext="true" pn="section-toc.1-1.7.2.3.1"><xref derive
dContent="7.3" format="counter" sectionFormat="of" target="section-7.3"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-mptcp-handsha
ke-algorithms">MPTCP Handshake Algorithms</xref></t>
</li>
<li pn="section-toc.1-1.7.2.4">
<t keepWithNext="true" pn="section-toc.1-1.7.2.4.1"><xref derive
dContent="7.4" format="counter" sectionFormat="of" target="section-7.4"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-mp_tcprst-rea
son-codes">MP_TCPRST Reason Codes</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.8">
<t keepWithNext="true" pn="section-toc.1-1.8.1"><xref derivedContent
="8" format="counter" sectionFormat="of" target="section-8"/>.  <xref derivedCon
tent="" format="title" sectionFormat="of" target="name-references">References</x
ref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.8.2">
<li pn="section-toc.1-1.8.2.1">
<t keepWithNext="true" pn="section-toc.1-1.8.2.1.1"><xref derive
dContent="8.1" format="counter" sectionFormat="of" target="section-8.1"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-normative-ref
erences">Normative References</xref></t>
</li>
<li pn="section-toc.1-1.8.2.2">
<t keepWithNext="true" pn="section-toc.1-1.8.2.2.1"><xref derive
dContent="8.2" format="counter" sectionFormat="of" target="section-8.2"/>.  <xre
f derivedContent="" format="title" sectionFormat="of" target="name-informative-r
eferences">Informative References</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.9">
<t keepWithNext="true" pn="section-toc.1-1.9.1"><xref derivedContent
="Appendix A" format="default" sectionFormat="of" target="section-appendix.a"/>.
  <xref derivedContent="" format="title" sectionFormat="of" target="name-notes-o
n-use-of-tcp-options">Notes on Use of TCP Options</xref></t>
</li>
<li pn="section-toc.1-1.10">
<t keepWithNext="true" pn="section-toc.1-1.10.1"><xref derivedConten
t="Appendix B" format="default" sectionFormat="of" target="section-appendix.b"/>
.  <xref derivedContent="" format="title" sectionFormat="of" target="name-tcp-fa
st-open-and-mptcp">TCP Fast Open and MPTCP</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.10.2">
<li pn="section-toc.1-1.10.2.1">
<t keepWithNext="true" pn="section-toc.1-1.10.2.1.1"><xref deriv
edContent="B.1" format="counter" sectionFormat="of" target="section-b.1"/>.  <xr
ef derivedContent="" format="title" sectionFormat="of" target="name-tfo-cookie-r
equest-with-mpt">TFO Cookie Request with MPTCP</xref></t>
</li>
<li pn="section-toc.1-1.10.2.2">
<t keepWithNext="true" pn="section-toc.1-1.10.2.2.1"><xref deriv
edContent="B.2" format="counter" sectionFormat="of" target="section-b.2"/>.  <xr
ef derivedContent="" format="title" sectionFormat="of" target="name-data-sequenc
e-mapping-under">Data Sequence Mapping under TFO</xref></t>
</li>
<li pn="section-toc.1-1.10.2.3">
<t keepWithNext="true" pn="section-toc.1-1.10.2.3.1"><xref deriv
edContent="B.3" format="counter" sectionFormat="of" target="section-b.3"/>.  <xr
ef derivedContent="" format="title" sectionFormat="of" target="name-connection-e
stablishment-ex">Connection Establishment Examples</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.11">
<t keepWithNext="true" pn="section-toc.1-1.11.1"><xref derivedConten
t="Appendix C" format="default" sectionFormat="of" target="section-appendix.c"/>
.  <xref derivedContent="" format="title" sectionFormat="of" target="name-contro
l-blocks">Control Blocks</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio
n-toc.1-1.11.2">
<li pn="section-toc.1-1.11.2.1">
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.1"><xref deriv
edContent="C.1" format="counter" sectionFormat="of" target="section-c.1"/>.  <xr
ef derivedContent="" format="title" sectionFormat="of" target="name-mptcp-contro
l-block">MPTCP Control Block</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se
ction-toc.1-1.11.2.1.2">
<li pn="section-toc.1-1.11.2.1.2.1">
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.1.1"><xr
ef derivedContent="C.1.1" format="counter" sectionFormat="of" target="section-c.
1.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-
authentication-and-metadata">Authentication and Metadata</xref></t>
</li>
<li pn="section-toc.1-1.11.2.1.2.2">
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.2.1"><xr
ef derivedContent="C.1.2" format="counter" sectionFormat="of" target="section-c.
1.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-
sending-side">Sending Side</xref></t>
</li>
<li pn="section-toc.1-1.11.2.1.2.3">
<t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.3.1"><xr
ef derivedContent="C.1.3" format="counter" sectionFormat="of" target="section-c.
1.3"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-
receiving-side">Receiving Side</xref></t>
</li>
</ul>
</li>
<li pn="section-toc.1-1.11.2.2">
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.1"><xref deriv
edContent="C.2" format="counter" sectionFormat="of" target="section-c.2"/>.  <xr
ef derivedContent="" format="title" sectionFormat="of" target="name-tcp-control-
blocks">TCP Control Blocks</xref></t>
<ul bare="true" empty="true" indent="2" spacing="compact" pn="se
ction-toc.1-1.11.2.2.2">
<li pn="section-toc.1-1.11.2.2.2.1">
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.1.1"><xr
ef derivedContent="C.2.1" format="counter" sectionFormat="of" target="section-c.
2.1"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-
sending-side-2">Sending Side</xref></t>
</li>
<li pn="section-toc.1-1.11.2.2.2.2">
<t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.2.1"><xr
ef derivedContent="C.2.2" format="counter" sectionFormat="of" target="section-c.
2.2"/>.  <xref derivedContent="" format="title" sectionFormat="of" target="name-
receiving-side-2">Receiving Side</xref></t>
</li>
</ul>
</li>
</ul>
</li>
<li pn="section-toc.1-1.12">
<t keepWithNext="true" pn="section-toc.1-1.12.1"><xref derivedConten
t="Appendix D" format="default" sectionFormat="of" target="section-appendix.d"/>
.  <xref derivedContent="" format="title" sectionFormat="of" target="name-finite
-state-machine">Finite State Machine</xref></t>
</li>
<li pn="section-toc.1-1.13">
<t keepWithNext="true" pn="section-toc.1-1.13.1"><xref derivedConten
t="Appendix E" format="default" sectionFormat="of" target="section-appendix.e"/>
.  <xref derivedContent="" format="title" sectionFormat="of" target="name-change
s-from-rfc-6824">Changes from RFC 6824</xref></t>
</li>
<li pn="section-toc.1-1.14">
<t keepWithNext="true" pn="section-toc.1-1.14.1"><xref derivedConten
t="" format="none" sectionFormat="of" target="section-appendix.f"/><xref derived
Content="" format="title" sectionFormat="of" target="name-acknowledgments">Ackno
wledgments</xref></t>
</li>
<li pn="section-toc.1-1.15">
<t keepWithNext="true" pn="section-toc.1-1.15.1"><xref derivedConten
t="" format="none" sectionFormat="of" target="section-appendix.g"/><xref derived
Content="" format="title" sectionFormat="of" target="name-authors-addresses">Aut
hors' Addresses</xref></t>
</li>
</ul>
</section>
</toc>
</front> </front>
<middle> <middle>
<section title="Introduction" anchor="sec_intro"> <section anchor="sec_intro" numbered="true" toc="include" removeInRFC="false
<t>Multipath TCP (MPTCP) is a set of extensions to regular TCP <xref targe " pn="section-1">
t="RFC0793"/> to provide a Multipath TCP <xref target="RFC6182"/> service, which <name slugifiedName="name-introduction">Introduction</name>
enables a transport connection to operate across multiple paths <t pn="section-1-1">Multipath TCP (MPTCP) is a set of extensions to regula
simultaneously. This document presents the protocol changes required to add mult r TCP <xref target="RFC0793" format="default" sectionFormat="of" derivedContent=
ipath capability to TCP; specifically, those for signaling and setting up multip "RFC0793"/> to provide a Multipath TCP service <xref target="RFC6182" format="de
le paths ("subflows"), managing these subflows, reassembly of data, and terminat fault" sectionFormat="of" derivedContent="RFC6182"/>, which enables a transport
ion of sessions. connection to operate across multiple paths
This is not the only information required to create a Multipath TCP implem simultaneously. This document presents the protocol changes required to add
entation, however. This document is complemented by three others: multipath capability to TCP -- specifically, those for signaling and setting
<list style="symbols"> up multiple paths ("subflows"), managing these subflows, reassembly of data,
<t>Architecture <xref target="RFC6182"/>, which explains the motivatio and termination of sessions. This is not the only information required to create
ns behind Multipath TCP, contains a discussion of high-level design decisions on a Multipath TCP implementation, however. This document is complemented by three
which this design is based, and an explanation of a functional separation throu others:
gh which an extensible MPTCP implementation can be developed.</t>
<t>Congestion control <xref target="RFC6356"/> presents a safe congest
ion control algorithm for coupling the behavior of the multiple paths in order t
o "do no harm" to other network users.</t>
<t>Application considerations <xref target="RFC6897"/> discusses what
impact MPTCP will have on applications, what applications will want to do with M
PTCP, and as a consequence of these factors, what API extensions an MPTCP implem
entation should present.</t>
</list>
This document is an update to, and obsoletes, the v0 specification of Mult
ipath TCP (RFC6824). This document specifies MPTCP v1, which is not backward com
patible with MPTCP v0. This document additionally defines version negotiation pr
ocedures for implementations that support both versions.
</t> </t>
<ul spacing="normal" bare="false" empty="false" pn="section-1-2">
<section title="Design Assumptions" anchor="sec_assum"> <li pn="section-1-2.1">
<t>In order to limit the potentially huge design space, the mptcp workin <xref target="RFC6182" format="default" sectionFormat="of" derivedCont
g group imposed two key constraints on the Multipath TCP design presented in thi ent="RFC6182"/> (MPTCP architecture), which
s document: explains the motivations behind Multipath TCP, contains a discussion
<list style="symbols"> of high-level design decisions on which this design is based, and provid
<t>It must be backwards-compatible with current, regular TCP, to inc es an explanation of a functional separation through which an extensible MPTCP i
rease its chances of deployment.</t> mplementation can be developed.</li>
<t>It can be assumed that one or both hosts are multihomed and multi <li pn="section-1-2.2">
addressed.</t> <xref target="RFC6356" format="default" sectionFormat="of" derivedCont
</list> ent="RFC6356"/> (congestion control), which presents a safe congestion control a
lgorithm for coupling the behavior of the multiple paths in order to "do no harm
" to other network users.</li>
<li pn="section-1-2.3">
<xref target="RFC6897" format="default" sectionFormat="of" derivedCont
ent="RFC6897"/> (application considerations), which discusses what impact MPTCP
will have on applications, what applications will want to do with MPTCP, and as
a consequence of these factors, what API extensions an MPTCP implementation shou
ld present.</li>
</ul>
<t pn="section-1-3">
This document obsoletes the v0 specification of
Multipath TCP <xref target="RFC6824" format="default" sectionFormat="of" d
erivedContent="RFC6824"/>. This document specifies MPTCP v1, which is not backwa
rd compatible with MPTCP v0. This document additionally defines version negotiat
ion procedures for implementations that support both versions.
</t>
<section anchor="sec_assum" numbered="true" toc="include" removeInRFC="fal
se" pn="section-1.1">
<name slugifiedName="name-design-assumptions">Design Assumptions</name>
<t pn="section-1.1-1">In order to limit the potentially huge design spac
e, the
MPTCP Working Group imposed two key constraints on the Multipath TCP des
ign presented in this document:
</t> </t>
<t>To simplify the design, we assume that the presence of multiple addre <ul spacing="normal" bare="false" empty="false" pn="section-1.1-2">
sses at a host is sufficient to indicate the existence of multiple paths. These <li pn="section-1.1-2.1">It must be backward compatible with current,
paths need not be entirely disjoint: they may share one or many routers between regular TCP, to increase its chances of deployment.</li>
them. Even in such a situation, making use of multiple paths is beneficial, impr <li pn="section-1.1-2.2">It can be assumed that one or both hosts are
oving resource utilization and resilience to a subset of node failures. The cong multihomed and multiaddressed.</li>
estion control algorithms defined in <xref target="RFC6356"/> ensure this does n </ul>
ot act detrimentally. Furthermore, there may be some scenarios where different T <t pn="section-1.1-3">To simplify the design, we assume that the presenc
CP ports on a single host can provide disjoint paths (such as through certain Eq e of multiple
ual-Cost Multipath (ECMP) implementations <xref target="RFC2992"/>), and so the addresses at a host is sufficient to indicate the existence of
MPTCP design also supports the use of ports in path identifiers.</t> multiple paths. These paths need not be entirely disjoint: they may
<t>There are three aspects to the backwards-compatibility listed above ( share one or many routers between them. Even in such a situation,
discussed in more detail in <xref target="RFC6182"/>): making use of multiple paths is beneficial, improving resource
<list style="hanging"> utilization and resilience to a subset of node failures. The
<t hangText="External Constraints:"> The protocol must function thro congestion control algorithm defined in <xref target="RFC6356" format="d
ugh the vast majority of existing efault" sectionFormat="of" derivedContent="RFC6356"/> ensures that the use of mu
middleboxes such as NATs, firewalls, and proxies, and as such must resemble exis ltiple paths does not act detrimentally.
ting TCP as far as possible on the Furthermore, there may be some scenarios where different TCP ports on a
wire. Furthermore, the protocol must not assume the segments it sends on the wir single host can provide disjoint paths (such as through certain
e arrive unmodified at the destination: Equal-Cost Multipath (ECMP) implementations <xref target="RFC2992" format="defau
they may be split or coalesced; TCP options may be removed or duplicated. </t> lt" sectionFormat="of" derivedContent="RFC2992"/>), and so the MPTCP design also
<t hangText="Application Constraints:"> The protocol must be usable supports the use of
with no change to existing applications that use the common TCP API (although it ports in path identifiers.</t>
is reasonable that not all features would be available to such legacy applicati <t pn="section-1.1-4">There are three aspects to the backward compatibil
ons). Furthermore, the protocol must provide the same service model as regular T ity listed above (discussed in more detail in <xref target="RFC6182" format="def
CP to the application.</t> ault" sectionFormat="of" derivedContent="RFC6182"/>):
<t hangText="Fallback:"> The protocol should be able to fall back to
standard TCP with no interference from the user, to be able to communicate with
legacy hosts.</t>
</list>
</t> </t>
<t>The complementary application considerations document <xref target="R <dl newline="false" spacing="normal" indent="3" pn="section-1.1-5">
FC6897"/> discusses the necessary features of an API to provide backwards-compat <dt pn="section-1.1-5.1">External Constraints:</dt>
ibility, as well as API extensions to convey the behavior of MPTCP at a level of <dd pn="section-1.1-5.2"> The protocol must function through the vast
control and information equivalent to that available with regular, single-path majority of existing
TCP.</t> middleboxes such as NATs, firewalls, and proxies, and as such must resemble exis
<t>Further discussion of the design constraints and associated design de ting TCP as far as possible on the
cisions are given in the MPTCP Architecture document <xref target="RFC6182"/> an wire. Furthermore, the protocol must not assume that the segments it sends on th
d in <xref target="howhard"/>.</t> e wire arrive unmodified at the destination:
they may be split or coalesced; TCP options may be removed or duplicated. </dd>
<dt pn="section-1.1-5.3">Application Constraints:</dt>
<dd pn="section-1.1-5.4"> The protocol must be usable with no change t
o existing applications that use the common TCP API (although it is reasonable t
hat not all features would be available to such legacy applications). Furthermor
e, the protocol must provide the same service model as regular TCP to the applic
ation.</dd>
<dt pn="section-1.1-5.5">Fallback:</dt>
<dd pn="section-1.1-5.6"> The protocol should be able to fall back to
standard TCP with no interference from the user, to be able to communicate with
legacy hosts.</dd>
</dl>
<t pn="section-1.1-6">The complementary application considerations docum
ent <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="R
FC6897"/> discusses the necessary features
of an API to provide backward compatibility, as well as API extensions t
o convey the behavior of MPTCP at a level of control and information equivalent
to that available with regular, single-path TCP.</t>
<t pn="section-1.1-7">Further discussion of the design constraints and a
ssociated design decisions is given in the MPTCP architecture document <xref tar
get="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> and
in <xref target="howhard" format="default" sectionFormat="of" derivedContent="h
owhard"/>.</t>
</section> </section>
<section anchor="sec_layers" numbered="true" toc="include" removeInRFC="fa
<section title="Multipath TCP in the Networking Stack" anchor="sec_layers" lse" pn="section-1.2">
> <name slugifiedName="name-multipath-tcp-in-the-networ">Multipath TCP in
<t>MPTCP operates at the transport layer and aims to be transparent to b the Networking Stack</name>
oth higher and lower <t pn="section-1.2-1">MPTCP operates at the transport layer and aims to
layers. It is a set of additional features on top of standard TCP; <xref target= be transparent to both higher and lower
"fig_arch" /> illustrates layers. It is a set of additional features on top of standard TCP; <xref target=
"fig_arch" format="default" sectionFormat="of" derivedContent="Figure 1"/> illus
trates
this layering. MPTCP is designed to be usable by legacy applications with no cha nges; detailed discussion this layering. MPTCP is designed to be usable by legacy applications with no cha nges; detailed discussion
of its interactions with applications is given in <xref target="RFC6897"/>.</t> of its interactions with applications is given in <xref target="RFC6897" format=
"default" sectionFormat="of" derivedContent="RFC6897"/>.</t>
<figure align="center" anchor="fig_arch" title="Comparison of Standard T <figure anchor="fig_arch" align="left" suppress-title="false" pn="figure
CP and MPTCP Protocol Stacks"> -1">
<artwork align="left"><![CDATA[ <name slugifiedName="name-comparison-of-standard-tcp-">Comparison of S
tandard TCP and MPTCP Protocol Stacks</name>
<artwork align="left" name="" type="" alt="" pn="section-1.2-2.1">
+-------------------------------+ +-------------------------------+
| Application | | Application |
+---------------+ +-------------------------------+ +---------------+ +-------------------------------+
| Application | | MPTCP | | Application | | MPTCP |
+---------------+ + - - - - - - - + - - - - - - - + +---------------+ + - - - - - - - + - - - - - - - +
| TCP | | Subflow (TCP) | Subflow (TCP) | | TCP | | Subflow (TCP) | Subflow (TCP) |
+---------------+ +-------------------------------+ +---------------+ +-------------------------------+
| IP | | IP | IP | | IP | | IP | IP |
+---------------+ +-------------------------------+ +---------------+ +-------------------------------+ </artwork>
]]></artwork>
</figure> </figure>
</section> </section>
<section numbered="true" toc="include" removeInRFC="false" pn="section-1.3
<section title="Terminology"> ">
<t>This document makes use of a number of terms that are either MPTCP-sp <name slugifiedName="name-terminology">Terminology</name>
ecific or have defined meaning in the context of MPTCP, as follows: <t pn="section-1.3-1">This document makes use of a number of terms that
<list style="hanging"> are either MPTCP specific or have defined meaning in the context of MPTCP, as fo
<t hangText="Path:"> A sequence of links between a sender and a receiv llows:
er, defined in this context by a 4-tuple of source and destination address/port </t>
pairs.</t> <dl newline="false" spacing="normal" indent="3" pn="section-1.3-2">
<t hangText="Subflow:"> A flow of TCP segments operating over an indiv <dt pn="section-1.3-2.1">Path:</dt>
idual path, which forms part of a larger MPTCP connection. A subflow is started <dd pn="section-1.3-2.2"> A sequence of links between a sender and a r
and terminated similar to a regular TCP connection.</t> eceiver, defined in this context by a 4-tuple of source and destination address⁠
<t hangText="(MPTCP) Connection:"> A set of one or more subflows, over /port pairs.</dd>
which an application can communicate between two hosts. There is a one-to-one m <dt pn="section-1.3-2.3">Subflow:</dt>
apping between a connection and an application socket.</t> <dd pn="section-1.3-2.4"> A flow of TCP segments operating over an ind
<t hangText="Data-level:"> The payload data is nominally transferred o ividual path, which forms part of a larger MPTCP connection. A subflow is starte
ver a connection, which in turn is transported over subflows. Thus, the term "d d and terminated similarly to a regular TCP connection.</dd>
ata-level" is synonymous with "connection level", in contrast to "subflow-level" <dt pn="section-1.3-2.5">(MPTCP) Connection:</dt>
, which refers to properties of an individual subflow.</t> <dd pn="section-1.3-2.6"> A set of one or more subflows, over which an
<t hangText="Token:"> A locally unique identifier given to a multipath application can communicate between two hosts. There is a one‑to‑one mapping be
connection by a host. May also be referred to as a "Connection ID".</t> tween a connection and an application socket.</dd>
<t hangText="Host:"> An end host operating an MPTCP implementation, an <dt pn="section-1.3-2.7">Data-level:</dt>
d either initiating or accepting an MPTCP connection.</t> <dd pn="section-1.3-2.8"> The payload data is nominally transferred ov
</list> er a connection, which in turn is transported over subflows. Thus, the term "da
In addition to these terms, note that MPTCP's interpretation of, and eff ta-level" is synonymous with "connection-level", in contrast to "subflow-level",
ect on, regular single-path TCP semantics are discussed in <xref target="sec_sem which refers to properties of an individual subflow.</dd>
antics"/>.</t> <dt pn="section-1.3-2.9">Token:</dt>
<dd pn="section-1.3-2.10"> A locally unique identifier given to a mult
ipath connection by a host. It may also be referred to as a "Connection ID".</dd>
<dt pn="section-1.3-2.11">Host:</dt>
<dd pn="section-1.3-2.12"> An end host operating an MPTCP implementati
on, and either initiating or accepting an MPTCP connection.</dd>
</dl>
<t pn="section-1.3-3">
In addition to these terms, note that MPTCP's interpretation of, and eff
ect on, regular single-path TCP semantics are discussed in <xref target="sec_sem
antics" format="default" sectionFormat="of" derivedContent="Section 4"/>.</t>
</section> </section>
<section anchor="sec_operation" numbered="true" toc="include" removeInRFC=
<section title="MPTCP Concept" anchor="sec_operation"> "false" pn="section-1.4">
<t>This section provides a high-level summary of normal <name slugifiedName="name-mptcp-concept">MPTCP Concept</name>
operation of MPTCP, and is illustrated by the scenario shown in <t pn="section-1.4-1">This section provides a high-level summary of norm
<xref target="fig_scenario"/>. A detailed description of operation is given in < al
xref target="sec_protocol"/>. operation of MPTCP; this type of scenario is illustrated in
<list style="symbols"> <xref target="fig_scenario" format="default" sectionFormat="of" derivedContent="
<t>To a non-MPTCP-aware application, MPTCP will behave the same as n Figure 2"/>. A detailed description of how
ormal TCP. Extended APIs could provide MPTCP operates is given in <xref target="sec_protocol" format="default" sectionF
additional control to MPTCP-aware applications <xref target="RFC6897"/>. ormat="of" derivedContent="Section 3"/>.
An application begins by opening a TCP socket in the normal way.
MPTCP signaling and operation are handled by the MPTCP implementation.
</t>
<t>An MPTCP connection begins similarly to a regular TCP connection.
This is
illustrated in <xref target="fig_scenario"/> where an MPTCP connection is establ
ished between
addresses A1 and B1 on Hosts A and B, respectively.</t>
<t>If extra paths are available, additional TCP sessions (termed MPT
CP "subflows")
are created on these paths, and are combined with the existing session, which co
ntinues
to appear as a single connection to the applications at both ends. The creation
of the
additional TCP session is illustrated between Address A2 on Host A and Address B
1 on
Host B.</t>
<t>MPTCP identifies multiple paths by the presence of multiple addre
sses
at hosts. Combinations of these multiple addresses equate to the additional path
s.
In the example, other potential paths that could be set up are A1&lt;-&gt;B2 and
A2&lt;-&gt;B2.
Although this additional session is shown as being initiated from A2, it could e
qually have
been initiated from B1 or B2.</t>
<t>The discovery and setup of additional subflows
will be achieved through a path management method; this document describes a mec
hanism
by which a host can initiate new subflows by using its own additional addresses,
or by
signaling its available addresses to the other host.</t>
<t>MPTCP adds connection-level sequence numbers to allow the reassem
bly of
segments arriving on multiple subflows with differing network delays. </t>
<t>Subflows are terminated as regular TCP connections, with a four-w
ay FIN
handshake. The MPTCP connection is terminated by a connection-level FIN.</t>
</list>
</t> </t>
<?rfc needLines='17'?> <figure anchor="fig_scenario" align="left" suppress-title="false" pn="fi
<figure align="center" anchor="fig_scenario" title="Example MPTCP Usag gure-2">
e Scenario"> <name slugifiedName="name-example-mptcp-usage-scenari">Example MPTCP U
<artwork align="left"><![CDATA[ sage Scenario</name>
<artwork align="left" name="" type="" alt="" pn="section-1.4-2.1">
Host A Host B Host A Host B
------------------------ ------------------------ ------------------------ ------------------------
Address A1 Address A2 Address B1 Address B2 Address A1 Address A2 Address B1 Address B2
---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
| | | | | | | |
| (initial connection setup) | | | (initial connection setup) | |
|----------------------------------->| | |-----------------------------------&gt;| |
|<-----------------------------------| | |&lt;-----------------------------------| |
| | | | | | | |
| (additional subflow setup) | | (additional subflow setup) |
| |--------------------->| | | |---------------------&gt;| |
| |<---------------------| | | |&lt;---------------------| |
| | | |
| | | | | | | |
]]></artwork> | | | | </artwork>
</figure> </figure>
<ul spacing="normal" bare="false" empty="false" pn="section-1.4-3">
<li pn="section-1.4-3.1">To a non-MPTCP-aware application, MPTCP will
behave the same as normal TCP. Extended APIs could provide
additional control to MPTCP-aware applications <xref target="RFC6897" format="de
fault" sectionFormat="of" derivedContent="RFC6897"/>.
An application begins by opening a TCP socket in the normal way.
MPTCP signaling and operation are handled by the MPTCP implementation.
</li>
<li pn="section-1.4-3.2">An MPTCP connection begins similarly to a reg
ular TCP connection. This is
illustrated in <xref target="fig_scenario" format="default" sectionFormat="of" d
erivedContent="Figure 2"/>, where an MPTCP connection is established between
addresses A1 and B1 on Hosts A and B, respectively.</li>
<li pn="section-1.4-3.3">If extra paths are available, additional TCP
sessions (termed MPTCP "subflows")
are created on these paths and are combined with the existing session, which con
tinues
to appear as a single connection to the applications at both ends. The creation
of the
additional TCP session is illustrated between Address A2 on Host A and Address B
1 on
Host B.</li>
<li pn="section-1.4-3.4">MPTCP identifies multiple paths by the presen
ce of multiple addresses
at hosts. Combinations of these multiple addresses equate to the additional path
s.
In the example, other potential paths that could be set up are A1&lt;-&gt;B2 and
A2&lt;-&gt;B2.
Although this additional session is shown as being initiated from A2, it could e
qually have
been initiated from B1 or B2.</li>
<li pn="section-1.4-3.5">The discovery and setup of additional subflow
s
will be achieved through a path management method; this document describes a mec
hanism
by which a host can initiate new subflows by using its own additional addresses
or by
signaling its available addresses to the other host.</li>
<li pn="section-1.4-3.6">MPTCP adds connection-level sequence numbers
to allow the reassembly of
segments arriving on multiple subflows with differing network delays. </li>
<li pn="section-1.4-3.7">Subflows are terminated as regular TCP connec
tions, with a four‑way FIN
handshake. The MPTCP connection is terminated by a connection-level FIN.</li>
</ul>
</section> </section>
<section numbered="true" toc="include" removeInRFC="false" pn="section-1.5
<section title="Requirements Language"> ">
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL <name slugifiedName="name-requirements-language">Requirements Language</
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", name>
"MAY", and "OPTIONAL" in this document are to be interpreted as <t pn="section-1.5-1">
described in BCP&nbsp;14 <xref target="RFC2119"/> <xref target="RFC8174" The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>",
/> "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>
when, and only when, they appear in all capitals, as shown here.</t> ",
"<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>",
"<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>",
"<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to
be
interpreted as described in BCP 14 <xref target="RFC2119" format="default" s
ectionFormat="of" derivedContent="RFC2119"/> <xref target="RFC8174" format="defa
ult" sectionFormat="of" derivedContent="RFC8174"/> when, and only when, they app
ear in all capitals, as
shown here.
</t>
</section> </section>
</section> </section>
<section anchor="sec_overview" numbered="true" toc="include" removeInRFC="fa
<section title="Operation Overview" anchor="sec_overview"> lse" pn="section-2">
<t>This section presents a single description of common MPTCP operation, w <name slugifiedName="name-operation-overview">Operation Overview</name>
ith reference to the protocol operation. This is a high-level overview of the ke <t pn="section-2-1">This section presents a single description of common M
y functions; the full specification follows in <xref target="sec_protocol"/>. Ex PTCP operation, with reference to the protocol operation. This is a high-level o
tensibility and negotiated features are not discussed here. Considerable referen verview of the key functions; the full specification follows in <xref target="se
ce is made to symbolic names of MPTCP options throughout this section -- these a c_protocol" format="default" sectionFormat="of" derivedContent="Section 3"/>. Ex
re subtypes of the IANA-assigned MPTCP option (see <xref target="IANA"/>), and t tensibility and negotiated features are not discussed here. Considerable referen
heir formats are defined in the detailed protocol specification that follows in ce is made to symbolic names of MPTCP options throughout this section -- these a
<xref target="sec_protocol"/>.</t> re subtypes of the IANA‑assigned MPTCP option (see <xref target="IANA" format="d
efault" sectionFormat="of" derivedContent="Section 7"/>), and their formats are
<t>A Multipath TCP connection provides a bidirectional bytestream between two ho defined in the detailed protocol specification provided in <xref target="sec_pro
sts communicating like normal TCP and, thus, does not require any change to the tocol" format="default" sectionFormat="of" derivedContent="Section 3"/>.</t>
applications. However, Multipath TCP enables the hosts to use different paths wi <t pn="section-2-2">A Multipath TCP connection provides a bidirectional by
th different IP addresses to exchange packets belonging to the MPTCP connection. testream between two hosts communicating like normal TCP and thus does not requi
A Multipath TCP connection appears like a normal TCP connection to an applicati re any change to the applications. However, Multipath TCP enables the hosts to u
on. However, to the network layer, each MPTCP subflow looks like a regular TCP f se different paths with different IP addresses to exchange packets belonging to
low whose segments carry a new TCP option type. Multipath TCP manages the creati the MPTCP connection. A Multipath TCP connection appears like a normal TCP conne
on, removal, and utilization of these subflows to send data. The number of subfl ction to an application. However, to the network layer, each MPTCP subflow looks
ows that are managed within a Multipath TCP connection is not fixed and it can f like a regular TCP flow whose segments carry a new TCP option type. Multipath T
luctuate during the lifetime of the Multipath TCP connection.</t> CP manages the creation, removal, and utilization of these subflows to send data
. The number of subflows that are managed within a Multipath TCP connection is n
<t>All MPTCP operations are signaled with a TCP option -- a single numerical typ ot fixed, and it can fluctuate during the lifetime of the Multipath TCP connecti
e for MPTCP, with "sub-types" for each MPTCP message. What follows is a summary on.</t>
of the purpose and rationale of these messages.</t> <t pn="section-2-3">All MPTCP operations are signaled with a TCP option --
<section title="Initiating an MPTCP Connection"> a single numerical type for MPTCP, with "subtypes" for each MPTCP message. What
<t>This is the same signaling as for initiating a normal TCP connection, but the follows is a summary of the purpose and rationale of these messages.</t>
SYN, SYN/ACK, and initial ACK (and data) packets also carry the MP_CAPABLE opti <section numbered="true" toc="include" removeInRFC="false" pn="section-2.1
on. This option has a variable length and serves multiple purposes. Firstly, it ">
verifies whether the remote host supports Multipath TCP; secondly, this option a <name slugifiedName="name-initiating-an-mptcp-connect">Initiating an MPT
llows the hosts to exchange some information to authenticate the establishment o CP Connection</name>
f additional subflows. Further details are given in <xref target="sec_init"/>.</ <t pn="section-2.1-1">This is the same signaling as for initiating a nor
t> mal TCP connection, but the SYN, SYN/ACK, and initial ACK (and data) packets als
o carry the MP_CAPABLE option. This option has a variable length and serves mult
<figure><artwork align="left"><![CDATA[ iple purposes. Firstly, it verifies whether the remote host supports Multipath T
CP; secondly, this option allows the hosts to exchange some information to authe
nticate the establishment of additional subflows. Further details are given in <
xref target="sec_init" format="default" sectionFormat="of" derivedContent="Secti
on 3.1"/>.</t>
<artwork align="left" name="" type="" alt="" pn="section-2.1-2">
Host A Host B Host A Host B
------ ------ ------ ------
MP_CAPABLE -> MP_CAPABLE -&gt;
[flags] [flags]
<- MP_CAPABLE &lt;- MP_CAPABLE
[B's key, flags] [B's key, flags]
ACK + MP_CAPABLE (+ data) -> ACK + MP_CAPABLE (+ data) -&gt;
[A's key, B's key, flags, (data-level details)] [A's key, B's key, flags, (data-level details)] </artwork>
]]></artwork></figure> <t pn="section-2.1-3">Retransmission of the ACK + MP_CAPABLE can occur i
f it is not known whether it has been received. The following diagrams show all possi
<t>Retransmission of the ACK + MP_CAPABLE can occur if it is not known if it has ble exchanges for the initial subflow setup to ensure this reliability.</t>
been received. The following diagrams show all possible exchanges for the initi <artwork align="left" name="" type="" alt="" pn="section-2.1-4">
al subflow setup to ensure this reliability.</t>
<figure><artwork align="left"><![CDATA[
Host A (with data to send immediately) Host B Host A (with data to send immediately) Host B
------ ------ ------ ------
MP_CAPABLE -> MP_CAPABLE -&gt;
[flags] [flags]
<- MP_CAPABLE &lt;- MP_CAPABLE
[B's key, flags] [B's key, flags]
ACK + MP_CAPABLE + data -> ACK + MP_CAPABLE + data -&gt;
[A's key, B's key, flags, data-level details] [A's key, B's key, flags, data-level details]
Host A (with data to send later) Host B Host A (with data to send later) Host B
------ ------ ------ ------
MP_CAPABLE -> MP_CAPABLE -&gt;
[flags] [flags]
<- MP_CAPABLE &lt;- MP_CAPABLE
[B's key, flags] [B's key, flags]
ACK + MP_CAPABLE -> ACK + MP_CAPABLE -&gt;
[A's key, B's key, flags] [A's key, B's key, flags]
ACK + MP_CAPABLE + data -> ACK + MP_CAPABLE + data -&gt;
[A's key, B's key, flags, data-level details] [A's key, B's key, flags, data-level details]
Host A Host B (sending first) Host A Host B (sending first)
------ ------ ------ ------
MP_CAPABLE -> MP_CAPABLE -&gt;
[flags] [flags]
<- MP_CAPABLE &lt;- MP_CAPABLE
[B's key, flags] [B's key, flags]
ACK + MP_CAPABLE -> ACK + MP_CAPABLE -&gt;
[A's key, B's key, flags] [A's key, B's key, flags]
<- ACK + DSS + data &lt;- ACK + DSS + data
[data-level details] [data-level details] </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.2
">
<section title="Associating a New Subflow with an Existing MPTCP Connection"> <name slugifiedName="name-associating-a-new-subflow-w">Associating a New
<t>The exchange of keys in the MP_CAPABLE handshake provides material that can b Subflow with an Existing MPTCP Connection</name>
e used to authenticate the endpoints when new subflows will be set up. <t pn="section-2.2-1">The exchange of keys in the MP_CAPABLE handshake p
rovides material that can be used to authenticate the endpoints when new subflow
s will be set up.
Additional subflows begin in the same way as initiating a normal TCP connection, but the SYN, SYN/ACK, and ACK packets also carry the MP_JOIN option. </t> Additional subflows begin in the same way as initiating a normal TCP connection, but the SYN, SYN/ACK, and ACK packets also carry the MP_JOIN option. </t>
<t pn="section-2.2-2">Host A initiates a new subflow between one of its
<t>Host A initiates a new subflow between one of its addresses and one of Host B addresses and one
's addresses. The token -- generated from the key -- is used to identify which M of Host B's addresses. The token -- generated from the key -- is used
PTCP connection it is joining, and the HMAC is used for authentication. The Hash to identify which MPTCP connection it is joining, and the Hash‑based
-based Message Authentication Code (HMAC) uses the keys exchanged in the MP_CAPA Message Authentication Code (HMAC) is used for authentication. The HMAC
BLE handshake, and the random numbers (nonces) exchanged in these MP_JOIN option uses the keys exchanged in the MP_CAPABLE handshake and the random numbers (nonc
s. MP_JOIN also contains flags and an Address ID that can be used to refer to th es) exchanged in these MP_JOIN options. MP_JOIN also contains flags and an Addre
e source address without the sender needing to know if it has been changed by a ss ID that can be used to refer to the source address without the sender needing
NAT. Further details are in <xref target="sec_join"/>.</t> to know if it has been changed by a NAT. Further details are given in <xref tar
get="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/
<figure><artwork align="left"><![CDATA[ >.</t>
<artwork align="left" name="" type="" alt="" pn="section-2.2-3">
Host A Host B Host A Host B
------ ------ ------ ------
MP_JOIN -> MP_JOIN -&gt;
[B's token, A's nonce, [B's token, A's nonce,
A's Address ID, flags] A's Address ID, flags]
<- MP_JOIN &lt;- MP_JOIN
[B's HMAC, B's nonce, [B's HMAC, B's nonce,
B's Address ID, flags] B's Address ID, flags]
ACK + MP_JOIN -> ACK + MP_JOIN -&gt;
[A's HMAC] [A's HMAC]
<- ACK &lt;- ACK </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.3
">
<section title="Informing the Other Host about Another Potential Address"> <name slugifiedName="name-informing-the-other-host-ab">Informing the Oth
<t>The set of IP addresses associated to a multihomed host may change during the er Host about Another Potential Address</name>
lifetime of an MPTCP connection. MPTCP supports the addition and removal of add <t pn="section-2.3-1">The set of IP addresses associated to a multihomed
resses on a host both implicitly and explicitly. If Host A has established a sub host may change during the lifetime of an MPTCP connection. MPTCP supports the
flow starting at address/port pair IP#-A1 and wants to open a second subflow sta addition and removal of addresses on a host both implicitly and explicitly. If H
rting at address/port pair IP#-A2, it simply initiates the establishment of the ost A has established a subflow starting at address⁠/port pair IP#-A1 and wants
subflow as explained above. The remote host will then be implicitly informed abo to open a second subflow starting at address⁠/port pair IP#-A2, it simply initia
ut the new address.</t> tes the establishment of the subflow as explained above. The remote host will th
en be implicitly informed about the new address.</t>
<t>In some circumstances, a host may want to advertise to the remote host the av <t pn="section-2.3-2">In some circumstances, a host may want to advertis
ailability of an address without establishing a new subflow, for example, when a e to the remote
NAT prevents setup in one direction. In the example below, Host A informs Host host the availability of an address without establishing a new subflow
B about its alternative IP address/port pair (IP#-A2). Host B may later send an -- for example, when a NAT prevents setup in one direction. In the example bel
MP_JOIN to this new address. The ADD_ADDR option contains a HMAC to authenticat ow, Host A informs Host B about its alternative IP address⁠/port pair (IP#-A2).
e the address as having been sent from the originator of the connection. The rec Host B may later send an MP_JOIN to this new address. The ADD_ADDR option contai
eiver of this option echoes it back to the client to indicate successful receipt ns an HMAC to authenticate the address as having been sent from the originator o
. Further details are in <xref target="sec_add_address"/>.</t> f the connection. The receiver of this option echoes it back to the client to in
dicate successful receipt. Further details are given in <xref target="sec_add_ad
<figure><artwork align="left"><![CDATA[ dress" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>.</t>
<artwork align="left" name="" type="" alt="" pn="section-2.3-3">
Host A Host B Host A Host B
------ ------ ------ ------
ADD_ADDR -> ADD_ADDR -&gt;
[Echo-flag=0, [Echo-flag=0,
IP#-A2, IP#-A2,
IP#-A2's Address ID, IP#-A2's Address ID,
HMAC of IP#-A2] HMAC of IP#-A2]
<- ADD_ADDR &lt;- ADD_ADDR
[Echo-flag=1, [Echo-flag=1,
IP#-A2, IP#-A2,
IP#-A2's Address ID, IP#-A2's Address ID,
HMAC of IP#-A2] HMAC of IP#-A2] </artwork>
]]></artwork></figure> <t pn="section-2.3-4">There is a corresponding signal for address remova
l, making use of
<t>There is a corresponding signal for address removal, making use of the Addres the Address ID that is signaled in the ADD_ADDR handshake.
s ID that is signaled in the add address handshake. Further details in <xref tar
get="sec_remove_addr"/>.</t>
<figure><artwork align="left"><![CDATA[ Further details are given in <xref target="sec_remove_addr" format="default" se
ctionFormat="of" derivedContent="Section 3.4.2"/>.</t>
<artwork align="left" name="" type="" alt="" pn="section-2.3-5">
Host A Host B Host A Host B
------ ------ ------ ------
REMOVE_ADDR -> REMOVE_ADDR -&gt;
[IP#-A2's Address ID] [IP#-A2's Address ID] </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.4
">
<section title="Data Transfer Using MPTCP"> <name slugifiedName="name-data-transfer-using-mptcp">Data Transfer Using
<t>To ensure reliable, in-order delivery of data over subflows that may appear a MPTCP</name>
nd disappear at any time, MPTCP uses a 64-bit data sequence number (DSN) to numb <t pn="section-2.4-1">To ensure reliable, in-order delivery of data over
er all data sent over the MPTCP connection. Each subflow has its own 32-bit sequ subflows that may appear and disappear at any time, MPTCP uses a 64-bit Data Se
ence number space, utilising the regular TCP sequence number header, and an MPTC quence Number (DSN) to number all data sent over the MPTCP connection. Each subf
P option maps the subflow sequence space to the data sequence space. In this way low has its own 32-bit sequence number space, utilizing the regular TCP sequence
, data can be retransmitted on different subflows (mapped to the same DSN) in th number header, and an MPTCP option maps the subflow sequence space to the data
e event of failure.</t> sequence space. In this way, data can be retransmitted on different subflows (ma
pped to the same DSN) in the event of failure.</t>
<t>The Data Sequence Signal (DSS) carries the Data Sequence Mapping. The Data Se <t pn="section-2.4-2">The Data Sequence Signal (DSS) carries the Data Se
quence Mapping consists of the subflow sequence number, data sequence number, an quence Mapping. The Data Sequence Mapping consists of the subflow sequence numbe
d length for which this mapping is valid. This option can also carry a connectio r, data sequence number, and length for which this mapping is valid. This option
n-level acknowledgment (the "Data ACK") for the received DSN.</t> can also carry a connection-level acknowledgment (the "Data ACK") for the recei
ved DSN.</t>
<t>With MPTCP, all subflows share the same receive buffer and advertise the same <t pn="section-2.4-3">With MPTCP, all subflows share the same receive bu
receive window. There are two levels of acknowledgment in MPTCP. Regular TCP ac ffer and advertise the same receive window. There are two levels of acknowledgme
knowledgments are used on each subflow to acknowledge the reception of the segme nt in MPTCP. Regular TCP acknowledgments are used on each subflow to acknowledge
nts sent over the subflow independently of their DSN. In addition, there are con the reception of the segments sent over the subflow independently of their DSN.
nection-level acknowledgments for the data sequence space. These acknowledgments In addition, there are connection-level acknowledgments for the data sequence s
track the advancement of the bytestream and slide the receiving window.</t> pace. These acknowledgments track the advancement of the bytestream and slide th
e receive window.</t>
<t>Further details are in <xref target="sec_generalop"/>.</t> <t pn="section-2.4-4">Further details are given in <xref target="sec_gen
eralop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>.</t>
<figure><artwork align="left"><![CDATA[ <artwork align="left" name="" type="" alt="" pn="section-2.4-5">
Host A Host B Host A Host B
------ ------ ------ ------
DSS -> DSS -&gt;
[Data Sequence Mapping] [Data Sequence Mapping]
[Data ACK] [Data ACK]
[Checksum] [Checksum] </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.5
">
<section title="Requesting a Change in a Path's Priority"> <name slugifiedName="name-requesting-a-change-in-a-pa">Requesting a Chan
<t>Hosts can indicate at initial subflow setup whether they wish the subflow to ge in a Path's Priority</name>
be used as a regular or backup path -- a backup path only being used if there ar <t pn="section-2.5-1">Hosts can indicate at initial subflow setup whethe
e no regular paths available. During a connection, Host A can request a change i r they wish the subflow to be used as a regular or backup path -- a backup path
n the priority of a subflow through the MP_PRIO signal to Host B. Further detail only being used if there are no regular paths available. During a connection, Ho
s are in <xref target="sec_policy"/>.</t> st A can request a change in the priority of a subflow through the MP_PRIO signa
l to Host B. Further details are given in <xref target="sec_policy" format="defa
<figure><artwork align="left"><![CDATA[ ult" sectionFormat="of" derivedContent="Section 3.3.8"/>.</t>
<artwork align="left" name="" type="" alt="" pn="section-2.5-2">
Host A Host B Host A Host B
------ ------ ------ ------
MP_PRIO -> MP_PRIO -&gt; </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.6
">
<section title="Closing an MPTCP Connection"> <name slugifiedName="name-closing-an-mptcp-connection">Closing an MPTCP
<t>When a host wants to close an existing subflow, but not the whole connection, Connection</name>
it can initiate a regular TCP FIN/ACK exchange.</t> <t pn="section-2.6-1">When a host wants to close an existing subflow but
not the whole connection, it can initiate a regular TCP FIN/ACK exchange.</t>
<t>When Host A wants to inform Host B that it has no more data to send, it signa <t pn="section-2.6-2">When Host A wants to inform Host B that it has no
ls this "Data FIN" as part of the Data Sequence Signal (see above). It has the s more data to send, it signals this "Data FIN" as part of the DSS (see above). It
ame semantics and behavior as a regular TCP FIN, but at the connection level. On has the same semantics and behavior as a regular TCP FIN, but at the connection
ce all the data on the MPTCP connection has been successfully received, then thi level. Once all the data on the MPTCP connection has been successfully received
s message is acknowledged at the connection level with a Data ACK. Further detai , this message is acknowledged at the connection level with a Data ACK. Further
ls are in <xref target="sec_close"/>.</t> details are given in <xref target="sec_close" format="default" sectionFormat="of
" derivedContent="Section 3.3.3"/>.</t>
<figure><artwork align="left"><![CDATA[ <artwork align="left" name="" type="" alt="" pn="section-2.6-3">
Host A Host B Host A Host B
------ ------ ------ ------
DSS -> DSS -&gt;
[Data FIN] [Data FIN]
<- DSS &lt;- DSS
[Data ACK] [Data ACK] </artwork>
]]></artwork></figure> <t pn="section-2.6-4">There is an additional method of connection closur
e, referred to as
<t>There is an additional method of connection closure, referred to as "Fast Clo "Fast Close", which is analogous to closing a single-path TCP
se", which is analogous to closing a single-path TCP connection with a RST signa connection with a RST signal. The MP_FASTCLOSE signal is used to
l. The MP_FASTCLOSE signal is used to indicate to the peer that the connection w indicate to the peer that the connection will be abruptly closed and
ill be abruptly closed and no data will be accepted anymore. This can be used on no data will be accepted anymore. This can be used on an ACK (which
an ACK (ensuring reliability of the signal), or a RST (which is not). Both exam ensures reliability of the signal) or a RST (which does not).
ples are shown in the following diagrams. Further details are in <xref target="s Both examples are shown in the following diagrams. Further details are given in
ec_fastclose"/>.</t> <xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent
="Section 3.5"/>.</t>
<figure><artwork align="left"><![CDATA[ <artwork align="left" name="" type="" alt="" pn="section-2.6-5">
Host A Host B Host A Host B
------ ------ ------ ------
ACK + MP_FASTCLOSE -> ACK + MP_FASTCLOSE -&gt;
[B's key] [B's key]
[RST on all other subflows] -> [RST on all other subflows] -&gt;
<- [RST on all subflows] &lt;- [RST on all subflows]
Host A Host B Host A Host B
------ ------ ------ ------
RST + MP_FASTCLOSE -> RST + MP_FASTCLOSE -&gt;
[B's key] [on all subflows] [B's key] [on all subflows]
<- [RST on all subflows] &lt;- [RST on all subflows] </artwork>
]]></artwork></figure> </section>
</section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.7
">
<section title="Notable Features"> <name slugifiedName="name-notable-features">Notable Features</name>
<t>It is worth highlighting that MPTCP's signaling has been designed with severa <t pn="section-2.7-1">It is worth highlighting that MPTCP's signaling ha
l key requirements in mind: s been designed with several key requirements in mind:
<list style="symbols"> </t>
<t>To cope with NATs on the path, addresses are referred to by Address IDs, in c <ul spacing="normal" bare="false" empty="false" pn="section-2.7-2">
ase the IP packet's source <li pn="section-2.7-2.1">To cope with NATs on the path, addresses are
referred to by Address IDs, in case the IP packet's source
address gets changed by a NAT. Setting up a new TCP flow is not possible if the receiver of the SYN is behind a NAT; address gets changed by a NAT. Setting up a new TCP flow is not possible if the receiver of the SYN is behind a NAT;
to allow subflows to be created when either end is behind a NAT, MPTCP uses the to allow subflows to be created when either end is behind a NAT, MPTCP uses the
ADD_ADDR message. </t> ADD_ADDR message. </li>
<li pn="section-2.7-2.2">MPTCP falls back to ordinary TCP if MPTCP ope
<t>MPTCP falls back to ordinary TCP if MPTCP operation is not possible, for exam ration is not
ple, if one host is not MPTCP capable or if a middlebox alters the payload. This possible -- for example, if one host is not MPTCP capable or if a middlebox alt
is discussed in <xref target="sec_fallback"/>.</t> ers the payload. This is discussed in <xref target="sec_fallback" format="defaul
t" sectionFormat="of" derivedContent="Section 3.7"/>.</li>
<t>To address the threats identified in <xref target="RFC6181"/>, the following <li pn="section-2.7-2.3">To address the threats identified in <xref ta
steps are taken: keys are sent in the clear in the MP_CAPABLE messages; MP_JOIN rget="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>, t
messages are secured with HMAC-SHA256 (<xref target="RFC2104"/>, <xref target="R he following steps are taken: keys are sent in
FC6234"/>) using those keys; and standard TCP validity checks are made on the ot the clear in the MP_CAPABLE messages; MP_JOIN messages are secured
her messages (ensuring sequence numbers are in-window <xref target="RFC5961"/>). with HMAC-SHA256 (<xref target="RFC2104" format="default" sectionForma
Residual threats to MPTCP v0 were identified in <xref target="RFC7430"/>, and t t="of" derivedContent="RFC2104"/> using
hose affecting the protocol (i.e. modification to ADD_ADDR) have been incorporat the algorithm in <xref target="RFC6234" format="default" sectionFormat
ed in this document. Further discussion of security can be found in <xref target ="of" derivedContent="RFC6234"/>) using those keys; and standard
="sec_security"/>.</t> TCP validity checks are made on the other messages (ensuring that
</list></t> sequence numbers are in‑window <xref target="RFC5961" format="default"
</section> sectionFormat="of" derivedContent="RFC5961"/>).
Residual threats to MPTCP v0 were identified in <xref target="RFC7430" format="
default" sectionFormat="of" derivedContent="RFC7430"/>, and those affecting the
protocol (i.e., modifications to
ADD_ADDR) have been incorporated in this document.
Further discussion of security can be found in <xref target="sec_security" form
at="default" sectionFormat="of" derivedContent="Section 5"/>.</li>
</ul>
</section>
</section> </section>
<section anchor="sec_protocol" numbered="true" toc="include" removeInRFC="fa
<section title="MPTCP Protocol" anchor="sec_protocol"> lse" pn="section-3">
<t>This section describes the operation of the MPTCP protocol, and is subd <name slugifiedName="name-mptcp-operations-an-overvie">MPTCP Operations: A
ivided into sections for each key part of the protocol operation.</t> n Overview</name>
<t>All MPTCP operations are signaled using optional TCP header fields. A s <t pn="section-3-1">This section describes the operation of MPTCP. The
ingle TCP option number ("Kind") has been assigned by IANA for MPTCP (see <xref subsections below discuss each key part of the protocol operation.</t>
target="IANA"/>), and then individual messages will be determined by a "subtype" <t pn="section-3-2">All MPTCP operations are signaled using optional TCP h
, the values of which are also stored in an IANA registry (and are also listed i eader fields. A single TCP option number ("Kind") has been assigned by IANA for
n <xref target="IANA"/>). As with all TCP options, the Length field is specified MPTCP (see <xref target="IANA" format="default" sectionFormat="of" derivedConten
in bytes, and includes the 2 bytes of Kind and Length.</t> t="Section 7"/>), and then individual messages will be determined by a "subtype"
<t>Throughout this document, when reference is made to an MPTCP option by , the values of which are also stored in an IANA registry (and are also listed i
symbolic name, such as "MP_CAPABLE", this refers to a TCP option with the single n <xref target="IANA" format="default" sectionFormat="of" derivedContent="Sectio
MPTCP option type, and with the subtype value of the symbolic name as defined i n 7"/>). As with all TCP options, the Length field is specified in bytes and inc
n <xref target="IANA"/>. This subtype is a 4-bit field -- the first 4 bits of th ludes the 2 bytes of Kind and Length.</t>
e option payload, as shown in <xref target="fig_option"/>. The MPTCP messages ar <t pn="section-3-3">Throughout this document, when reference is made to an
e defined in the following sections.</t> MPTCP option by symbolic name, such as "MP_CAPABLE", this refers to a TCP optio
n with the single MPTCP option type, and with the subtype value of the symbolic
<?rfc needLines='8'?> name as defined in <xref target="IANA" format="default" sectionFormat="of" deriv
<figure align="center" anchor="fig_option" title="MPTCP Option Format"> edContent="Section 7"/>. This subtype is a 4-bit field -- the first 4 bits of th
<artwork align="left"><![CDATA[ e option payload, as shown in <xref target="fig_option" format="default" section
1 2 3 Format="of" derivedContent="Figure 3"/>. The MPTCP messages are defined in the f
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ollowing sections.</t>
+---------------+---------------+-------+-----------------------+ <figure anchor="fig_option" align="left" suppress-title="false" pn="figure
| Kind | Length |Subtype| | -3">
+---------------+---------------+-------+ | <name slugifiedName="name-mptcp-option-format">MPTCP Option Format</name
| Subtype-specific data | >
| (variable length) | <artwork align="left" name="" type="" alt="" pn="section-3-4.1">
+---------------------------------------------------------------+ 1 2 3
]]></artwork> 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----------------------+
| Kind | Length |Subtype| |
+---------------+---------------+-------+ |
| Subtype-specific data |
| (variable length) |
+---------------------------------------------------------------+ </artwork>
</figure> </figure>
<t pn="section-3-5">Those MPTCP options associated with subflow initiation
<t>Those MPTCP options associated with subflow initiation are used on pack are used on
ets with the SYN flag set. Additionally, there is one MPTCP option for signaling packets with the SYN flag set. Additionally, there is one MPTCP option
metadata to ensure segmented data can be recombined for delivery to the applica for signaling metadata to ensure that segmented data can be recombined for
tion.</t> delivery to the application.</t>
<t>The remaining options, however, are signals that do not need to be on a <t pn="section-3-6">The remaining options, however, are signals that do no
specific packet, such as those for signaling additional addresses. Whilst an im t need to be on
plementation may desire to send MPTCP options as soon as possible, it may not be a specific packet, such as those for signaling additional
possible to combine all desired options (both those for MPTCP and for regular T addresses. While an implementation may desire to send MPTCP options as
CP, such as SACK (selective acknowledgment) <xref target="RFC2018"/>) on a singl soon as possible, it may not be possible to combine all desired options
e packet. Therefore, an implementation may choose to send duplicate ACKs contain (both those for MPTCP and for regular TCP, such as SACK (selective
ing the additional signaling information. This changes the semantics of a duplic acknowledgment) <xref target="RFC2018" format="default" sectionFormat="of"
ate ACK; these are usually only sent as a signal of a lost segment <xref target= derivedContent="RFC2018"/>) on a single
"RFC5681"/> in regular TCP. Therefore, an MPTCP implementation receiving a dupli packet. Therefore, an implementation may choose to send duplicate ACKs
cate ACK that contains an MPTCP option MUST NOT treat it as a signal of congesti containing the additional signaling information. This changes the
on. Additionally, an MPTCP implementation SHOULD NOT send more than two duplicat semantics of a duplicate ACK; these are usually only sent as a signal of
e ACKs in a row for the purposes of sending MPTCP options alone, in order to ens a lost segment <xref target="RFC5681" format="default" sectionFormat="of"
ure no middleboxes misinterpret this as a sign of congestion.</t> derivedContent="RFC5681"/> in regular
<t>Furthermore, standard TCP validity checks (such as ensuring the sequenc TCP. Therefore, an MPTCP implementation receiving a duplicate ACK that
e number and acknowledgment number are within window) MUST be undertaken before contains an MPTCP option <bcp14>MUST NOT</bcp14> treat it as a signal of
processing any MPTCP signals, as described in <xref target="RFC5961"/>, and init congestion. Additionally, an MPTCP implementation <bcp14>SHOULD NOT</bcp14
ial subflow sequence numbers SHOULD be generated according to the recommendation > send more than two duplicate ACKs in a row for the purposes
s in <xref target="RFC6528"/>.</t> of sending MPTCP options alone, in order to ensure that no middleboxes mis
interpret this as a sign of congestion.</t>
<section title="Connection Initiation" anchor="sec_init"> <t pn="section-3-7">Furthermore, standard TCP validity checks (such as ens
<t>Connection initiation begins with a SYN, SYN/ACK, ACK exchange uring that the
sequence number and acknowledgment number are within the window) <bcp14>MU
ST</bcp14> be undertaken before processing any MPTCP signals, as described in <x
ref target="RFC5961" format="default" sectionFormat="of" derivedContent="RFC5961
"/>, and initial subflow sequence numbers <bcp14>SHOULD</bcp14> be generated acc
ording to the recommendations in <xref target="RFC6528" format="default" section
Format="of" derivedContent="RFC6528"/>.</t>
<section anchor="sec_init" numbered="true" toc="include" removeInRFC="fals
e" pn="section-3.1">
<name slugifiedName="name-connection-initiation">Connection Initiation</
name>
<t pn="section-3.1-1">Connection initiation begins with a SYN, SYN/ACK,
ACK exchange
on a single path. Each packet on a single path. Each packet
contains the Multipath Capable (MP_CAPABLE) MPTCP option contains the Multipath Capable (MP_CAPABLE) MPTCP option
(<xref target="tcpm_capable"/>). This option declares its (<xref target="tcpm_capable" format="default" sectionFormat="of" derived
sender is capable of performing Multipath TCP and wishes to do Content="Figure 4"/>). This option declares its
sender capable of performing Multipath TCP and wishes to do
so on this particular connection.</t> so on this particular connection.</t>
<figure anchor="tcpm_capable" align="left" suppress-title="false" pn="fi
<t>The MP_CAPABLE exchange in this specification (v1) is different to gure-4">
<name slugifiedName="name-multipath-capable-mp_capabl">Multipath Capab
le (MP_CAPABLE) Option</name>
<artwork align="left" name="" type="" alt="" pn="section-3.1-2.1">
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-------+---------------+
| Kind | Length |Subtype|Version|A|B|C|D|E|F|G|H|
+---------------+---------------+-------+-------+---------------+
| Option Sender's Key (64 bits) |
| (if option Length &gt; 4) |
| |
+---------------------------------------------------------------+
| Option Receiver's Key (64 bits) |
| (if option Length &gt; 12) |
| |
+-------------------------------+-------------------------------+
| Data-Level Length (16 bits) | Checksum (16 bits, optional) |
+-------------------------------+-------------------------------+ </artwork>
</figure>
<t pn="section-3.1-3">The MP_CAPABLE exchange in this specification (v1)
is different than
that specified in v0. If a host supports multiple versions that specified in v0. If a host supports multiple versions
of MPTCP, the sender of the MP_CAPABLE option SHOULD signal the of MPTCP, the sender of the MP_CAPABLE option <bcp14>SHOULD</bcp14> signal the
highest version number it supports. In return, in its MP_CAPABLE option, highest version number it supports. In return, in its MP_CAPABLE option,
the receiver will signal the version number it wishes to use, which MUST the receiver will signal the version number it wishes to use, which <bcp14>MUST</bcp14>
be equal to or lower than the version number indicated in the initial be equal to or lower than the version number indicated in the initial
MP_CAPABLE. MP_CAPABLE.
There is a caveat though with respect to this version negotiation with There is a caveat, though, with respect to this version negotiation with
old listeners that only support v0. A listener that supports v0 expects that old listeners that only support v0. A listener that supports v0 expects that
the MP_CAPABLE option in the SYN-segment includes the initiator's key. I the MP_CAPABLE option in the SYN segment will include the initiator's
f key. If, however,
the initiator however already upgraded to v1, it won't include the key i the initiator already upgraded to v1, it won't include the key in the
n the SYN segment. Thus, the listener will ignore the MP_CAPABLE of this SYN s
SYN-segment. Thus, the listener will ignore the MP_CAPABLE of this SYN-s egment
egment and reply with a SYN/ACK that does not include an MP_CAPABLE. The initia
and reply with a SYN/ACK that does not include an MP_CAPABLE. The initia tor <bcp14>MAY</bcp14>
tor MAY choose to immediately fall back to TCP or <bcp14>MAY</bcp14> choose to a
choose to immediately fall back to TCP or MAY choose to attempt a connec ttempt a connection
tion
using MPTCP v0 (if the initiator supports v0), in order to discover whether the using MPTCP v0 (if the initiator supports v0), in order to discover whether the
listener supports the earlier version of MPTCP. In general, an MPTCP v0 co listener supports the earlier version of MPTCP. In general, an MPTCP v0
nnection connection
is likely to be preferred to a TCP one, however in a particular deployme will likely be preferred over a TCP connection; however, in a particular
nt scenario deployment scenario,
it may be known that the listener is unlikely to support MPTCP v0 and so it may be known that the listener is unlikely to support MPTCP v0 and so
the the
initiator may prefer not to attempt a v0 connection. An initiator MAY ca initiator may prefer not to attempt a v0 connection. An initiator <bcp14
che >MAY</bcp14> cache
information for a peer about what version of MPTCP it supports if any, a information for a peer about what version of MPTCP it supports, if any,
nd use and use
this information for future connection attempts.</t> this information for future connection attempts.</t>
<t pn="section-3.1-4">The MP_CAPABLE option is of variable length, with
<t>The MP_CAPABLE option is variable-length, with different fields different fields
included depending on which packet the option is used on. The full included, depending on which packet the option is used on. The full
MP_CAPABLE option is shown in <xref target="tcpm_capable"/>.</t> MP_CAPABLE option is shown in <xref target="tcpm_capable" format="defaul
t" sectionFormat="of" derivedContent="Figure 4"/>.</t>
<?rfc needLines='10'?> <t pn="section-3.1-5">The MP_CAPABLE option is carried on the SYN, SYN/A
<figure align="center" anchor="tcpm_capable" title="Multipath Capable (M CK, and ACK packets that start the first subflow of an MPTCP connection, as well
P_CAPABLE) Option"> as the first packet that carries data, if the initiator wishes to send first. T
<artwork align="left"><![CDATA[ he data carried by each option is as follows, where A = initiator and B = listen
1 2 3 er.
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 </t>
+---------------+---------------+-------+-------+---------------+ <ul spacing="normal" bare="false" empty="false" pn="section-3.1-6">
| Kind | Length |Subtype|Version|A|B|C|D|E|F|G|H| <li pn="section-3.1-6.1">SYN (A-&gt;B): only the first 4 octets (Lengt
+---------------+---------------+-------+-------+---------------+ h = 4).</li>
| Option Sender's Key (64 bits) | <li pn="section-3.1-6.2">SYN/ACK (B-&gt;A): B's key for this connectio
| (if option Length > 4) | n (Length = 12).</li>
| | <li pn="section-3.1-6.3">ACK (no data) (A-&gt;B): A's key followed by
+---------------------------------------------------------------+ B's key (Length = 20).</li>
| Option Receiver's Key (64 bits) | <li pn="section-3.1-6.4">ACK (with first data) (A-&gt;B): A's key foll
| (if option Length > 12) | owed by B's key followed by Data-Level Length, and optional Checksum (Length = 2
| | 2 or 24).</li>
+-------------------------------+-------------------------------+ </ul>
| Data-Level Length (16 bits) | Checksum (16 bits, optional) | <t pn="section-3.1-7">
+-------------------------------+-------------------------------+ The contents of the option are determined by the SYN and ACK flags of th
]]></artwork> e packet, along with the option's Length field. In <xref target="tcpm_capable" f
</figure> ormat="default" sectionFormat="of" derivedContent="Figure 4"/>, "Sender" and "Re
ceiver" refer to the sender or receiver of the TCP packet (which can be either h
<t>The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK packets ost).</t>
that start the first subflow of an MPTCP connection, as well as the first packe <t pn="section-3.1-8">The initial SYN, containing just the MP_CAPABLE he
t that carries data, if the initiator wishes to send first. The data carried by ader, is used
each option is as follows, where A = initiator and B = listener. to define the version of MPTCP being requested and also to exchange
<list style="symbols"> flags to negotiate connection features, as described later.</t>
<t>SYN (A-&gt;B): only the first four octets (Length = 4).</t> <t pn="section-3.1-9">This option is used to declare the 64-bit keys tha
<t>SYN/ACK (B-&gt;A): B's Key for this connection (Length = 12).</t> t the end hosts
<t>ACK (no data) (A-&gt;B): A's Key followed by B's Key (Length = 20 have generated for this MPTCP connection. These keys are used to
).</t> authenticate the addition of future subflows to this connection. This
<t>ACK (with first data) (A-&gt;B): A's Key followed by B's Key foll is the only time the key will be sent in the clear on the wire (unless "
owed by Data-Level Length, and optional Checksum (Length = 22 or 24).</t> Fast Close" (<xref target="sec_fastclose" format="default" sectionFormat="of" de
</list> rivedContent="Section 3.5"/>) is used); all future subflows will identify the co
The contents of the option are determined by the SYN and ACK flags of the nnection using a 32-bit "token". This token is a cryptographic hash of this key.
packet, along with the option's length field. For the diagram shown in <xref ta The algorithm for this process is dependent on the authentication algorithm sel
rget="tcpm_capable"/>, "sender" and "receiver" refer to the sender or receiver o ected; the method of selection is defined later in this section.</t>
f the TCP packet (which can be either host).</t> <t pn="section-3.1-10">Upon reception of the initial SYN segment, a stat
eful server generates a random key and replies with a SYN/ACK. The key's method
<t>The initial SYN, containing just the MP_CAPABLE header, is used of generation is implementation specific. The key <bcp14>MUST</bcp14> be hard to
to define the version of MPTCP being requested, as well as exchanging guess, and it <bcp14>MUST</bcp14> be unique for the sending host across all its
flags to negotiate connection features, described later.</t> current MPTCP connections. Recommendations for generating random numbers for us
e in keys are given in <xref target="RFC4086" format="default" sectionFormat="of
<t>This option is used to declare the 64-bit keys that the end hosts hav " derivedContent="RFC4086"/>. Connections will be indexed at each host by the to
e generated for this MPTCP connection. These keys are used to authenticate the a ken (a one-way hash of the key). Therefore, an implementation will require a map
ddition of future subflows to this connection. This is the only time the key wil ping from each token to the corresponding connection, and in turn to the keys fo
l be sent in the clear on the wire (unless "fast close", <xref target="sec_fastclose r the connection.</t>
"/>, is used); all future subflows will identify the connection using a 32-bit " <t pn="section-3.1-11">There is a risk that two different keys will hash
token". This token is a cryptographic hash of this key. The algorithm for this p to the same
rocess is dependent on the authentication algorithm selected; the method of sele token. The risk of hash collisions is usually small, unless the host
ction is defined later in this section.</t> is handling many tens of thousands of connections. Therefore, an
implementation <bcp14>SHOULD</bcp14> check its list of connection
<t>Upon reception of the initial SYN-segment, a stateful server generate tokens to ensure that there is no collision before sending its key,
s a random key and replies with a SYN/ACK. The key's method of generation is imp and if there is, then it should generate a new key. This would,
lementation specific. The key MUST be hard to guess, and it MUST be unique for t however, be costly for a server with thousands of connections. The
he sending host across all its current MPTCP connections. Recommendations for ge subflow handshake mechanism (<xref target="sec_join" format="default" se
nerating random numbers for use in keys are given in <xref target="RFC4086"/>. C ctionFormat="of" derivedContent="Section 3.2"/>) will ensure that new subflows o
onnections will be indexed at each host by the token (a one-way hash of the key) nly join the
. Therefore, an implementation will require a mapping from each token to the cor correct connection, however, through the cryptographic handshake, as
responding connection, and in turn to the keys for the connection.</t> well as checking the connection tokens in both directions, and
ensuring that sequence numbers are in-window. So, in the worst case, if
<t>There is a risk that two different keys will hash to the same token. there was a token collision, the new subflow would not succeed, but the MPTCP co
The risk of hash collisions is usually small, unless the host is handling many t nnection would continue to provide a regular TCP service.</t>
ens of thousands of connections. Therefore, an implementation SHOULD check its l <t pn="section-3.1-12">Since key generation is implementation specific,
ist of connection tokens to ensure there is no collision before sending its key, there is no
and if there is, then it should generate a new key. This would, however, be cos requirement that they simply be random numbers. An implementation is
tly for a server with thousands of connections. The subflow handshake mechanism free to exchange cryptographic material out of band and generate these
(<xref target="sec_join"/>) will ensure that new subflows only join the correct keys from this material, in order to provide additional mechanisms by wh
connection, however, through the cryptographic handshake, as well as checking th ich to verify the identity of the communicating entities. For example, an implem
e connection tokens in both directions, and ensuring sequence numbers are in-win entation could choose to link its MPTCP keys to those used in higher-layer TLS o
dow. So in the worst case if there was a token collision, the new subflow would r SSH connections.</t>
not succeed, but the MPTCP connection would continue to provide a regular TCP se <t pn="section-3.1-13">If the server behaves in a
rvice.</t>
<t>Since key generation is implementation-specific, there is no r
equirement that they be simply random numbers. An implementation is free to exch
ange cryptographic material out-of-band and generate these keys from this, in or
der to provide additional mechanisms by which to verify the identity of the comm
unicating entities. For example, an implementation could choose to link its MPTC
P keys to those used in higher-layer TLS or SSH connections.</t>
<t>If the server behaves in a
stateless manner, it has to generate its own key in a verifiable stateless manner, it has to generate its own key in a verifiable
fashion. This verifiable way of generating the key can be done by fashion. This verifiable way of generating the key can be done by
using a hash of the 4-tuple, sequence number and a local secret using a hash of the 4-tuple, sequence number, and a local secret
(similar to what is done for the TCP-sequence number <xref target="RFC49 (similar to what is done for the TCP sequence number <xref target="RFC49
87"/>). 87" format="default" sectionFormat="of" derivedContent="RFC4987"/>).
It will thus be able to verify whether it is indeed the originator of It will thus be able to verify whether it is indeed the originator of
the key echoed back in the later MP_CAPABLE option. the key echoed back in the subsequent MP_CAPABLE option.
As for a stateful server, the tokens SHOULD be checked for uniqueness, h As for a stateful server, the tokens <bcp14>SHOULD</bcp14> be checked fo
owever r uniqueness; however,
if uniqueness is not met, and there is no way to generate an alternative if uniqueness is not met and there is no way to generate an alternative
verifiable verifiable
key, then the connection MUST fall back to using regular TCP by not send key, then the connection <bcp14>MUST</bcp14> fall back to using regular
ing a TCP by not sending an
MP_CAPABLE in the SYN/ACK.</t> MP_CAPABLE in the SYN⁠/ACK.</t>
<t pn="section-3.1-14">The ACK carries both A's key and B's key. This is
<t>The ACK carries both A's key and B's key. This is the first time that the first time that A's key is seen on the wire, although it is expected that A
A's key is seen on the wire, although it is expected that A will have generated will have generated a key locally before the initial SYN. The echoing of B's ke
a key locally before the initial SYN. The echoing of B's key allows B to operat y allows B to operate statelessly, as described above. Therefore, A's key must b
e statelessly, as described above. Therefore, A's key must be delivered reliably e delivered reliably to B, and in order to do this, the transmission of this pac
to B, and in order to do this, the transmission of this packet must be made rel ket must be made reliable.</t>
iable.</t> <t pn="section-3.1-15">If B has data to send first, then the reliable de
livery of the
<t>If B has data to send first, then the reliable delivery of the ACK+MP ACK + MP_CAPABLE is ensured by the receipt of this data with an
_CAPABLE can be inferred by the receipt of this data with an MPTCP Data Sequence MPTCP Data Sequence Signal (DSS) option (<xref target="sec_generalop" fo
Signal (DSS) option (<xref target="sec_generalop"/>). If, however, A wishes to s rmat="default" sectionFormat="of" derivedContent="Section 3.3"/>) containing a D
end data first, it has two options to ensure the reliable delivery of the ACK+MP ATA_ACK for the MP_CAPABLE (which is
_CAPABLE. If it immediately has data to send, then the third ACK (with data) wou the first octet of the data sequence space). If, however, A wishes to sen
ld also contain an MP_CAPABLE option with additional data parameters (the Data-L d data first, it has
evel Length and optional Checksum as shown in <xref target="tcpm_capable"/>). If two options to ensure the reliable delivery of the ACK + MP_CAPABLE. If
A does not immediately have data to send, it MUST include the MP_CAPABLE on the it immediately has data to send, then the first ACK (with data) would
third ACK, but without the additional data parameters. When A does have data to also contain an MP_CAPABLE option with additional data parameters (the
send, it must repeat the sending of the MP_CAPABLE option from the third ACK, w Data-Level Length and optional Checksum as shown in <xref target="tcpm_c
ith additional data parameters. This MP_CAPABLE option is in place of the DSS, a apable" format="default" sectionFormat="of" derivedContent="Figure 4"/>). If A d
nd simply specifies the data-level length of the payload, and the checksum (if t oes not immediately
he use of checksums is negotiated). This is the minimal data required to establi have data to send, it <bcp14>MUST</bcp14> include the MP_CAPABLE on
sh an MPTCP connection - it allows validation of the payload, and given it is the the first ACK, but without the additional data parameters. When A does
first data, the Initial Data Sequence Number (IDSN) is also known (as it is gen have data to send, it must repeat the sending of the MP_CAPABLE option
erated from the key, as described below). Conveying the keys on the first data p from the first ACK, with additional data parameters. This MP_CAPABLE
acket allows the TCP reliability mechanisms to ensure the packet is successfully option is used in place of the DSS and simply specifies (1) the Data-Lev
delivered. The receiver will acknowledge this data at the connection level with el
a Data ACK, as if a DSS option has been received.</t> Length of the payload and (2) the checksum (if the use of checksums is
negotiated). This is the minimal data required to establish an MPTCP
<t>There could be situations where both A and B attempt to transmit init connection -- it allows validation of the payload, and given that it is
ial data at the same time. For example, if A did not initially have data to send the
, but then needed to transmit data before it had received anything from B, it wo first data, the Initial Data Sequence Number (IDSN) is also known (as
uld use an MP_CAPABLE option with data parameters (since it would not know if the it is generated from the key, as described below). Conveying the keys
MP_CAPABLE on the ACK was received). In such a situation, B may also have trans on the first data packet allows the TCP reliability mechanisms to
mitted data with a DSS option, but it had not yet been received at A. Therefore, ensure that the packet is successfully delivered. The receiver will ackn
B has received data with an MP_CAPABLE mapping after it has sent data with a DSS owledge this data at the connection level with a Data ACK, as if a DSS option ha
option. To ensure these situations can be handled, it follows that the data par s been received.</t>
ameters in an MP_CAPABLE are semantically equivalent to those in a DSS option and <t pn="section-3.1-16">There could be situations where both A and B atte
can be used interchangeably. Similar situations could occur when the MP_CAPABLE mpt to transmit
with data is lost and retransmitted. Furthermore, in the case of TCP Segmentati initial data at the same time. For example, if A did not initially
on Offloading, the MP_CAPABLE with data parameters may be duplicated across mult have data to send but then needed to transmit data before it had
iple packets, and implementations must also be able to cope with duplicate MP_CA received anything from B, it would use an MP_CAPABLE option with data
PABLE mappings as well as duplicate DSS mappings.</t> parameters (since it would not know if the MP_CAPABLE on the ACK was
received). In such a situation, B may also have transmitted data with
<t>Additionally, the MP_CAPABLE exchange allows the safe passage of MPTC a DSS option, but it had not yet been received at A. Therefore, B has
P options on SYN packets to be determined. If any of these options are dropped, received data with an MP_CAPABLE mapping after it has sent data with a
MPTCP will gracefully fall back to regular single-path TCP, as documented in <xr DSS option. To ensure that these situations can be handled, it follows t
ef target="sec_fallback"/>. If at any point in the handshake either party think hat the data parameters in an MP_CAPABLE are semantically equivalent to those in
s the MPTCP negotiation is compromised, for example by a middlebox corrupting th a DSS option and can be used interchangeably. Similar situations could occur wh
e TCP options, or unexpected ACK numbers being present, the host MUST stop using en the MP_CAPABLE with data is lost and retransmitted. Furthermore, in the case
MPTCP and no longer include MPTCP options in future TCP packets. The other host of TCP segmentation offloading, the MP_CAPABLE with data parameters may be dupli
will then also fall back to regular TCP using the fallback mechanism. Note th cated across multiple packets, and implementations must also be able to cope wit
at new subflows MUST NOT be established (using the process documented in <xref t h duplicate MP_CAPABLE mappings as well as duplicate DSS mappings.</t>
arget="sec_join"/>) until a Data Sequence Signal (DSS) option has been successfu <t pn="section-3.1-17">Additionally, the MP_CAPABLE exchange allows the
lly received across the path (as documented in <xref target="sec_generalop"/>).< safe passage of
/t> MPTCP options on SYN packets to be determined. If any of these options
are dropped, MPTCP will gracefully fall back to regular single-path
<t>Like all MPTCP options, the MP_CAPABLE option starts with the Kind an TCP, as documented in <xref target="sec_fallback" format="default" secti
d Length to specify the TCP-option kind and its length. This is followed by the onFormat="of" derivedContent="Section 3.7"/>.
MP_CAPABLE option. The first 4 bits of the first octet in the MP_CAPABLE option If at any point in the handshake either party thinks the MPTCP
(<xref target="tcpm_capable"/>) define the MPTCP option subtype (see <xref targe negotiation is compromised -- for example, by a middlebox corrupting
t="IANA"/>; for MP_CAPABLE, this is 0x0), and the remaining 4 bits of this octet the TCP options or by unexpected ACK numbers being present -- the host <
specify the MPTCP version in use (for this specification, this is 1).</t> bcp14>MUST</bcp14> stop using MPTCP and no longer include MPTCP options in futur
e TCP packets. The other host will then also fall back to regular TCP using the
<t>The second octet is reserved for flags, allocated as follows: fallback mechanism. Note that new subflows <bcp14>MUST NOT</bcp14> be establish
ed (using the process documented in <xref target="sec_join" format="default" sec
<list style="hanging"> tionFormat="of" derivedContent="Section 3.2"/>) until a DSS option has been succ
<t hangText="A:"> The leftmost bit, labeled "A", SHOULD be set to 1 to essfully received across the path (as documented in <xref target="sec_generalop"
indicate "Checksum Required", unless the system administrator has decided that format="default" sectionFormat="of" derivedContent="Section 3.3"/>).</t>
checksums are not required (for example, if the environment is controlled and no <t pn="section-3.1-18">Like all MPTCP options, the MP_CAPABLE option sta
middleboxes exist that might adjust the payload).</t> rts with the Kind
<t hangText="B:"> The second bit, labeled "B", is an extensibility fla and Length to specify the TCP option's kind and length. This
g, and MUST be set to 0 for current implementations. This will be used for an ex information is followed by the MP_CAPABLE option. The first 4 bits of
tensibility mechanism in a future specification, and the impact of this flag wil the first octet in the MP_CAPABLE option (<xref target="tcpm_capable" fo
l be defined at a later date. It is expected, but not mandated, that this flag w rmat="default" sectionFormat="of" derivedContent="Figure 4"/>) define the MPTCP
ould be used as part of an alternative security mechanism that does not require Option Subtype (see <xref target="IANA" format="default" sectionFormat="of" deri
a full version upgrade of the protocol, but does require redefining some element vedContent="Section 7"/>; for MP_CAPABLE, this value is
s of the handshake. If receiving a message with the 'B' flag set to 1, and this 0x0), and the remaining 4 bits of this octet specify the MPTCP
is not understood, then the MP_CAPABLE in this SYN MUST be silently ignored, whi version in use (for this specification, this value is 1).</t>
ch triggers a fallback to regular TCP; the sender is expected to retry with a fo <t pn="section-3.1-19">The second octet is reserved for flags, allocated
rmat compatible with this legacy specification. Note that the length of the MP_C as follows:
APABLE option, and the meanings of bits "D" through "H", may be altered by setti
ng B=1.</t>
<t hangText="C:"> The third bit, labeled "C", is set to "1" to indicat
e that the sender of this option will not accept additional MPTCP subflows to th
e source address and port, and therefore the receiver MUST NOT try to open any a
dditional subflows towards this address and port. This is an efficiency improvem
ent for situations where the sender knows a restriction is in place, for example
if the sender is behind a strict NAT, or operating behind a legacy Layer 4 load
balancer.</t>
<t hangText="D through H:"> The remaining bits, labeled "D" through "H
", are used for crypto algorithm negotiation. In this specification, only the ri
ghtmost bit, labeled "H", is assigned. Bit "H" indicates the use of HMAC-SHA256
(as defined in <xref target="sec_join"/>). An implementation that only support
s this method MUST set bit "H" to 1, and bits "D" through "G" to 0.</t>
</list>
A crypto algorithm MUST be specified. If flag bits D through H are all
0, the MP_CAPABLE option MUST be treated as invalid and ignored (that is, it mus
t be treated as a regular TCP handshake).</t>
<t>The selection of the authentication algorithm also impacts the algori
thm used to generate the token and the Initial Data Sequence Number (IDSN). In t
his specification, with only the SHA-256 algorithm (bit "H") specified and selec
ted, the token MUST be a truncated (most significant 32 bits) SHA-256 hash (<xre
f target="RFC6234"/>) of the key. A different, 64-bit truncation (the least sign
ificant 64 bits) of the SHA-256 hash of the key MUST be used as the IDSN. Note t
hat the key MUST be hashed in network byte order. Also note that the "least sign
ificant" bits MUST be the rightmost bits of the SHA-256 digest, as per <xref tar
get="RFC6234"/>. Future specifications of the use of the crypto bits may choose
to specify different algorithms for token and IDSN generation.</t>
<t>Both the crypto and checksum bits negotiate capabilities in similar w
ays. For the Checksum Required bit (labeled "A"), if either host requires the us
e of checksums, checksums MUST be used. In other words, the only way for checksu
ms not to be used is if both hosts in their SYNs set A=0. This decision is confi
rmed by the setting of the "A" bit in the third packet (the ACK) of the handshak
e. For example, if the initiator sets A=0 in the SYN, but the responder sets A=1
in the SYN/ACK, checksums MUST be used in both directions, and the initiator wi
ll set A=1 in the ACK. The decision whether to use checksums will be stored by a
n implementation in a per-connection binary state variable. If A=1 is received b
y a host that does not want to use checksums, it MUST fall back to regular TCP b
y ignoring the MP_CAPABLE option as if it was invalid.</t>
<t>For crypto negotiation, the responder has the choice. The initiator c
reates a proposal setting a bit for each algorithm it supports to 1 (in this ver
sion of the specification, there is only one proposal, so bit "H" will always be
set to 1). The responder responds with only 1 bit set -- this is the chosen alg
orithm. The rationale for this behavior is that the responder will typically be
a server with potentially many thousands of connections, so it may wish to choos
e an algorithm with minimal computational complexity, depending on the load. If
a responder does not support (or does not want to support) any of the initiator'
s proposals, it MUST respond without an MP_CAPABLE option, thus forcing a fallba
ck to regular TCP.</t>
<t>The MP_CAPABLE option is only used in the first subflow of a connecti </t>
on, in order to identify the connection; all following subflows will use the "Jo <dl newline="false" spacing="normal" indent="14" pn="section-3.1-20">
in" option (see <xref target="sec_join"/>) to join the existing connection.</t> <dt pn="section-3.1-20.1">A:</dt>
<t>If a SYN contains an MP_CAPABLE option but the <dd pn="section-3.1-20.2"> The leftmost bit, labeled "A", <bcp14>SHOUL
SYN/ACK does not, it is assumed that the sender of the SYN/ACK is not D</bcp14> be set to 1 to indicate "Checksum required", unless the system adminis
multipath capable; thus, the MPTCP session MUST operate as trator has decided that checksums are not required (for example, if the environm
a regular, single-path TCP. If a SYN does not contain an ent is controlled and no middleboxes exist that might adjust the payload).</dd>
MP_CAPABLE option, the SYN/ACK MUST NOT contain one <dt pn="section-3.1-20.3">B:</dt>
<dd pn="section-3.1-20.4"> The second bit, labeled "B", is an extensib
ility flag. It
<bcp14>MUST</bcp14> be set to 0 for current implementations. This
flag will be used for an extensibility mechanism in a future specifica
tion, and the impact of this flag will be defined at a later date. It is expecte
d, but not mandated, that this flag would be used as part of an alternative secu
rity mechanism that does not require a full version upgrade of the protocol but
does require redefining some elements of the handshake. If receiving a message w
ith the "B" flag set to 1 and this is not understood, then the MP_CAPABLE in thi
s SYN <bcp14>MUST</bcp14> be silently ignored, which triggers a fallback to regu
lar TCP; the sender is expected to retry with a format compatible with this lega
cy specification. Note that the length of the MP_CAPABLE option, and the meaning
s of bits "D" through "H", may be altered by setting B=1.</dd>
<dt pn="section-3.1-20.5">C:</dt>
<dd pn="section-3.1-20.6"> The third bit, labeled "C", is set to 1 to
indicate that the
sender of this option will not accept additional MPTCP subflows to
the source address and port, and therefore the receiver <bcp14>MUST NO
T</bcp14> try to open any additional subflows toward this address
and port. This improves efficiency in situations where the
sender knows a restriction is in place -- for example, if the sender i
s behind a strict NAT or operating behind a legacy Layer 4 load balancer.</dd>
<dt pn="section-3.1-20.7">D through H:</dt>
<dd pn="section-3.1-20.8"> The remaining bits, labeled "D" through "H"
, are used for
crypto algorithm negotiation. In this specification, only the
rightmost bit, labeled "H", is assigned. Bit "H" indicates the use
of HMAC-SHA256 (as defined in <xref target="sec_join" format="default"
sectionFormat="of" derivedContent="Section 3.2"/>). An implementation that onl
y supports this
method <bcp14>MUST</bcp14> set bit "H" to 1 and bits "D"
through "G" to 0.</dd>
</dl>
<t pn="section-3.1-21">A crypto algorithm <bcp14>MUST</bcp14> be specifi
ed. If flag bits "D" through "H" are all 0, the MP_CAPABLE option <bcp14>MUST</
bcp14> be treated as invalid and ignored (that is, it must be treated as a regul
ar TCP handshake).</t>
<t pn="section-3.1-22">The selection of the authentication algorithm als
o impacts the algorithm used to generate the token and the IDSN. In this specifi
cation, with only the SHA-256 algorithm (bit "H") specified and selected, the to
ken <bcp14>MUST</bcp14> be a truncated (most significant 32 bits) SHA-256 hash <
xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC623
4"/> of the key. A different, 64-bit truncation (the least significant 64 bits)
of the SHA-256 hash of the key <bcp14>MUST</bcp14> be used as the IDSN. Note tha
t the key <bcp14>MUST</bcp14> be hashed in network byte order. Also note that th
e "least significant" bits <bcp14>MUST</bcp14> be the rightmost bits of the SHA-
256 digest, as per <xref target="RFC6234" format="default" sectionFormat="of" de
rivedContent="RFC6234"/>. Future specifications of the use of the crypto bits ma
y choose to specify different algorithms for token and IDSN generation.</t>
<t pn="section-3.1-23">Both the crypto and checksum bits negotiate capab
ilities in similar
ways. For the "Checksum required" bit (labeled "A"), if either host
requires the use of checksums, checksums <bcp14>MUST</bcp14> be
used. In other words, the only way for checksums not to be used is if
both hosts in their SYNs set A=0. This decision is confirmed by the
setting of the "A" bit in the third packet (the ACK) of the
handshake. For example, if the initiator sets A=0 in the SYN but the
responder sets A=1 in the SYN/ACK, checksums <bcp14>MUST</bcp14> be
used in both directions, and the initiator will set A=1 in the
ACK. The decision regarding whether to use checksums will be stored by a
n implementation in a per-connection binary state variable. If A=1 is received b
y a host that does not want to use checksums, it <bcp14>MUST</bcp14> fall back t
o regular TCP by ignoring the MP_CAPABLE option as if it was invalid.</t>
<t pn="section-3.1-24">For crypto negotiation, the responder has the cho
ice. The initiator
creates a proposal setting a bit for each algorithm it supports to 1
(in this version of the specification, there is only one proposal, so
bit "H" will always be set to 1). The responder responds with only 1 bit
set -- this is the chosen algorithm. The rationale for this behavior is that th
e responder will typically be a server with potentially many thousands of connec
tions, so it may wish to choose an algorithm with minimal computational complexi
ty, depending on the load. If a responder does not support (or does not want to
support) any of the initiator's proposals, it <bcp14>MUST</bcp14> respond withou
t an MP_CAPABLE option, thus forcing a fallback to regular TCP.</t>
<t pn="section-3.1-25">The MP_CAPABLE option is only used in the first s
ubflow of a
connection, in order to identify the connection; all subsequent
subflows will use the MP_JOIN option (see <xref target="sec_join" format
="default" sectionFormat="of" derivedContent="Section 3.2"/>) to join the existi
ng connection.</t>
<t pn="section-3.1-26">If a SYN contains an MP_CAPABLE option but the
SYN/ACK does not, it is assumed that the sender of the SYN/ACK is not
multipath capable; thus, the MPTCP session <bcp14>MUST</bcp14> operate a
s
a regular, single-path TCP session. If a SYN does not contain an
MP_CAPABLE option, the SYN/ACK <bcp14>MUST NOT</bcp14> contain one
in response. If the third packet (the ACK) does not contain in response. If the third packet (the ACK) does not contain
the MP_CAPABLE option, then the session MUST fall back to the MP_CAPABLE option, then the session <bcp14>MUST</bcp14> fall back to
operating as a regular, single-path TCP. This is to maintain operating as a regular, single-path TCP session. This is done to maintai
n
compatibility with middleboxes on the path that drop some compatibility with middleboxes on the path that drop some
or all TCP options. Note that an implementation MAY choose or all TCP options. Note that an implementation <bcp14>MAY</bcp14> choos e
to attempt sending MPTCP options more than one time before to attempt sending MPTCP options more than one time before
making this decision to operate as regular TCP (see making this decision to operate as regular TCP (see
<xref target="heuristics"/>).</t> <xref target="heuristics" format="default" sectionFormat="of" derivedCon
tent="Section 3.9"/>).</t>
<t>If the SYN packets are unacknowledged, it is up to local <t pn="section-3.1-27">If the SYN packets are unacknowledged, it is up t
o local
policy to decide how to respond. It is expected that a sender policy to decide how to respond. It is expected that a sender
will eventually fall back to single-path TCP (i.e., without the will eventually fall back to single-path TCP (i.e., without the
MP_CAPABLE option) in order to work around middleboxes that MP_CAPABLE option) in order to work around middleboxes that
may drop packets with unknown options; however, the number of may drop packets with unknown options; however, the number of
multipath-capable attempts that are made first will be up to multipath-capable attempts that are made first will be up to
local policy. local policy.
It is possible that MPTCP and non-MPTCP SYNs could get reordered It is possible that MPTCP and non-MPTCP SYNs could get reordered
in the network. Therefore, the final state is inferred from the in the network. Therefore, the final state is inferred from the
presence or absence of the MP_CAPABLE option in the third packet presence or absence of the MP_CAPABLE option in the third packet
of the TCP handshake. If this option is not present, the of the TCP handshake. If this option is not present, the
connection SHOULD fall back to regular TCP, as documented in connection <bcp14>SHOULD</bcp14> fall back to regular TCP, as documented
<xref target="sec_fallback"/>.</t> in
<xref target="sec_fallback" format="default" sectionFormat="of" derivedC
<t>The initial data sequence number on an MPTCP connection ontent="Section 3.7"/>.</t>
<t pn="section-3.1-28">The IDSN on an MPTCP connection
is generated from the key. The algorithm for IDSN generation is is generated from the key. The algorithm for IDSN generation is
also determined from the negotiated authentication algorithm. also determined from the negotiated authentication algorithm.
In this specification, with only the SHA-256 algorithm specified and In this specification, with only the SHA-256 algorithm specified and
selected, the IDSN of a host MUST be the least significant 64 bits of the selected, the IDSN of a host <bcp14>MUST</bcp14> be the least significant 64 bits of the
SHA-256 hash of its key, i.e., IDSN-A = Hash(Key-A) and IDSN-B = Hash(Key-B). SHA-256 hash of its key, i.e., IDSN-A = Hash(Key-A) and IDSN-B = Hash(Key-B).
This deterministic generation of the IDSN allows a receiver to ensure This deterministic generation of the IDSN allows a receiver to ensure
that there are no gaps in sequence space at the start of the connection. that there are no gaps in sequence space at the start of the connection.
The SYN with MP_CAPABLE occupies the first octet of data sequence space, The SYN with MP_CAPABLE occupies the first octet of data sequence space,
although this does not need to be acknowledged at the connection level although this does not need to be acknowledged at the connection level
until the first data is sent (see <xref target="sec_generalop"/>).</t> until the first data is sent (see <xref target="sec_generalop" format="d efault" sectionFormat="of" derivedContent="Section 3.3"/>).</t>
</section> </section>
<section anchor="sec_join" numbered="true" toc="include" removeInRFC="fals
<section title="Starting a New Subflow" anchor="sec_join"> e" pn="section-3.2">
<t>Once an MPTCP connection has begun with the MP_CAPABLE <name slugifiedName="name-starting-a-new-subflow">Starting a New Subflow
</name>
<t pn="section-3.2-1">Once an MPTCP connection has begun with the MP_CAP
ABLE
exchange, further subflows can be added to the connection. exchange, further subflows can be added to the connection.
Hosts have knowledge of their own address(es), and can Hosts have knowledge of their own address(es) and can
become aware of the other host's addresses through become aware of the other host's addresses through
signaling exchanges as described in signaling exchanges as described in
<xref target="sec_pm"/>. Using this knowledge, a host <xref target="sec_pm" format="default" sectionFormat="of" derivedContent ="Section 3.4"/>. Using this knowledge, a host
can initiate a new subflow over a currently unused pair of can initiate a new subflow over a currently unused pair of
addresses. It is permitted for either host in a connection addresses. It is permissible for either host in a connection
to initiate the creation of a new subflow, but it is expected to initiate the creation of a new subflow, but it is expected
that this will normally be the original connection initiator that this will normally be the original connection initiator
(see <xref target="heuristics"/> for heuristics).</t> (see <xref target="heuristics" format="default" sectionFormat="of" deriv
edContent="Section 3.9"/> for heuristics).</t>
<t>A new subflow is started as a normal TCP SYN/ACK <t pn="section-3.2-2">A new subflow is started as a normal TCP SYN/ACK
exchange. The Join Connection (MP_JOIN) MPTCP option exchange. The Join Connection (MP_JOIN) MPTCP option
is used to identify the connection to be joined by the new subflow. is used to identify the connection to be joined by the new subflow.
It uses keying material that was exchanged in the initial MP_CAPABLE It uses keying material that was exchanged in the initial MP_CAPABLE
handshake (<xref target="sec_init"/>), and that handshake also handshake (<xref target="sec_init" format="default" sectionFormat="of" d erivedContent="Section 3.1"/>), and that handshake also
negotiates the crypto algorithm in use for the MP_JOIN handshake.</t> negotiates the crypto algorithm in use for the MP_JOIN handshake.</t>
<t pn="section-3.2-3">This section specifies the behavior of MP_JOIN usi
<t>This section specifies the behavior of MP_JOIN using the HMAC-SHA256 ng the HMAC-SHA256
algorithm. An MP_JOIN option is present in the SYN, SYN/ACK, algorithm. An MP_JOIN option is present in the SYN, SYN/ACK,
and ACK of the three-way handshake, although in each case with a and ACK of the three-way handshake, although in each case with a
different format.</t> different format.</t>
<t pn="section-3.2-4">In the first MP_JOIN on the SYN packet, illustrate
<t>In the first MP_JOIN on the SYN packet, illustrated in d in
<xref target="tcpm_join"/>, the initiator sends a token, random <xref target="tcpm_join" format="default" sectionFormat="of" derivedCont
number, and address ID.</t> ent="Figure 5"/>, the initiator sends a token, random
number, and Address ID.</t>
<t>The token is used to identify the MPTCP connection and is a <figure anchor="tcpm_join" align="left" suppress-title="false" pn="figur
e-5">
<name slugifiedName="name-join-connection-mp_join-opt">Join Connection
(MP_JOIN) Option (for Initial SYN)</name>
<artwork align="left" name="" type="" alt="" pn="section-3.2-5.1">
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----+-+---------------+
| Kind | Length = 12 |Subtype|(rsv)|B| Address ID |
+---------------+---------------+-------+-----+-+---------------+
| Receiver's Token (32 bits) |
+---------------------------------------------------------------+
| Sender's Random Number (32 bits) |
+---------------------------------------------------------------+ </artwork>
</figure>
<t pn="section-3.2-6">The token is used to identify the MPTCP connection
and is a
cryptographic hash of the receiver's key, as exchanged cryptographic hash of the receiver's key, as exchanged
in the initial MP_CAPABLE handshake (<xref target="sec_init"/>). in the initial MP_CAPABLE handshake (<xref target="sec_init" format="def ault" sectionFormat="of" derivedContent="Section 3.1"/>).
In this specification, the tokens presented in this In this specification, the tokens presented in this
option are generated by the SHA-256 <xref target="RFC6234"/> option are generated by the SHA-256 algorithm <xref target="RFC6234" for
algorithm, truncated to the most significant 32 bits. The token mat="default" sectionFormat="of" derivedContent="RFC6234"/>, truncated to the mo
st significant 32 bits. The token
included in the MP_JOIN option is the token that the receiver included in the MP_JOIN option is the token that the receiver
of the packet uses to identify this connection; i.e., Host A of the packet uses to identify this connection; i.e., Host A
will send Token-B (which is generated from Key-B). Note that the will send Token-B (which is generated from Key-B). Note that the
hash generation algorithm can be overridden by the choice of hash generation algorithm can be overridden by the choice of
cryptographic handshake algorithm, as defined in <xref target="sec_init" cryptographic handshake algorithm, as defined in <xref target="sec_init"
/>.</t> format="default" sectionFormat="of" derivedContent="Section 3.1"/>.</t>
<t pn="section-3.2-7">The MP_JOIN SYN sends not only the token (which is
<t>The MP_JOIN SYN sends not only the token (which is static for a static for a
connection) but also random numbers (nonces) that are used to prevent connection) but also random numbers (nonces) that are used to prevent
replay attacks on the authentication method. Recommendations for the replay attacks on the authentication method. Recommendations for the
generation of random numbers for this purpose are given in <xref target= generation of random numbers for this purpose are given in <xref target=
"RFC4086"/>.</t> "RFC4086" format="default" sectionFormat="of" derivedContent="RFC4086"/>.</t>
<t pn="section-3.2-8">The MP_JOIN option includes an "Address ID". This
<t>The MP_JOIN option includes an "Address ID". This is an identifier is an identifier
generated by the sender of the option, used to identify the source address generated by the sender of the option, used to identify the source address
of this packet, even if the IP header has been changed in transit by a middlebox. of this packet, even if the IP header has been changed in transit by a middlebox.
The numeric value of this field is generated by the sender and must map uniquely The numeric value of this field is generated by the sender and must map uniquely
to a source IP address for the sending host. to a source IP address for the sending host.
The Address ID allows address removal (<xref target="sec_remove_addr"/>) The Address ID allows address removal (<xref target="sec_remove_addr" fo rmat="default" sectionFormat="of" derivedContent="Section 3.4.2"/>)
without needing to know what the source address at the without needing to know what the source address at the
receiver is, thus allowing address removal through NATs. receiver is, thus allowing address removal through NATs.
The Address ID also allows correlation between new subflow setup attempt s The Address ID also allows correlation between new subflow setup attempt s
and address signaling (<xref target="sec_add_address"/>), and address signaling (<xref target="sec_add_address" format="default" s ectionFormat="of" derivedContent="Section 3.4.1"/>),
to prevent setting up duplicate subflows on the same path, if an MP_JOIN to prevent setting up duplicate subflows on the same path, if an MP_JOIN
and ADD_ADDR are sent at the same time.</t> and ADD_ADDR are sent at the same time.</t>
<t pn="section-3.2-9">The Address IDs of the subflow used in the initial
<t>The Address IDs of the subflow used in the initial SYN SYN
exchange of the first subflow in the connection are implicit, exchange of the first subflow in the connection are implicit
and have the value zero. A host MUST store the mappings between and have the value zero. A host <bcp14>MUST</bcp14> store the mappings b
etween
Address IDs and addresses both for itself and the remote host. Address IDs and addresses both for itself and the remote host.
An implementation will also need to know which local and remote An implementation will also need to know which local and remote
Address IDs are associated with which established subflows, for Address IDs are associated with which established subflows, for
when addresses are removed from a local or remote host.</t> when addresses are removed from a local or remote host.</t>
<t pn="section-3.2-10">The MP_JOIN option on packets with the SYN flag s
<t>The MP_JOIN option on packets with the SYN flag set also includes 4 b et also includes
its of flags, 3 of which are currently reserved and MUST be set to zero by the s 4 bits of flags, 3 of which are currently reserved and
ender. The final bit, labeled "B", indicates whether the sender of this option w <bcp14>MUST</bcp14> be set to 0 by the sender. The final bit, labeled
ishes this subflow to be used as a backup path (B=1) in the event of failure of "B", indicates whether the sender of this option (1) wishes this
other paths, or whether it wants it to be used as part of the connection immedia subflow to be used as a backup path (B=1) in the event of failure of
tely. By setting B=1, the sender of the option is requesting the other host to o other paths or (2) wants the subflow to be used as part of the
nly send data on this subflow if there are no available subflows where B=0. Subf connection immediately. By setting B=1, the sender of the option is
low policy is discussed in more detail in <xref target="sec_policy"/>.</t> requesting that the other host only send data on this subflow if there
are no available subflows where B=0. Subflow policy is discussed in more
<?rfc needLines='10'?> detail in <xref target="sec_policy" format="default" sectionFormat="of" derived
<figure align="center" anchor="tcpm_join" title="Join Connection (MP_JOI Content="Section 3.3.8"/>.</t>
N) Option (for Initial SYN)"> <t pn="section-3.2-11">When receiving a SYN with an MP_JOIN option that
<artwork align="left"><![CDATA[ contains
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----+-+---------------+
| Kind | Length = 12 |Subtype|(rsv)|B| Address ID |
+---------------+---------------+-------+-----+-+---------------+
| Receiver's Token (32 bits) |
+---------------------------------------------------------------+
| Sender's Random Number (32 bits) |
+---------------------------------------------------------------+
]]></artwork>
</figure>
<t>When receiving a SYN with an MP_JOIN option that contains
a valid token for an existing MPTCP connection, the recipient a valid token for an existing MPTCP connection, the recipient
SHOULD respond with a SYN/ACK also containing an MP_JOIN <bcp14>SHOULD</bcp14> respond with a SYN/ACK also containing an MP_JOIN
option containing a random number and a truncated (leftmost 64 option containing a random number and a truncated (leftmost 64 bits) HMA
bits) Hash-based Message Authentication Code (HMAC). This C. This
version of the option is shown in <xref target="tcpm_join2"/>. version of the option is shown in <xref target="tcpm_join2" format="defa
If the token is unknown, or the host wants to refuse subflow ult" sectionFormat="of" derivedContent="Figure 6"/>. If the token is unknown or
the host wants to refuse subflow
establishment (for example, due to a limit on the number of establishment (for example, due to a limit on the number of
subflows it will permit), the receiver will send back a reset subflows it will permit), the receiver will send back a reset
(RST) signal, analogous to an unknown port in TCP, containing a (RST) signal, analogous to an unknown port in TCP, containing an
MP_TCPRST option (<xref target="sec_reset"/>) with a "MPTCP MP_TCPRST option (<xref target="sec_reset" format="default" sectionForma
t="of" derivedContent="Section 3.6"/>) with an "MPTCP
specific error" reason code. Although calculating an HMAC specific error" reason code. Although calculating an HMAC
requires cryptographic operations, it is believed that the requires cryptographic operations, it is believed that the
32-bit token in the MP_JOIN SYN gives sufficient protection against blind state 32-bit token in the MP_JOIN SYN gives sufficient protection against blind state
exhaustion attacks; therefore, there is no need to provide exhaustion attacks; therefore, there is no need to provide
mechanisms to allow a responder to operate statelessly at the mechanisms to allow a responder to operate statelessly at the
MP_JOIN stage.</t> MP_JOIN stage.</t>
<figure anchor="tcpm_join2" align="left" suppress-title="false" pn="figu
<t>An HMAC is sent by both hosts -- by the initiator (Host A) re-6">
<name slugifiedName="name-join-connection-mp_join-opti">Join Connectio
n (MP_JOIN) Option (for Responding SYN/ACK)</name>
<artwork align="left" name="" type="" alt="" pn="section-3.2-12.1">
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----+-+---------------+
| Kind | Length = 16 |Subtype|(rsv)|B| Address ID |
+---------------+---------------+-------+-----+-+---------------+
| |
| Sender's Truncated HMAC (64 bits) |
| |
+---------------------------------------------------------------+
| Sender's Random Number (32 bits) |
+---------------------------------------------------------------+ </artwork>
</figure>
<t pn="section-3.2-13">An HMAC is sent by both hosts -- by the initiator
(Host A)
in the third packet (the ACK) and by the responder (Host B) in in the third packet (the ACK) and by the responder (Host B) in
the second packet (the SYN/ACK). Doing the HMAC exchange at this the second packet (the SYN/ACK). Doing the HMAC exchange at this
stage allows both hosts to have first exchanged random data (in the stage allows both hosts to have first exchanged random data (in the
first two SYN packets) that is used as the "message". This first two SYN packets) that is used as the "message". This
specification defines that HMAC as defined in <xref target="RFC2104"/> specification defines that HMAC as defined in <xref target="RFC2104" for
is used, along with the SHA-256 hash algorithm <xref target="RFC6234"/>, mat="default" sectionFormat="of" derivedContent="RFC2104"/>
is used, along with the SHA-256 hash algorithm <xref target="RFC6234" fo
rmat="default" sectionFormat="of" derivedContent="RFC6234"/>,
and that the output is truncated to the leftmost 160 bits (20 octets). and that the output is truncated to the leftmost 160 bits (20 octets).
Due to option space limitations, the HMAC included in Due to option space limitations, the HMAC included in
the SYN/ACK is truncated to the leftmost 64 bits, but this is the SYN/ACK is truncated to the leftmost 64 bits, but this is
acceptable since random numbers are used; thus, an attacker acceptable, since random numbers are used; thus, an attacker
only has one chance to correctly guess the HMAC that matches the random only has one chance to correctly guess the HMAC that matches the random
number previously sent by the peer (if the HMAC is number previously sent by the peer (if the HMAC is
incorrect, the TCP connection is closed, so a new MP_JOIN negotiation incorrect, the TCP connection is closed, so a new MP_JOIN negotiation
with a new random number is required).</t> with a new random number is required).</t>
<t pn="section-3.2-14">The initiator's authentication information is sen
<t>The initiator's authentication information is sent in its t in its
first ACK (the third packet of the handshake), as shown in first ACK (the third packet of the handshake), as shown in
<xref target="tcpm_join3"/>. This data needs to be sent reliably, <xref target="tcpm_join3" format="default" sectionFormat="of" derivedCon tent="Figure 7"/>. This data needs to be sent reliably,
since it is the only time this HMAC is sent; since it is the only time this HMAC is sent;
therefore, receipt of this packet MUST trigger a regular TCP ACK therefore, receipt of this packet <bcp14>MUST</bcp14> trigger a regular
in response, and the packet MUST be retransmitted if this TCP ACK
in response, and the packet <bcp14>MUST</bcp14> be retransmitted if this
ACK is not received. In other words, sending the ACK/MP_JOIN ACK is not received. In other words, sending the ACK/MP_JOIN
packet places the subflow in the PRE_ESTABLISHED state, and it packet places the subflow in the PRE_ESTABLISHED state, and it
moves to the ESTABLISHED state only on receipt of an ACK from moves to the ESTABLISHED state only on receipt of an ACK from
the receiver. It is not permitted to send data while in the the receiver. It is not permissible to send data while in the
PRE_ESTABLISHED state. The reserved bits in this option MUST be set PRE_ESTABLISHED state. The reserved bits in this option <bcp14>MUST</bcp
to zero by the sender.</t> 14> be set
to 0 by the sender.</t>
<t>The key for the HMAC algorithm, in the case of the message transmitte <figure anchor="tcpm_join3" align="left" suppress-title="false" pn="figu
d by Host A, will be Key-A followed by Key-B, and in the case of Host B, Key-B f re-7">
ollowed by Key-A. These are the keys that were exchanged in the original MP_CAPA <name slugifiedName="name-join-connection-mp_join-optio">Join Connecti
BLE handshake. The "message" for the HMAC algorithm in each case is the concaten on (MP_JOIN) Option (for Initiator's First ACK)</name>
ations of random number for each host (denoted by R): for Host A, R-A followed b <artwork align="left" name="" type="" alt="" pn="section-3.2-15.1">
y R-B; and for Host B, R-B followed by R-A.</t> 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
<?rfc needLines='10'?> +---------------+---------------+-------+-----------------------+
<figure align="center" anchor="tcpm_join2" title="Join Connection (MP_JO | Kind | Length = 24 |Subtype| (reserved) |
IN) Option (for Responding SYN/ACK)"> +---------------+---------------+-------+-----------------------+
<artwork align="left"><![CDATA[ | |
1 2 3 | |
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | Sender's Truncated HMAC (160 bits) |
+---------------+---------------+-------+-----+-+---------------+ | |
| Kind | Length = 16 |Subtype|(rsv)|B| Address ID | | |
+---------------+---------------+-------+-----+-+---------------+ +---------------------------------------------------------------+ </artwork>
| |
| Sender's Truncated HMAC (64 bits) |
| |
+---------------------------------------------------------------+
| Sender's Random Number (32 bits) |
+---------------------------------------------------------------+
]]></artwork>
</figure>
<?rfc needLines='12'?>
<figure align="center" anchor="tcpm_join3" title="Join Connection (MP_JO
IN) Option (for Third ACK)">
<artwork align="left"><![CDATA[
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----------------------+
| Kind | Length = 24 |Subtype| (reserved) |
+---------------+---------------+-------+-----------------------+
| |
| |
| Sender's Truncated HMAC (160 bits) |
| |
| |
+---------------------------------------------------------------+
]]></artwork>
</figure> </figure>
<t pn="section-3.2-16">The key for the HMAC algorithm, in the case of th
e message
transmitted by Host A, will be Key-A followed by Key-B; and in the
case of Host B, Key-B followed by Key-A. These are the keys that were
exchanged in the original MP_CAPABLE handshake. The "message" for the
HMAC algorithm in each case is the concatenation of the random numbers for
each host (denoted by R): for Host A, R-A followed by R-B; and for
Host B, R-B followed by R-A.</t>
<t pn="section-3.2-17">These various MPTCP options fit together to enabl
e authenticated subflow setup as illustrated in <xref target="fig_tokens" format
="default" sectionFormat="of" derivedContent="Figure 8"/>.</t>
<figure anchor="fig_tokens" align="left" suppress-title="false" pn="figu
re-8">
<name slugifiedName="name-example-use-of-mptcp-authen">Example Use of
MPTCP Authentication</name>
<artwork align="left" name="" type="" alt="" pn="section-3.2-18.1">
Host A Host B
------------------------ ----------
Address A1 Address A2 Address B1
---------- ---------- ----------
| | |
| | SYN + MP_CAPABLE |
|---------------------------------------------&gt;|
|&lt;---------------------------------------------|
| SYN/ACK + MP_CAPABLE(Key-B) |
| | |
| ACK + MP_CAPABLE(Key-A, Key-B) |
|---------------------------------------------&gt;|
| | |
| | SYN + MP_JOIN(Token-B, R-A) |
| |-------------------------------&gt;|
| |&lt;-------------------------------|
| | SYN/ACK + MP_JOIN(HMAC-B, R-B) |
| | |
| | ACK + MP_JOIN(HMAC-A) |
| |-------------------------------&gt;|
| |&lt;-------------------------------|
| | ACK |
<t>These various MPTCP options fit together to enable authenticated subf HMAC-A = HMAC(Key=(Key-A + Key-B), Msg=(R-A + R-B))
low setup as illustrated in <xref target="fig_tokens"/>.</t> HMAC-B = HMAC(Key=(Key-B + Key-A), Msg=(R-B + R-A)) </artwork>
<?rfc needLines='24'?>
<figure align="center" anchor="fig_tokens" title="Example Use of MPTCP A
uthentication">
<artwork align="left"><![CDATA[
Host A Host B
------------------------ ----------
Address A1 Address A2 Address B1
---------- ---------- ----------
| | |
| | SYN + MP_CAPABLE |
|--------------------------------------------->|
|<---------------------------------------------|
| SYN/ACK + MP_CAPABLE(Key-B) |
| | |
| ACK + MP_CAPABLE(Key-A, Key-B) |
|--------------------------------------------->|
| | |
| | SYN + MP_JOIN(Token-B, R-A) |
| |------------------------------->|
| |<-------------------------------|
| | SYN/ACK + MP_JOIN(HMAC-B, R-B) |
| | |
| | ACK + MP_JOIN(HMAC-A) |
| |------------------------------->|
| |<-------------------------------|
| | ACK |
HMAC-A = HMAC(Key=(Key-A+Key-B), Msg=(R-A+R-B))
HMAC-B = HMAC(Key=(Key-B+Key-A), Msg=(R-B+R-A))
]]></artwork>
</figure> </figure>
<t pn="section-3.2-19">If the token received at Host B is unknown or loc
<t>If the token received at Host B is unknown or local policy al policy
prohibits the acceptance of the new subflow, the recipient MUST prohibits the acceptance of the new subflow, the recipient <bcp14>MUST</
respond with a TCP RST for the subflow. If appropriate, a MP_TCPRST bcp14>
option with a "Administratively prohibited" reason code respond with a TCP RST for the subflow. If appropriate, an MP_TCPRST
(<xref target="sec_reset"/>) should be included.</t> option with an "Administratively prohibited" reason code
(<xref target="sec_reset" format="default" sectionFormat="of" derivedCon
<t>If the token is accepted at Host B, but the HMAC returned to tent="Section 3.6"/>) should be included.</t>
Host A does not match the one expected, Host A MUST close the <t pn="section-3.2-20">If the token is accepted at Host B but the HMAC r
subflow with a TCP RST. In this, and all following cases of sending eturned to
a RST in this section, the sender SHOULD send a MP_TCPRST option Host A does not match the one expected, Host A <bcp14>MUST</bcp14> close
(<xref target="sec_reset"/>) on this RST packet with the reason the
code for a "MPTCP specific error".</t> subflow with a TCP RST. In this and all subsequent cases of sending
a RST as described in this section, the sender <bcp14>SHOULD</bcp14> sen
<t>If Host B does not receive the expected HMAC, or the MP_JOIN d an MP_TCPRST option
option is missing from the ACK, it MUST close the subflow with a (<xref target="sec_reset" format="default" sectionFormat="of" derivedCon
tent="Section 3.6"/>) on this RST packet with the reason
code for an "MPTCP-specific error".</t>
<t pn="section-3.2-21">If Host B does not receive the expected HMAC or t
he MP_JOIN
option is missing from the ACK, it <bcp14>MUST</bcp14> close the subflow
with a
TCP RST.</t> TCP RST.</t>
<t pn="section-3.2-22">If the HMACs are verified as correct, then both h
<t>If the HMACs are verified as correct, then both hosts have osts have
verified each other as being the same peers as existed at verified each other as being the same peers as those that existed at
the start of the connection, and they have agreed on which the start of the connection, and they have agreed on which
connection this subflow will become a part.</t> connection this subflow will become a part.</t>
<t pn="section-3.2-23">If the SYN/ACK as received at Host A does not hav
<t>If the SYN/ACK as received at Host A does not have an MP_JOIN e an MP_JOIN
option, Host A MUST close the subflow with a TCP RST.</t> option, Host A <bcp14>MUST</bcp14> close the subflow with a TCP RST.</t>
<t pn="section-3.2-24">This covers all cases of the loss of an MP_JOIN.
<t>This covers all cases of the loss of an MP_JOIN. In more detail, In more detail,
if MP_JOIN is stripped from the SYN on the path from A to if an MP_JOIN is stripped from the SYN on the path from A to
B, and Host B does not have a listener on the relevant B and Host B does not have a listener on the relevant
port, it will respond with a RST in the normal way. If in port, it will respond with a RST in the normal way. If in
response to a SYN with an MP_JOIN option, a SYN/ACK is response to a SYN with an MP_JOIN option a SYN/ACK is
received without the MP_JOIN option (either since it was received without the MP_JOIN option (because it was either
stripped on the return path, or it was stripped on the stripped on the return path, or stripped on the
outgoing path but Host B responded as if outgoing path leading to Host B responding as if
it were a new regular TCP session), then the subflow is it was a new regular TCP session), then the subflow is
unusable and Host A MUST close it with a RST.</t> unusable and Host A <bcp14>MUST</bcp14> close it with a RST.</t>
<t pn="section-3.2-25">Note that additional subflows can be created
<t>Note that additional subflows can be created between any pair of ports (but see <xref target="heuristics" format="def
between any pair of ports (but see <xref target="heuristics"/> for ault" sectionFormat="of" derivedContent="Section 3.9"/> for
heuristics); no explicit application-level accept calls or heuristics); no explicit application-level accept calls or
bind calls are required to open additional subflows. To bind calls are required to open additional subflows. To
associate a new subflow with an existing connection, the token associate a new subflow with an existing connection, the token
supplied in the subflow's SYN exchange is used for supplied in the subflow's SYN exchange is used for
demultiplexing. This then binds the 5-tuple of the TCP demultiplexing. This then binds the 5-tuple of the TCP
subflow to the local token of the connection. A consequence is subflow to the local token of the connection. One consequence is
that it is possible to allow any port pairs to be used for a that it is possible to allow any port pairs to be used for a
connection. </t> connection. </t>
<t pn="section-3.2-26">Demultiplexing subflow SYNs <bcp14>MUST</bcp14> b
<t>Demultiplexing subflow SYNs MUST be done using the token; e done using the token;
this is unlike traditional TCP, where the destination port is this is unlike traditional TCP, where the destination port is
used for demultiplexing SYN packets. Once a subflow is set up, used for demultiplexing SYN packets. Once a subflow is set up,
demultiplexing packets is done using the 5-tuple, as in demultiplexing packets is done using the 5-tuple, as in
traditional TCP. The 5-tuples will be mapped to the local traditional TCP. The 5-tuples will be mapped to the local
connection identifier (token). Note that Host A will know its connection identifier (token). Note that Host A will know its
local token for the subflow even though it is not sent on the local token for the subflow even though it is not sent on the
wire -- only the responder's token is sent.</t> wire -- only the responder's token is sent.</t>
</section> </section>
<section anchor="sec_generalop" numbered="true" toc="include" removeInRFC=
<section title="General MPTCP Operation" anchor="sec_generalop"> "false" pn="section-3.3">
<t>This section discusses operation of MPTCP for data transfer. At a hig <name slugifiedName="name-mptcp-operation-and-data-tr">MPTCP Operation a
h level, an MPTCP implementation will take one input data stream from an applica nd Data Transfer</name>
tion, and split it into one or more subflows, with sufficient control informatio <t pn="section-3.3-1">This section discusses the operation of MPTCP for
n to allow it to be reassembled and delivered reliably and in order to the recip data transfer. At a high level, an MPTCP implementation will take one input data
ient application. The following subsections define this behavior in detail.</t> stream from an application and split it into one or more subflows, with suffici
ent control information to allow it to be reassembled and delivered reliably and
<t>The data sequence mapping and the Data ACK are signaled in the Data S in order to the recipient application. The following subsections define this be
equence Signal (DSS) option (<xref target="tcpm_dsn"/>). Either or both can be s havior in detail.</t>
ignaled in one DSS, depending on the flags set. The data sequence mapping define <t pn="section-3.3-2">The Data Sequence Mapping and the Data ACK are sig
s how the sequence space on the subflow maps to the connection level, and the Da naled in the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat=
ta ACK acknowledges receipt of data at the connection level. These functions are "of" derivedContent="Figure 9"/>). Either or both can be signaled in one DSS, de
described in more detail in the following two subsections.</t> pending on the flags set. The Data Sequence Mapping defines how the sequence spa
ce on the subflow maps to the connection level, and the Data ACK acknowledges re
<?rfc needLines='18'?> ceipt of data at the connection level. These functions are described in more det
<figure align="center" anchor="tcpm_dsn" title="Data Sequence Signal (DS ail in the following two subsections.</t>
S) Option"> <figure anchor="tcpm_dsn" align="left" suppress-title="false" pn="figure
<artwork align="left"><![CDATA[ -9">
<name slugifiedName="name-data-sequence-signal-dss-op">Data Sequence S
ignal (DSS) Option</name>
<artwork align="left" name="" type="" alt="" pn="section-3.3-3.1">
1 2 3 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+----------------------+ +---------------+---------------+-------+----------------------+
| Kind | Length |Subtype| (reserved) |F|m|M|a|A| | Kind | Length |Subtype| (reserved) |F|m|M|a|A|
+---------------+---------------+-------+----------------------+ +---------------+---------------+-------+----------------------+
| Data ACK (4 or 8 octets, depending on flags) | | Data ACK (4 or 8 octets, depending on flags) |
+--------------------------------------------------------------+ +--------------------------------------------------------------+
| Data sequence number (4 or 8 octets, depending on flags) | | Data Sequence Number (4 or 8 octets, depending on flags) |
+--------------------------------------------------------------+ +--------------------------------------------------------------+
| Subflow Sequence Number (4 octets) | | Subflow Sequence Number (4 octets) |
+-------------------------------+------------------------------+ +-------------------------------+------------------------------+
| Data-Level Length (2 octets) | Checksum (2 octets) | | Data-Level Length (2 octets) | Checksum (2 octets) |
+-------------------------------+------------------------------+ +-------------------------------+------------------------------+ </artwork>
]]></artwork>
</figure> </figure>
<t pn="section-3.3-4">The flags, when set, define the contents of this option, as follows:
<t>The flags, when set, define the contents of this option, as follows: </t>
<ul spacing="normal" bare="false" empty="false" pn="section-3.3-5">
<list style="symbols"> <li pn="section-3.3-5.1">A = Data ACK present</li>
<t>A = Data ACK present</t> <li pn="section-3.3-5.2">a = Data ACK is 8 octets (if not set, Data AC
<t>a = Data ACK is 8 octets (if not set, Data ACK is 4 octets)</t> K is 4 octets)</li>
<t>M = Data Sequence Number (DSN), Subflow Sequence Number (SSN), Da <li pn="section-3.3-5.3">M = Data Sequence Number (DSN), Subflow Seque
ta-Level Length, and Checksum (if negotiated) present</t> nce Number (SSN), Data-Level Length, and Checksum (if negotiated) present</li>
<t>m = Data sequence number is 8 octets (if not set, DSN is 4 octets <li pn="section-3.3-5.4">m = Data Sequence Number is 8 octets (if not
)</t> set, DSN is 4 octets)</li>
</list> </ul>
<t pn="section-3.3-6">
The flags 'a' and 'm' only have meaning if the corresponding 'A' or 'M'
flags are set; otherwise, they will be ignored. The maximum length of this optio
n, with all flags set, is 28 octets.</t>
<t>The 'F' flag indicates "Data FIN". If present, this means that this m
apping covers the final data from the sender. This is the connection-level equiv
alent to the FIN flag in single-path TCP. A connection is not closed unless ther
e has been a Data FIN exchange, a MP_FASTCLOSE (<xref target="sec_fastclose"/>)
message, or an implementation-specific, connection-level send timeout. The purpo
se of the Data FIN and the interactions between this flag, the subflow-level FIN
flag, and the data sequence mapping are described in <xref target="sec_close"/>
.
The remaining reserved bits MUST be set to zero by an implementation of
this specification.</t>
<t>Note that the checksum is only present in this option if the use of M
PTCP checksumming has been negotiated at the MP_CAPABLE handshake (see <xref tar
get="sec_init"/>). The presence of the checksum can be inferred from the length
of the option. If a checksum is present, but its use had not been negotiated in
the MP_CAPABLE handshake, the receiver MUST close the subflow with a RST as it n
ot behaving as negotiated. If a checksum is not present when its use has been ne
gotiated, the receiver MUST close the subflow with a RST as it is considered bro
ken. In both cases, this RST SHOULD be accompanied with a MP_TCPRST option (<xre
f target="sec_reset"/>) with the reason code for a "MPTCP specific error".</t>
<section title="Data Sequence Mapping" anchor="sec_dsn">
<t>The data stream as a whole can be reassembled through the use of th
e data sequence mapping components of the DSS option (<xref target="tcpm_dsn"/>)
, which define the
mapping from the subflow sequence number to the data sequence number. This is us
ed by the receiver to ensure in-order delivery to the application layer. Meanwhi
le, the subflow-level sequence numbers (i.e., the regular sequence numbers in th
e TCP header) have subflow-only relevance. It is expected (but not mandated) tha
t SACK <xref target='RFC2018'/> is used at the subflow level to improve efficien
cy.</t>
<t>The data sequence mapping specifies a mapping from subflow sequence s
pace to data sequence space. This is expressed in terms of starting sequence num
bers for the subflow and the data level, and a length of bytes for which this ma
pping is valid.
This explicit mapping for a range of data was chosen rather than per-packet sign
aling to assist with compatibility with situations where TCP/IP segmentation or
coalescing is undertaken separately from the stack that is generating the data f
low (e.g., through the use of TCP segmentation offloading on network interface c
ards, or by middleboxes such as performance enhancing proxies). It also allows a
single mapping to cover many packets, which may be useful in bulk transfer situ
ations.</t>
<t>A mapping is fixed, in that the subflow sequence number is bound to t
he data sequence number after the mapping has been processed. A sender MUST NOT
change this mapping
after it has been declared; however, the same data sequence number can be mapped
to by different subflows for retransmission purposes (see <xref target="sec_ret
ransmit"/>). This would also permit the same data to be sent simultaneously on m
ultiple subflows for resilience or efficiency purposes, especially in the case o
f lossy links. Although the detailed specification of such operation is outside
the scope of this document, an implementation SHOULD treat the first data that i
s received at a subflow for the data sequence space as that which should be deli
vered to the application, and any later data for that sequence space SHOULD be i
gnored.</t>
<t>The data sequence number is specified as an absolute value, whereas t
he subflow sequence numbering is relative (the SYN at the start of the subflow h
as relative subflow sequence number 0). This is to allow middleboxes to change t
he initial sequence number of a subflow, such as firewalls that undertake Initia
l Sequence Number (ISN) randomization.</t>
<t>The data sequence mapping also contains a checksum of the data that t
his mapping covers, if use of checksums has been negotiated at the MP_CAPABLE ex
change. Checksums are used to detect if the payload has been adjusted in any way
by a non-MPTCP-aware middlebox. If this checksum fails, it will trigger a failu
re of the subflow, or a fallback to regular TCP, as documented in <xref target="
sec_fallback"/>, since MPTCP can no longer reliably know the subflow sequence sp
ace at the receiver to build data sequence mappings. Without checksumming enable
d, corrupt data may be delivered to the application if a middlebox alters segmen
t boundaries, alters content, or does not deliver all segments covered by a data
sequence mapping. It is therefore RECOMMENDED to use checksumming unless it is
known the network path contains no such devices.</t>
<t>The checksum algorithm used is the standard TCP checksum <xref target
="RFC0793"/>, operating over the data covered by this mapping, along with a pseu
do-header as shown in <xref target="fig_pseudo"/>.</t>
<?rfc needLines='18'?> The flags "a" and "m" only have meaning if the corresponding "A" or "M"
<figure align="center" anchor="fig_pseudo" title="Pseudo-Header for DSS flags are set; otherwise, they will be ignored. The maximum length of this optio
Checksum"> n, with all flags set, is 28 octets.</t>
<artwork align="left"><![CDATA[ <t pn="section-3.3-7">The "F" flag indicates "Data FIN". If present, thi
s means that this
mapping covers the final data from the sender. This is the
connection-level equivalent of the FIN flag in single-path TCP. A connec
tion is not closed unless there has been a Data FIN exchange, an MP_FASTCLOSE (<
xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="
Section 3.5"/>) message, or an implementation-specific connection-level send tim
eout. The purpose of the Data FIN and the interactions between this flag, the su
bflow-level FIN flag, and the Data Sequence Mapping are described in <xref targe
t="sec_close" format="default" sectionFormat="of" derivedContent="Section 3.3.3"
/>.
The remaining reserved bits <bcp14>MUST</bcp14> be set to 0 by an implem
entation of this specification.</t>
<t pn="section-3.3-8">Note that the checksum is only present in this opt
ion if the use of
MPTCP checksumming has been negotiated at the MP_CAPABLE handshake
(see <xref target="sec_init" format="default" sectionFormat="of" derived
Content="Section 3.1"/>). The presence of the
checksum can be inferred from the length of the option. If a checksum
is present but its use had not been negotiated in the MP_CAPABLE
handshake, the receiver <bcp14>MUST</bcp14> close the subflow with a
RST, as it is not behaving as negotiated. If a checksum is not present w
hen its use has been negotiated, the receiver <bcp14>MUST</bcp14> close the subf
low with a RST, as it is considered broken. In both cases, this RST <bcp14>SHOUL
D</bcp14> be accompanied by an MP_TCPRST option (<xref target="sec_reset" format
="default" sectionFormat="of" derivedContent="Section 3.6"/>) with the reason co
de for an "MPTCP-specific error".</t>
<section anchor="sec_dsn" numbered="true" toc="include" removeInRFC="fal
se" pn="section-3.3.1">
<name slugifiedName="name-data-sequence-mapping">Data Sequence Mapping
</name>
<t pn="section-3.3.1-1">The data stream as a whole can be reassembled
through the use of the Data Sequence Mapping components of the DSS option (<xref
target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9"
/>), which define the
mapping from the subflow sequence number to the data sequence number. This is
used by the receiver to ensure in-order delivery to the application
layer. Meanwhile, the subflow-level sequence numbers (i.e., the
regular sequence numbers in the TCP header) are only relevant to the s
ubflow. It is expected (but not mandated) that SACK <xref target="RFC2018" forma
t="default" sectionFormat="of" derivedContent="RFC2018"/> will be used at the su
bflow level to improve efficiency.</t>
<t pn="section-3.3.1-2">The Data Sequence Mapping specifies a mapping
from the subflow
sequence space to the data sequence space. This is expressed in terms
of starting sequence numbers for the subflow and the data level, and a length of
bytes for which this mapping is valid.
This explicit mapping for a range of data, rather than per‑packet signaling, was
chosen to assist with compatibility with
situations where TCP/IP segmentation or coalescing is undertaken
separately from the stack that is generating the data flow (e.g.,
through the use of TCP segmentation offloading on network interface
cards, or by middleboxes such as Performance Enhancing Proxies
(PEPs) <xref target="RFC3135" format="default" sectionFormat="of" deri
vedContent="RFC3135"/>). It
also allows a single mapping to cover many packets; this may be useful
in bulk‑transfer situations.</t>
<t pn="section-3.3.1-3">A mapping is fixed, in that the subflow sequen
ce number is bound to the data sequence number after the mapping has been proces
sed. A sender <bcp14>MUST NOT</bcp14> change this mapping
after it has been declared; however, the same data sequence number can be
mapped to by different subflows for retransmission purposes (see
<xref target="sec_retransmit" format="default" sectionFormat="of" deri
vedContent="Section 3.3.6"/>). This would also
permit the same data to be sent simultaneously on multiple subflows
for resilience or efficiency purposes, especially in the case of
lossy links. Although the detailed specification of such operation
is outside the scope of this document, an implementation
<bcp14>SHOULD</bcp14> treat the first data that is received at a
subflow for the data sequence space as the data that should be deliver
ed to the application, and any subsequent data for that sequence space <bcp14>SH
OULD</bcp14> be ignored.</t>
<t pn="section-3.3.1-4">The data sequence number is specified as an ab
solute value,
whereas the subflow sequence numbering is relative (the SYN at the
start of the subflow has a relative subflow sequence number of
0). This is done to allow middleboxes to change the Initial Sequence
Number (ISN) of a subflow, such as firewalls that undertake ISN random
ization.</t>
<t pn="section-3.3.1-5">The Data Sequence Mapping also contains a chec
ksum of the data
that this mapping covers, if the use of checksums has been negotiated
at
the MP_CAPABLE exchange. Checksums are used to detect if the payload
has been adjusted in any way by a non-MPTCP-aware middlebox. If this
checksum fails, it will trigger a failure of the subflow, or a
fallback to regular TCP, as documented in <xref target="sec_fallback"
format="default" sectionFormat="of" derivedContent="Section 3.7"/>, since MPTCP
can no longer
reliably know the subflow sequence space at the receiver to build
Data Sequence Mappings. Without checksumming enabled, corrupt data
may be delivered to the application if a middlebox alters segment
boundaries, alters content, or does not deliver all segments covered
by a Data Sequence Mapping. It is therefore
<bcp14>RECOMMENDED</bcp14> that checksumming be used, unless it is kno
wn
that the network path contains no such devices.</t>
<t pn="section-3.3.1-6">The checksum algorithm used is the standard TC
P checksum <xref target="RFC0793" format="default" sectionFormat="of" derivedCon
tent="RFC0793"/>, operating over the data covered by this mapping, along with a
pseudo‑header as shown in <xref target="fig_pseudo" format="default" sectionForm
at="of" derivedContent="Figure 10"/>.</t>
<figure anchor="fig_pseudo" align="left" suppress-title="false" pn="fi
gure-10">
<name slugifiedName="name-pseudo-header-for-dss-check">Pseudo-Header
for DSS Checksum</name>
<artwork align="left" name="" type="" alt="" pn="section-3.3.1-7.1">
1 2 3 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+--------------------------------------------------------------+ +--------------------------------------------------------------+
| | | |
| Data Sequence Number (8 octets) | | Data Sequence Number (8 octets) |
| | | |
+--------------------------------------------------------------+ +--------------------------------------------------------------+
| Subflow Sequence Number (4 octets) | | Subflow Sequence Number (4 octets) |
+-------------------------------+------------------------------+ +-------------------------------+------------------------------+
| Data-Level Length (2 octets) | Zeros (2 octets) | | Data-Level Length (2 octets) | Zeros (2 octets) |
+-------------------------------+------------------------------+ +-------------------------------+------------------------------+ </artwork>
]]></artwork> </figure>
</figure> <t pn="section-3.3.1-8">Note that the data sequence number used in the
pseudo-header is always the 64-bit value, irrespective of what length is used i
<t>Note that the data sequence number used in the pseudo-header is alway n the DSS option itself. The standard TCP checksum algorithm has been chosen, si
s the 64-bit value, irrespective of what length is used in the DSS option itself nce it will be calculated anyway for the TCP subflow, and if calculated first ov
. The standard TCP checksum algorithm has been chosen since it will be calculate er the data before adding the pseudo-headers, it only needs to be calculated onc
d anyway for the TCP subflow, and if calculated first over the data before addin e. Furthermore, since the TCP checksum is additive, the checksum for a DSN_MAP c
g the pseudo-headers, it only needs to be calculated once. Furthermore, since th an be constructed by simply adding together the checksums for the data of each c
e TCP checksum is additive, the checksum for a DSN_MAP can be constructed by sim onstituent TCP segment and adding the checksum for the DSS pseudo‑header.</t>
ply adding together the checksums for the data of each constituent TCP segment, <t pn="section-3.3.1-9">Note that checksumming relies on the TCP subfl
and adding the checksum for the DSS pseudo-header.</t> ow containing contiguous data; therefore, a TCP subflow <bcp14>MUST NOT</bcp14>
use the Urgent Pointer to interrupt an existing mapping. Further note, however,
<t>Note that checksumming relies on the TCP subflow containing contiguou that if Urgent data is received on a subflow, it <bcp14>SHOULD</bcp14> be mapped
s data; therefore, a TCP subflow MUST NOT use the Urgent Pointer to interrupt an to the data sequence space and delivered to the application, analogous to Urgen
existing mapping. Further note, however, that if Urgent data is received on a s t data in regular TCP.</t>
ubflow, it SHOULD be mapped to the data sequence space and delivered to the appl <t pn="section-3.3.1-10">To avoid possible deadlock scenarios, subflow
ication analogous to Urgent data in regular TCP.</t> -level
processing should be undertaken separately from processing at the
<t>To avoid possible deadlock scenarios, subflow-level
processing should be undertaken separately from that at
connection level. Therefore, even if a mapping does not exist connection level. Therefore, even if a mapping does not exist
from the subflow space to the data-level space, the data from the subflow space to the data‑level space, the data
SHOULD still be ACKed at the subflow (if it is in-window). <bcp14>SHOULD</bcp14> still be ACKed at the subflow (if it is in-window)
.
This data cannot, however, be acknowledged at the data level This data cannot, however, be acknowledged at the data level
(<xref target="sec_dataack"/>) because its data sequence (<xref target="sec_dataack" format="default" sectionFormat="of" derivedC
numbers are unknown. Implementations MAY hold onto such ontent="Section 3.3.2"/>) because its data sequence
unmapped data for a short while in the expectation that a numbers are unknown. Implementations <bcp14>MAY</bcp14> hold onto such
unmapped data for a short while, in the expectation that a
mapping will arrive shortly. Such unmapped data cannot be mapping will arrive shortly. Such unmapped data cannot be
counted as being within the connection level receive window because this is counted as being within the connection-level receive window because this is
relative to the data sequence numbers, so if the receiver runs relative to the data sequence numbers, so if the receiver runs
out of memory to hold this data, it will have to be discarded. out of memory to hold this data, it will have to be discarded.
If a mapping for that subflow-level sequence space does not If a mapping for that subflow-level sequence space does not
arrive within a receive window of data, that subflow SHOULD be arrive within a receive window of data, that subflow <bcp14>SHOULD</bcp14> be
treated as broken, closed with a RST, and any unmapped data treated as broken, closed with a RST, and any unmapped data
silently discarded.</t> silently discarded.</t>
<t pn="section-3.3.1-11">Data sequence numbers are always 64-bit quant
<t>Data sequence numbers are always 64-bit quantities, and ities and
MUST be maintained as such in implementations. If a <bcp14>MUST</bcp14> be maintained as such in implementations. If a
connection is progressing at a slow rate, so protection connection is progressing at a slow rate, so protection
against wrapped sequence numbers is not required, against wrapped sequence numbers is not required,
then an implementation MAY include just the lower 32 then an implementation <bcp14>MAY</bcp14> include just the lower 32
bits of the data sequence number in the data sequence mapping and/or bits of the data sequence number in the Data Sequence Mapping and⁠/or
Data ACK as an optimization, and an implementation can make this choice Data ACK as an optimization, and an implementation can make this choice
independently for each packet. An implementation MUST be able to receive independently for each packet. An implementation <bcp14>MUST</bcp14> be
and process both 64-bit or 32-bit sequence number values, but it is not able to receive
required that an implementation is able to send both.</t> and process both 64-bit and 32-bit sequence number values, but it is not
required that an implementation be able to send both.</t>
<t>An implementation MUST send the full 64-bit data sequence number <t pn="section-3.3.1-12">An implementation <bcp14>MUST</bcp14> send th
e full 64-bit data sequence number
if it is transmitting at a sufficiently high rate that the 32-bit value if it is transmitting at a sufficiently high rate that the 32-bit value
could wrap within the Maximum Segment Lifetime could wrap within the Maximum Segment Lifetime
(MSL) <xref target="RFC7323"/>. The lengths of the DSNs used in these (MSL) <xref target="RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>. The lengths of the DSNs used in these
values (which may be different) are declared with flags in the values (which may be different) are declared with flags in the
DSS option. Implementations MUST accept a 32-bit DSN and implicitly DSS option. Implementations <bcp14>MUST</bcp14> accept a 32-bit DSN and implicitly
promote it to a 64-bit quantity by incrementing the upper 32 promote it to a 64-bit quantity by incrementing the upper 32
bits of sequence number each time the lower 32 bits of the sequence number each time the lower 32
bits wrap. A sanity check MUST be implemented to ensure that bits wrap. A sanity check <bcp14>MUST</bcp14> be implemented to ensure t
hat
a wrap occurs at an expected time (e.g., the sequence number jumps a wrap occurs at an expected time (e.g., the sequence number jumps
from a very high number to a very low number) and is not triggered from a very high number to a very low number) and is not triggered
by out-of-order packets.</t> by out‑of-order packets.</t>
<t pn="section-3.3.1-13">As with the standard TCP sequence number, the
<t>As with the standard TCP sequence number, the data sequence data sequence
number should not start at zero, but at a random value to make number should not start at zero, but at a random value to make
blind session hijacking harder. This specification requires blind session hijacking harder. This specification requires
setting the initial data sequence number (IDSN) of each host to the setting the IDSN of each host to the
least significant 64 bits of the SHA-256 hash of the host's key, as least significant 64 bits of the SHA-256 hash of the host's key, as
described in <xref target="sec_init"/>. This is required also in described in <xref target="sec_init" format="default" sectionFormat="of"
order for the receiver to know what the expected IDSN is, and thus derivedContent="Section 3.1"/>. This is also required in
order for the receiver to know what the expected IDSN is and thus
determine if any initial connection-level packets are missing; this determine if any initial connection-level packets are missing; this
is particularly relevant if two subflows start transmitting simultaneously.</t> is particularly relevant if two subflows start transmitting simultaneously.</t>
<t pn="section-3.3.1-14">The mapping provided by a Data Sequence Mappi
<t>A data sequence mapping does not need to be included in ng MUST apply to
some or all of the subflow sequence space in the TCP segment that
carries the option. It does not need to be included in
every MPTCP packet, as long as the subflow sequence space in every MPTCP packet, as long as the subflow sequence space in
that packet is covered by a mapping known at the receiver. This that packet is covered by a mapping known at the receiver. This
can be used to reduce overhead in cases where the mapping is can be used to reduce overhead in cases where the mapping is
known in advance; one such case is when there is a single known in advance. One such case is when there is a single
subflow between the hosts, another is when segments of subflow between the hosts, and another is when segments of
data are scheduled in larger than packet-sized chunks.</t> data are scheduled in larger-than-packet-sized chunks.</t>
<t pn="section-3.3.1-15">An "infinite" mapping can be used to fall bac
<t>An "infinite" mapping can be used to fall back to regular TCP by k to regular TCP by
mapping the subflow-level data to the connection-level data mapping the subflow-level data to the connection-level data
for the remainder of the connection (see for the remainder of the connection (see
<xref target="sec_fallback"/>). This is achieved by setting <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>). This is achieved by setting
the Data-Level Length field of the DSS option to the reserved value of 0. The the Data-Level Length field of the DSS option to the reserved value of 0. The
checksum, in such a case, will also be set to zero.</t> checksum, in such a case, will also be set to 0.</t>
</section> </section>
<section anchor="sec_dataack" numbered="true" toc="include" removeInRFC=
<section title="Data Acknowledgments" anchor="sec_dataack"> "false" pn="section-3.3.2">
<t>To provide full end-to-end resilience, MPTCP provides a <name slugifiedName="name-data-acknowledgments">Data Acknowledgments</
name>
<t pn="section-3.3.2-1">To provide full end-to-end resilience, MPTCP p
rovides a
connection-level acknowledgment, to act as a cumulative ACK for connection-level acknowledgment, to act as a cumulative ACK for
the connection as a whole. This is the "Data ACK" field of the connection as a whole. This is done via the "Data ACK" field of
the DSS option (<xref target="tcpm_dsn"/>). The Data ACK the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat="
of" derivedContent="Figure 9"/>). The Data ACK
is analogous to the behavior is analogous to the behavior
of the standard TCP cumulative ACK -- indicating of the standard TCP cumulative ACK -- indicating
how much data has been successfully received (with no how much data has been successfully received (with no
holes). This is in comparison to the subflow-level ACK, which holes). This can be compared to the subflow-level ACK, which
acts analogous to TCP SACK, given that there may still be acts in a fashion analogous to TCP SACK, given that there may still be
holes in the data stream at the connection level. holes in the data stream at the connection level.
The Data ACK specifies the next data sequence number The Data ACK specifies the next data sequence number
it expects to receive.</t> it expects to receive.</t>
<t pn="section-3.3.2-2">The Data ACK, as for the DSN, can be sent as t
<t>The Data ACK, as for the DSN, can be sent as the full 64-bit he full 64-bit
value, or as the lower 32 bits. If data is received with a 64-bit DSN, value or as the lower 32 bits. If data is received with a 64-bit DSN,
it MUST be acknowledged with a 64-bit Data ACK. If the DSN received it <bcp14>MUST</bcp14> be acknowledged with a 64-bit Data ACK. If the D
is 32 bits, an implementation can choose whether to send a 32-bit or SN received
64-bit Data ACK, and an implementation MUST accept either in this situat is 32 bits, an implementation can choose whether to send a 32-bit or
ion.</t> 64-bit Data ACK, and an implementation <bcp14>MUST</bcp14> accept either
in this situation.</t>
<t>The Data ACK proves that the data, and all required MPTCP <t pn="section-3.3.2-3">The Data ACK proves that the data, and all req
signaling, has been received and accepted by the remote end. uired MPTCP
signaling, have been received and accepted by the remote end.
One key use of the Data ACK signal is that it is used to indicate One key use of the Data ACK signal is that it is used to indicate
the left edge of the advertised receive window. As explained in the left edge of the advertised receive window. As explained in
<xref target="sec_rwin"/>, the receive window is shared by all <xref target="sec_rwin" format="default" sectionFormat="of" derivedContent="Section 3.3.4"/>, the receive window is shared by all
subflows and is relative to the Data ACK. Because of this, an subflows and is relative to the Data ACK. Because of this, an
implementation MUST NOT use the RCV.WND field of a TCP segment implementation <bcp14>MUST NOT</bcp14> use the RCV.WND field of a TCP segment
at the connection level if it does not also carry a DSS option with at the connection level if it does not also carry a DSS option with
a Data ACK field. Furthermore, a Data ACK field. Furthermore,
separating the connection-level acknowledgments from the separating the connection-level acknowledgments from the
subflow level allows processing to be done separately, and subflow level allows processing to be done separately, and
a receiver has the freedom to drop segments after acknowledgment a receiver has the freedom to drop segments after acknowledgment
at the subflow level, for example, due to memory constraints at the subflow level -- for example, due to memory constraints
when many segments arrive out of order.</t> when many segments arrive out of order.</t>
<t pn="section-3.3.2-4">An MPTCP sender <bcp14>MUST NOT</bcp14> free d
<t>An MPTCP sender MUST NOT free data from the send buffer until ata from the send buffer until
it has been acknowledged by both a Data ACK received on any subflow it has been acknowledged by both a Data ACK received on any subflow
and at the subflow level by all subflows on which the data was sent. and at the subflow level by all subflows on which the data was sent.
The former condition ensures liveness of the The former condition ensures liveness of the
connection and the latter condition ensures liveness and connection, and the latter condition ensures liveness and
self-consistence of a subflow when data needs to be self-consistence of a subflow when data needs to be
retransmitted. retransmitted.
Note, however, that if some data needs to be retransmitted multiple Note, however, that if some data needs to be retransmitted multiple
times over a subflow, there is a risk of blocking the sending times over a subflow, there is a risk of blocking the send
window. In this case, the MPTCP sender can decide to terminate the window. In this case, the MPTCP sender can decide to terminate the
subflow that is behaving badly by sending a RST, using an appropriate subflow that is behaving badly by sending a RST, using an appropriate
MP_TCPRST (<xref target="sec_reset"/>) error code.</t> MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of"
derivedContent="Section 3.6"/>) error code.</t>
<t>The Data ACK MAY be included in all segments; however, optimizations <t pn="section-3.3.2-5">The Data ACK <bcp14>MAY</bcp14> be included in
SHOULD be considered in more advanced implementations, where the all segments; however, optimizations
<bcp14>SHOULD</bcp14> be considered in more advanced implementations, wh
ere the
Data ACK is present in segments Data ACK is present in segments
only when the Data ACK value advances, and this behavior MUST only when the Data ACK value advances, and this behavior <bcp14>MUST</bc
be treated as valid. This behavior ensures the sender buffer p14>
be treated as valid. This behavior ensures that the send buffer
is freed, while reducing overhead when the data transfer is is freed, while reducing overhead when the data transfer is
unidirectional.</t> unidirectional.</t>
</section> </section>
<section anchor="sec_close" numbered="true" toc="include" removeInRFC="f
<section title="Closing a Connection" anchor="sec_close"> alse" pn="section-3.3.3">
<t>In regular TCP, a FIN announces the receiver that the sender has no m <name slugifiedName="name-closing-a-connection">Closing a Connection</
ore data to send. name>
<t pn="section-3.3.3-1">In regular TCP, a FIN announces to the receive
r that the sender has no more data to send.
In order to allow subflows to operate independently and to keep the appearance of TCP over the wire, In order to allow subflows to operate independently and to keep the appearance of TCP over the wire,
a FIN in MPTCP only affects the subflow on which it is sent. This a FIN in MPTCP only affects the subflow on which it is sent. This
allows nodes to exercise considerable freedom over which paths are in use at any one time. allows nodes to exercise considerable freedom over which paths are in use at any one time.
The semantics of a FIN remain as for regular TCP; i.e., it is not until both sides have ACKed The semantics of a FIN remain as for regular TCP; i.e., it is not until both sides have ACKed
each other's FINs that the subflow is fully closed.</t> each other's FINs that the subflow is fully closed.</t>
<t>When an application calls close() on a socket, this indicates that it has no more <t pn="section-3.3.3-2">When an application calls close() on a socket, this indicates that it has no more
data to send; for regular TCP, this would result in a FIN on the connection. For MPTCP, an data to send; for regular TCP, this would result in a FIN on the connection. For MPTCP, an
equivalent mechanism is needed, and this is referred to as the DATA_FIN.</t> equivalent mechanism is needed; this is referred to as the DATA_FIN.</t>
<t pn="section-3.3.3-3">A DATA_FIN is an indication that the sender ha
<t>A DATA_FIN is an indication that the sender has no more data to send, s no more data to send, and
and as such it can be used to verify that all data has been successfully rec
as such can be used to verify that all data has been successfully receiv eived. A DATA_FIN,
ed. A DATA_FIN,
as with the FIN on a regular TCP connection, is a unidirectional signal. </t> as with the FIN on a regular TCP connection, is a unidirectional signal. </t>
<t pn="section-3.3.3-4">The DATA_FIN is signaled by setting the "F" fl
<t>The DATA_FIN is signaled by setting the 'F' flag in the Data Sequence ag in the DSS
Signal option (<xref target="tcpm_dsn"/>) to 1. A DATA_FIN occupies 1 octet (th option (<xref target="tcpm_dsn" format="default" sectionFormat="of" de
e final octet) of the connection-level sequence space. Note that the DATA_FIN is rivedContent="Figure 9"/>)
included in the Data-Level Length, but not at the subflow level: for example, a to 1. A DATA_FIN occupies 1 octet (the final octet) of the
segment with DSN 80, and Data-Level Length 11, with DATA_FIN set, would map 10 connection-level sequence space. Note that the
octets from the subflow into data sequence space 80-89, the DATA_FIN is DSN 90; DATA_FIN is included in the Data-Level Length but not at the subflow
therefore, this segment including DATA_FIN would be acknowledged with a DATA_ACK level: for example, a segment with a DSN value of 80 and a
of 91.</t> Data-Level Length of 11, with DATA_FIN set, would map 10 octets from
the subflow into data sequence space 80-89, and the DATA_FIN would
<t>Note that when the DATA_FIN is not attached to a TCP segment containi be DSN 90; therefore, this segment, including DATA_FIN, would be
ng data, the Data Sequence Signal MUST have a subflow sequence number of 0, a Da acknowledged with a DATA_ACK of 91.</t>
ta-Level Length of 1, and the data sequence number that corresponds with the DAT <t pn="section-3.3.3-5">Note that when the DATA_FIN is not attached to
A_FIN itself. The checksum in this case will only cover the pseudo-header.</t> a TCP segment containing data, the DSS <bcp14>MUST</bcp14> have a subflow seque
nce number of 0, a Data-Level Length of 1, and the data sequence number that cor
<t>A DATA_FIN has the semantics and behavior as a regular TCP FIN, but a responds with the DATA_FIN itself. The checksum in this case will only cover the
t the connection level. Notably, it is only DATA_ACKed once all data has been su pseudo-header.</t>
ccessfully received at the connection level. Note, therefore, that a DATA_FIN is <t pn="section-3.3.3-6">A DATA_FIN has the same semantics and behavior
decoupled from a subflow FIN. It is only permissible to combine these signals o as a regular TCP FIN, but at the connection level. Notably, it is only DATA_ACK
n one subflow if there is no data outstanding on other subflows. Otherwise, it m ed once all data has been successfully received at the connection level. Note, t
ay be necessary to retransmit data on different subflows. Essentially, a host MU herefore, that a DATA_FIN is decoupled from a subflow FIN. It is only permissibl
ST NOT close all functioning subflows unless it is safe to do so, i.e., until al e to combine these signals on one subflow if there is no data outstanding on oth
l outstanding data has been DATA_ACKed, or until the segment with the DATA_FIN f er subflows. Otherwise, it may be necessary to retransmit data on different subf
lag set is the only outstanding segment.</t> lows. Essentially, a host <bcp14>MUST NOT</bcp14> close all functioning subflows
unless it is safe to do so, i.e., until all outstanding data has been DATA_ACKe
<t>Once a DATA_FIN has been acknowledged, all remaining subflows MUST be d or until the segment with the DATA_FIN flag set is the only outstanding segmen
closed with standard FIN exchanges. Both hosts SHOULD send FINs on all subflows t.</t>
, as a courtesy to allow middleboxes to clean up state even if an individual sub <t pn="section-3.3.3-7">Once a DATA_FIN has been acknowledged, all rem
flow has failed. It is also encouraged to reduce the timeouts (Maximum Segment L aining subflows
ifetime) on subflows at end hosts after receiving a DATA_FIN. In particular, any <bcp14>MUST</bcp14> be closed with standard FIN exchanges. Both
subflows where there is still outstanding data queued (which has been retransmi hosts <bcp14>SHOULD</bcp14> send FINs on all subflows, as a courtesy,
tted on other subflows in order to get the DATA_FIN acknowledged) MAY be closed to allow middleboxes to clean up state even if an individual subflow
with a RST with MP_TCPRST (<xref target="sec_reset"/>) error code for "too much has failed. Reducing the timeouts (MSL) on subflows at end hosts after
outstanding data".</t> receiving a
DATA_FIN is also encouraged. In particular, any subflows where there i
<t>A connection is considered closed once both hosts' DATA_FINs have bee s still
n acknowledged by DATA_ACKs.</t> outstanding data queued (which has been retransmitted on other
subflows in order to get the DATA_FIN acknowledged)
<t>As specified above, a standard TCP FIN on an individual subflow only <bcp14>MAY</bcp14> be closed with a RST with an MP_TCPRST (<xref targe
shuts down the subflow on which it was sent. If all subflows have been closed wi t="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>
th a FIN exchange, but no DATA_FIN has been received and acknowledged, the MPTCP ) error code for "too much outstanding data".</t>
connection is treated as closed only after a timeout. This implies that an impl <t pn="section-3.3.3-8">A connection is considered closed once both ho
ementation will have TIME_WAIT states at both the subflow and connection levels sts' DATA_FINs have been acknowledged by DATA_ACKs.</t>
(see <xref target="app_fsm"/>). This permits "break-before-make" scenarios where <t pn="section-3.3.3-9">As specified above, a standard TCP FIN on an i
connectivity is lost on all subflows before a new one can be re-established.</t ndividual subflow
> only shuts down the subflow on which it was sent. If all subflows
</section> have been closed with a FIN exchange but no DATA_FIN has been
received and acknowledged, the MPTCP connection is treated as closed
<section title="Receiver Considerations" anchor="sec_rwin"> only after a timeout. This implies that an implementation will have
<t>Regular TCP advertises a receive window in each packet, telling the TIME_WAIT states at both the subflow level and the connection level (s
sender how much data the receiver ee <xref target="app_fsm" format="default" sectionFormat="of" derivedContent="Ap
is willing to accept past the cumulative ack. The receive window is used to impl pendix D"/>). This permits "break-before-make" scenarios where connectivity is l
ement flow control, throttling ost on all subflows before a new one can be re‑established.</t>
</section>
<section anchor="sec_rwin" numbered="true" toc="include" removeInRFC="fa
lse" pn="section-3.3.4">
<name slugifiedName="name-receiver-considerations">Receiver Considerat
ions</name>
<t pn="section-3.3.4-1">Regular TCP advertises a receive window in eac
h packet, telling the sender how much data the receiver
is willing to accept past the cumulative ACK. The receive window is used to impl
ement flow control, throttling
down fast senders when receivers cannot keep up. </t> down fast senders when receivers cannot keep up. </t>
<t pn="section-3.3.4-2">MPTCP also uses a unique receive window, share
<t>MPTCP also uses a unique receive window, shared between the subflow d between the subflows. The idea is to allow any
s. The idea is to allow any subflow to send data as long as the receiver is willing to accept it. The
subflow to send data as long as the receiver is willing to accept it. The altern alternative -- maintaining per-subflow
ative, maintaining per subflow receive windows -- could end up stalling some subflows while others would not us
receive windows, could end up stalling some subflows while others would not use e up their window.</t>
up their window.</t> <t pn="section-3.3.4-3">The receive window is relative to the DATA_ACK
. As in TCP, a receiver <bcp14>MUST NOT</bcp14> shrink the right edge of the rec
<t>The receive window is relative to the DATA_ACK. As in TCP, a receiv eive window (i.e., DATA_ACK + receive window). The receiver will
er MUST NOT shrink the right edge of the receive window (i.e., DATA_ACK + receiv
e window). The receiver will
use the data sequence number to tell if a packet should be accepted at the connection level.</t> use the data sequence number to tell if a packet should be accepted at the connection level.</t>
<t pn="section-3.3.4-4">When deciding to accept packets at the subflow
<t>When deciding to accept packets at subflow level, regular TCP check level, regular TCP checks
s
the sequence number in the packet against the allowed receive window. the sequence number in the packet against the allowed receive window.
With multipath, such a check is done using only the connection-level window. A s With MPTCP, such a check is done using only the connection-level window. A sanit
anity y
check SHOULD be performed at subflow level to ensure that the subflow and mapped check <bcp14>SHOULD</bcp14> be performed at the subflow level to ensure that the
sequence subflow and mapped sequence
numbers meet the following test: SSN - SUBFLOW_ACK &lt;= DSN - DATA_ACK, where SSN is the subflow sequence number of the received packet and SUBFLOW_ACK is the RCV.NXT (next expected sequence number) of the subflow (with the equivalent connection-level definitions for DSN and DATA_ACK).</t> numbers meet the following test: SSN - SUBFLOW_ACK &lt;= DSN - DATA_ACK, where SSN is the subflow sequence number of the received packet and SUBFLOW_ACK is the RCV.NXT (next expected sequence number) of the subflow (with the equivalent connection-level definitions for DSN and DATA_ACK).</t>
<t pn="section-3.3.4-5">In regular TCP, once a segment is deemed in-wi
<t>In regular TCP, once a segment is deemed in-window, it is put either ndow, it is put in either
in the in-order receive queue or in the out-of-order queue. the in-order receive queue or the out-of-order queue.
In Multipath TCP, the same happens but at the connection level: a segment In Multipath TCP, the same thing happens, but at the connection level: a segment
is placed in the connection level in-order or out-of-order queue if is placed in the connection-level in-order or out-of-order queue if
it is in-window at both connection and subflow levels. it is in-window at both the connection level and the subflow level.
The stack still has to remember, for each subflow, which segments were The stack still has to remember, for each subflow, which segments were
received successfully so that it can ACK them at subflow level appropriately. received successfully so that it can ACK them at the subflow level appropriately
Typically, this will be implemented by keeping per subflow out-of-order .
queues (containing only message headers, not the payloads) and remembering Typically, this will be implemented by keeping per-subflow out-of-order
queues (containing only message headers -- not the payloads) and remembering
the value of the cumulative ACK. the value of the cumulative ACK.
</t> </t>
<t pn="section-3.3.4-6">It is important for implementers to understand
<t>It is important for implementers to understand how large how large
a receiver buffer is appropriate. The lower bound for full a receive buffer is appropriate. The lower bound for full
network utilization is the maximum bandwidth-delay product network utilization is the maximum bandwidth-delay product
of any one of the paths. However, this might be insufficient of any one of the paths. However, this might be insufficient
when a packet is lost on a slower subflow and needs to be when a packet is lost on a slower subflow and needs to be
retransmitted (see <xref target="sec_retransmit"/>). A tight retransmitted (see <xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>). A tight
upper bound would be the maximum round-trip time (RTT) of any path multiplied upper bound would be the maximum round-trip time (RTT) of any path multiplied
by the total bandwidth available across all paths. This by the total bandwidth available across all paths. This
permits all subflows to continue at full speed while a permits all subflows to continue at full speed while a
packet is fast-retransmitted on the maximum RTT path. Even packet is fast-retransmitted on the maximum RTT path. Even
this might be insufficient to maintain full performance in this might be insufficient to maintain full performance in
the event of a retransmit timeout on the maximum RTT path. the event of a retransmit timeout on the maximum RTT path.
It is for future study to determine the relationship between Determining the relationship between
retransmission strategies and receive buffer sizing.</t> retransmission strategies and receive buffer sizing is left for future
study.</t>
</section> </section>
<section anchor="sec_sender" numbered="true" toc="include" removeInRFC="
<section title="Sender Considerations" anchor="sec_sender"> false" pn="section-3.3.5">
<t>The sender remembers receiver window advertisements from the receiv <name slugifiedName="name-sender-considerations">Sender Considerations
er. It should only update its local receive window values when the largest seque </name>
nce number allowed (i.e., DATA_ACK + receive window) increases, on the receipt o <t pn="section-3.3.5-1">The sender remembers receive window advertisem
f a DATA_ACK. This is important to allow using paths with different RTTs, and th ents from the
us different feedback loops. </t> receiver. It should only update its local receive window values when
the largest sequence number allowed (i.e., DATA_ACK + receive
<t>MPTCP uses a single receive window across all subflows, and if the window) increases on the receipt of a DATA_ACK. This is important
receive window was guaranteed to be unchanged end-to-end, a host could always re for allowing the use of paths with different RTTs and thus different f
ad the most recent receive window value. However, some classes of middleboxes ma eedback loops. </t>
y alter the TCP-level receive window. Typically, these will <t pn="section-3.3.5-2">MPTCP uses a single receive window across all
subflows, and if
the receive window was guaranteed to be unchanged end to end, a host c
ould always read the most recent receive window value. However, some classes of
middleboxes may alter the TCP-level receive window. Typically, these will
shrink the offered window, although for short periods of time it may be possible for the window to be larger (however, shrink the offered window, although for short periods of time it may be possible for the window to be larger (however,
note that this would not continue for long periods since ultimately the middlebox must keep up with note that this would not continue for long periods, since ultimately the middlebox must keep up with
delivering data to the receiver). Therefore, if receive window sizes differ on multiple subflows, delivering data to the receiver). Therefore, if receive window sizes differ on multiple subflows,
when sending data MPTCP SHOULD take the largest of the most recent window sizes as the one to use in calculations. when sending data MPTCP <bcp14>SHOULD</bcp14> take the largest of the most recent window sizes as the one to use in calculations.
This rule is implicit in the requirement not to reduce the right edge of the window.</t> This rule is implicit in the requirement not to reduce the right edge of the window.</t>
<t pn="section-3.3.5-3">The sender <bcp14>MUST</bcp14> also remember t
<t>The sender MUST also remember the receive windows advertised by eac he receive windows advertised by each subflow.
h subflow.
The allowed window for subflow i is (ack_i, ack_i + rcv_wnd_i), where ack_i is the The allowed window for subflow i is (ack_i, ack_i + rcv_wnd_i), where ack_i is the
subflow-level cumulative ACK of subflow i. This ensures data will not be sent to a middlebox subflow-level cumulative ACK of subflow i. This ensures that data will not be sent to a middlebox
unless there is enough buffering for the data. </t> unless there is enough buffering for the data. </t>
<t pn="section-3.3.5-4">Putting the two rules together, we get the fol
<t>Putting the two rules together, we get the following: a sender is a lowing: a sender is allowed to send
llowed to send
data segments with data-level sequence numbers between (DATA_ACK, DATA_ACK + receive_window). data segments with data-level sequence numbers between (DATA_ACK, DATA_ACK + receive_window).
Each of these segments will be mapped onto subflows, as long as subflow sequence numbers Each of these segments will be mapped onto subflows, as long as subflow sequence numbers
are in the allowed windows for those subflows. Note that subflow sequence numbers do not are in the allowed windows for those subflows. Note that subflow sequence numbers do not
generally affect flow control if the same receive window is advertised across all subflows. generally affect flow control if the same receive window is advertised across all subflows.
They will perform flow control for those subflows with a smaller advertised receive window. They will perform flow control for those subflows with a smaller advertised receive window.
</t> </t>
<t pn="section-3.3.5-5">The send buffer <bcp14>MUST</bcp14>, at a mini
<t>The send buffer MUST, at a minimum, be as big as the receive buffer mum, be as big as the receive buffer, to enable the sender to reach maximum thro
, to enable the sender to reach maximum throughput.</t> ughput.</t>
</section> </section>
<section anchor="sec_retransmit" numbered="true" toc="include" removeInR
<section title="Reliability and Retransmissions" anchor="sec_retransmit" FC="false" pn="section-3.3.6">
> <name slugifiedName="name-reliability-and-retransmiss">Reliability and
Retransmissions</name>
<t>The data sequence mapping allows senders to resend data with the sa <t pn="section-3.3.6-1">The Data Sequence Mapping allows senders to re
me data sequence number on a different subflow. When doing this, a host MUST sti send data with the
ll retransmit the original data on the original subflow, in order to preserve th same data sequence number on a different subflow. When doing this, a
e subflow integrity (middleboxes could replay old data, and/or could reject hole host <bcp14>MUST</bcp14> still retransmit the original data on the
s in subflows), and a receiver will ignore these retransmissions. While this is original subflow, in order to preserve the subflow's integrity
clearly suboptimal, for compatibility reasons this is sensible behavior. Optimiz (middleboxes could replay old data and⁠/or could reject holes in
ations could be negotiated in future versions of this protocol. Note also that t subflows), and a receiver will ignore these retransmissions. While
his property would also permit a sender to always send the same data, with the s this is clearly suboptimal, for compatibility reasons this is
ame data sequence number, on multiple subflows, if desired for reliability reaso sensible behavior. Optimizations could be negotiated in future
ns.</t> versions of this protocol. Note also that this property would also per
mit a sender to always send the same data, with the same data sequence number, o
<t>This protocol specification does not mandate any mechanisms for han n multiple subflows, if desired for reliability reasons.</t>
dling retransmissions, and much will be dependent upon local policy <t pn="section-3.3.6-2">This protocol specification does not mandate a
(as discussed in <xref target="sec_policy"/>). One can imagine aggressive connec ny mechanisms for handling retransmissions, and much will be dependent upon loca
tion-level retransmissions policies where every packet lost at subflow level is l policy
retransmitted on (as discussed in <xref target="sec_policy" format="default" sectionFormat="of" d
a different subflow (hence, wasting bandwidth but possibly reducing application- erivedContent="Section 3.3.8"/>). One can imagine aggressive connection-level re
to-application delays), or conservative retransmission policies where connection transmission policies where every packet lost at the subflow level is retransmit
-level retransmits ted on
a different subflow (hence wasting bandwidth but possibly reducing application-t
o-application delays) or conservative retransmission policies where connection-l
evel retransmissions
are only used after a few subflow-level retransmission timeouts occur.</t> are only used after a few subflow-level retransmission timeouts occur.</t>
<t pn="section-3.3.6-3">It is envisaged that a standard connection-lev
<t>It is envisaged that a standard connection-level retransmission mec el retransmission mechanism
hanism
would be implemented around a connection-level data queue: all segments that haven't would be implemented around a connection-level data queue: all segments that haven't
been DATA_ACKed are stored. A timer is set when been DATA_ACKed are stored. A timer is set when
the head of the connection-level data queue is ACKed at subflow level but its corresponding the head of the connection-level data queue is ACKed at the subflow level but is not DATA_A
data CKed at the data level. This timer will guard against retransmission failures
is not ACKed at data level. This timer will guard against failures in retransmis
sion
by middleboxes that proactively ACK data.</t> by middleboxes that proactively ACK data.</t>
<t pn="section-3.3.6-4">The sender <bcp14>MUST</bcp14> keep data in it
<t>The sender MUST keep data in its send buffer as long as the data ha s send buffer as
s not been acknowledged at both connection level and on all subflows on which it long as the data has not been acknowledged both (1) at the
connection level and (2) on all subflows on which it
has been sent. In this way, the sender can always retransmit the data if needed, on the same subflow or on a different one. A special case is when a subflow fails: the sender has been sent. In this way, the sender can always retransmit the data if needed, on the same subflow or on a different one. A special case is when a subflow fails: the sender
will typically resend the data on other working subflows after a timeout, and wi will typically resend the data on other working subflows after a timeout and wil
ll keep trying to retransmit the data l keep trying to retransmit the data
on the failed subflow too. The sender will declare the subflow failed after a pr on the failed subflow too. The sender will declare the subflow failed after a pr
edefined upper bound on retransmissions is reached (which MAY be lower than the edefined upper bound on retransmissions is reached (which <bcp14>MAY</bcp14> be
usual TCP limits of the Maximum Segment Life), or on the receipt of an ICMP erro lower than the usual TCP limits of the MSL) or on the receipt of an ICMP error,
r, and only then delete the outstanding data segments. </t> and only then delete the outstanding data segments. </t>
<t pn="section-3.3.6-5">If multiple retransmissions that indicate that
<t>If multiple retransmissions are triggered that indicate that a subf a
low performs badly, this MAY lead to a host resetting the subflow with a RST. Ho subflow is performing badly are triggered, this <bcp14>MAY</bcp14> lea
wever, additional research is required to understand the heuristics of how and w d to a host resetting the subflow with a RST. However, additional research is re
hen to reset underperforming subflows. For example, a highly asymmetric path may quired to understand the heuristics of how and when to reset underperforming sub
be misdiagnosed as underperforming. A RST for this purpose SHOULD be accompanie flows. For example, a highly asymmetric path may be misdiagnosed as underperform
d with an "Unacceptable performance" MP_TCPRST option (<xref target="sec_reset"/ ing. A RST for this purpose <bcp14>SHOULD</bcp14> be accompanied by an "Unaccept
>).</t> able performance" MP_TCPRST option (<xref target="sec_reset" format="default" se
ctionFormat="of" derivedContent="Section 3.6"/>).</t>
</section> </section>
<section anchor="sec_cc" numbered="true" toc="include" removeInRFC="fals
<section title="Congestion Control Considerations" anchor="sec_cc"> e" pn="section-3.3.7">
<t>Different subflows in an MPTCP connection have different congestion <name slugifiedName="name-congestion-control-consider">Congestion Cont
windows. rol Considerations</name>
<t pn="section-3.3.7-1">Different subflows in an MPTCP connection have
different congestion windows.
To achieve fairness at bottlenecks and resource pooling, it is necessary to couple the To achieve fairness at bottlenecks and resource pooling, it is necessary to couple the
congestion windows in use on each subflow, in order to push most traffic to uncongested links. congestion windows in use on each subflow, in order to push most traffic to uncongested links.
One algorithm for achieving this is presented in <xref target="RFC6356"/>; One algorithm for achieving this is presented in <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/>;
the algorithm does not achieve perfect resource pooling but is "safe" in that it is readily the algorithm does not achieve perfect resource pooling but is "safe" in that it is readily
deployable in the current Internet. By this, we mean that it does not take up more capacity deployable in the current Internet. By this we mean that it does not take up more capacity
on any one path than if it were a single-path flow using only that route, so this ensures on any one path than if it were a single-path flow using only that route, so this ensures
fair coexistence with single-path TCP at shared bottlenecks.</t> fair coexistence with single-path TCP at shared bottlenecks.</t>
<t pn="section-3.3.7-2">It is foreseeable that different congestion co
<t>It is foreseeable that different congestion controllers will be imp ntrollers will be
lemented for MPTCP, each aiming to achieve different properties in the resource implemented for MPTCP, each aiming to achieve different properties
pooling/fairness/stability design space, as well as those for achieving differen in the resource pooling / fairness / stability design space, as well a
t properties in quality of service, reliability, and resilience.</t> s those for achieving different properties in quality of service, reliability, a
nd resilience.</t>
<t>Regardless of the algorithm used, <t pn="section-3.3.7-3">Regardless of the algorithm used,
the design of the MPTCP protocol aims to provide the congestion control implemen the design of MPTCP aims to provide the congestion control
tations sufficient information implementations with sufficient information
to take the right decisions; this information includes, for each subflow, which to make the right decisions; this information includes, for each subflow, which
packets were lost and when. </t> packets were lost and when. </t>
</section> </section>
<section anchor="sec_policy" numbered="true" toc="include" removeInRFC="
<section title="Subflow Policy" anchor="sec_policy"> false" pn="section-3.3.8">
<t>Within a local MPTCP implementation, a host may use any local polic <name slugifiedName="name-subflow-policy">Subflow Policy</name>
y it wishes to decide how to share the traffic to be sent over the available pat <t pn="section-3.3.8-1">Within a local MPTCP implementation, a host ma
hs.</t> y use any local policy it wishes to decide how to share the traffic to be sent o
<t>In the typical use case, where the goal is to maximize throughput, ver the available paths.</t>
all available paths will be used simultaneously for data transfer, using coupled <t pn="section-3.3.8-2">In the typical use case, where the goal is to
congestion control as described in <xref target="RFC6356"/>. It is expected, ho maximize throughput, all available paths will be used simultaneously for data tr
wever, that other use cases will appear.</t> ansfer, using coupled congestion control as described in <xref target="RFC6356"
<t>For instance, a possibility is an 'all-or-nothing' approach, i.e., format="default" sectionFormat="of" derivedContent="RFC6356"/>. It is expected,
have a second path ready for use in the event of however, that other use cases will appear.</t>
<t pn="section-3.3.8-3">For instance, one possibility is an "all-or-no
thing" approach, i.e., have a second path ready for use in the event of
failure of the first path, but alternatives could include entirely saturating one path before using an additional failure of the first path, but alternatives could include entirely saturating one path before using an additional
path (the 'overflow' case). Such choices would be most likely based on the monetary cost of links, but may also be path (the "overflow" case). Such choices would be most likely based on the monetary cost of links but may also be
based on properties such as the delay or jitter of links, where stability (of delay or bandwidth) is more important than throughput. Application based on properties such as the delay or jitter of links, where stability (of delay or bandwidth) is more important than throughput. Application
requirements such as these are discussed in detail in <xref target="RFC6897"/>.< requirements such as these are discussed in detail in <xref target="RFC6897" for
/t> mat="default" sectionFormat="of" derivedContent="RFC6897"/>.</t>
<t>The ability to make effective choices at the sender requires full k <t pn="section-3.3.8-4">The ability to make effective choices at the s
nowledge of the path "cost", which ender requires full knowledge of the path "cost", which
is unlikely to be the case. It would be desirable for a receiver to be able to signal their own preferences for paths, is unlikely to be the case. It would be desirable for a receiver to be able to signal their own preferences for paths,
since they will often be the multihomed party, and may have to pay for metered i since they will often be the multihomed party and may have to pay for metered in
ncoming bandwidth.</t> coming bandwidth.</t>
<t>To enable this, the MP_JOIN option (see <xref target="sec_join"/>) <t pn="section-3.3.8-5">To enable this behavior, the MP_JOIN option (s
contains the 'B' bit, which allows a host to indicate to its peer that this path ee <xref target="sec_join" format="default" sectionFormat="of" derivedContent="S
should be treated as a backup path to use only in the event of failure of other ection 3.2"/>) contains the "B" bit,
working subflows (i.e., a subflow where the receiver has indicated B=1 SHOULD N which allows a host to indicate to its peer that this path should be
OT be used to send data unless there are no usable subflows where B=0).</t> treated as a backup path to use only in the event of failure of
<t>In the event that the available set of paths changes, a host may wi other working subflows (i.e., a subflow where the receiver has
sh to signal a change in priority of subflows to the peer (e.g., a subflow that indicated that B=1 <bcp14>SHOULD NOT</bcp14> be used to send data unle
was previously set as backup should now take priority over all remaining subflow ss there are no usable subflows where B=0).</t>
s). Therefore, the MP_PRIO option, shown in <xref target="tcpm_prio"/>, can be u <t pn="section-3.3.8-6">In the event that the available set of paths c
sed to change the 'B' flag of the subflow on which it is sent.</t> hanges, a host may
<t>Another use of the MP_PRIO option is to set the 'B' flag on a subfl wish to signal a change in priority of subflows to the peer (e.g., a
ow to cleanly retire its use before closing it and removing it with REMOVE_ADDR subflow that was previously set as a backup should now take priority
<xref target="sec_remove_addr"/>, for example to support make-before-break sessi over all remaining subflows). Therefore, the MP_PRIO option, shown
on continuity, where new subflows are added before the previously used ones are in <xref target="tcpm_prio" format="default" sectionFormat="of" derive
closed.</t> dContent="Figure 11"/>, can be used to
<?rfc needLines='8'?> change the "B" flag of the subflow on which it is sent.</t>
<figure align="center" anchor="tcpm_prio" title="Change Subflow Priori <figure anchor="tcpm_prio" align="left" suppress-title="false" pn="fig
ty (MP_PRIO) Option"> ure-11">
<artwork align="left"><![CDATA[ <name slugifiedName="name-change-subflow-priority-mp_">Change Subflo
1 2 3 w Priority (MP_PRIO) Option</name>
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 <artwork align="left" name="" type="" alt="" pn="section-3.3.8-7.1">
+---------------+---------------+-------+-----+-+ 1 2 3
| Kind | Length |Subtype|(rsv)|B| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----+-+ +---------------+---------------+-------+-----+-+
]]></artwork> | Kind | Length |Subtype|(rsv)|B|
+---------------+---------------+-------+-----+-+ </artwork>
</figure> </figure>
<t pn="section-3.3.8-8">Another use of the MP_PRIO option is to set th
<t>It should be noted that the backup flag is a request from a data receiver to e "B" flag on a
a data sender only, and the data sender SHOULD adhere to these requests. A host subflow to cleanly "retire" its use before closing it and removing it
cannot assume that the data sender will do so, however, since local policies -- with REMOVE_ADDR (<xref target="sec_remove_addr" format="default" sect
or technical difficulties -- may override MP_PRIO requests. Note also that this ionFormat="of" derivedContent="Section 3.4.2"/>) -- for example, to support make
signal applies to a single direction, and so the sender of this option could cho -before-break session continuity, where new subflows are added before the previo
ose to continue using the subflow to send data even if it has signaled B=1 to th usly used subflows are closed.</t>
e other host.</t> <t pn="section-3.3.8-9">It should be noted that the backup flag is a r
equest from a data receiver to a data sender only, and the data sender <bcp14>SH
OULD</bcp14> adhere to these requests. A host cannot assume that the data sender
will do so, however, since local policies -- or technical difficulties -- may o
verride MP_PRIO requests. Note also that this signal applies to a single directi
on, and so the sender of this option could choose to continue using the subflow
to send data even if it has signaled B=1 to the other host.</t>
</section> </section>
</section> </section>
<section anchor="sec_pm" numbered="true" toc="include" removeInRFC="false"
<section title="Address Knowledge Exchange (Path Management)" anchor="sec_ pn="section-3.4">
pm"> <name slugifiedName="name-address-knowledge-exchange-">Address Knowledge
<t>We use the term "path management" to refer to the exchange of informa Exchange (Path Management)</name>
tion about additional paths between hosts, which in this design is managed by mu <t pn="section-3.4-1">We use the term "path management" to refer to the
ltiple addresses at hosts. For more detail of the architectural thinking behind exchange of information about additional paths between hosts, which in this desi
this design, see the MPTCP Architecture document <xref target="RFC6182"/>.</t> gn is managed by multiple addresses at hosts. For more details regarding the arc
<t>This design makes use of two methods of sharing such hitectural thinking behind this design, see the MPTCP architecture document <xre
f target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/
>.</t>
<t pn="section-3.4-2">This design makes use of two methods of sharing su
ch
information, and both can be used on a connection. information, and both can be used on a connection.
The first is the direct The first is the direct
setup of new subflows, already described in setup of new subflows (described in
<xref target="sec_join"/>, where the initiator has an <xref target="sec_join" format="default" sectionFormat="of" derivedConte
additional address. The second method, described in the nt="Section 3.2"/>), where the initiator has an
following subsections, signals addresses explicitly to the additional address. The second method (described in the
following subsections) signals addresses explicitly to the
other host to allow it to initiate new subflows. The other host to allow it to initiate new subflows. The
two mechanisms are complementary: the first is implicit and two mechanisms are complementary: the first is implicit and
simple, while the explicit is more complex but is more simple, while the second (explicit) is more complex but is more
robust. Together, the mechanisms allow addresses to change in robust. Together, these mechanisms allow addresses to change in
flight (and thus support operation through NATs, since the flight (and thus support operation through NATs, since the
source address need not be known), and also allow the source address need not be known); they also allow the
signaling of previously unknown addresses, and of addresses signaling of previously unknown addresses and of addresses
belonging to other address families (e.g., both IPv4 and IPv6).</t> belonging to other address families (e.g., both IPv4 and IPv6).</t>
<t pn="section-3.4-3">Here is an example of typical operation of the pro
<t>Here is an example of typical operation of the protocol: tocol:
<list style="symbols"> </t>
<t>An MPTCP connection is initially set up between address/port A1 o <ul spacing="normal" bare="false" empty="false" pn="section-3.4-4">
f Host A <li pn="section-3.4-4.1">An MPTCP connection is initially set up betwe
and address/port B1 of Host B.&nbsp; If Host A is multihomed and en address⁠/port A1 of Host A
and address⁠/port B1 of Host B. If Host A is multihomed and
multiaddressed, it can start an additional subflow from multiaddressed, it can start an additional subflow from
its address A2 to B1, by sending a SYN with a Join its address A2 to B1, by sending a SYN with an MP_JOIN
option from A2 to B1, using B's previously declared option from A2 to B1, using B's previously declared
token for this connection. Alternatively, if B is token for this connection. Alternatively, if B is
multihomed, it can try to set up a new subflow from B2 to multihomed, it can try to set up a new subflow from B2 to
A1, using A's previously declared token. In either A1, using A's previously declared token. In either
case, the SYN will be sent to the port already in use case, the SYN will be sent to the port already in use
for the original subflow on the receiving host.</t> for the original subflow on the receiving host.</li>
<li pn="section-3.4-4.2">Simultaneously (or after a timeout), an ADD_A
<t>Simultaneously (or after a timeout), an ADD_ADDR option DDR option
(<xref target="sec_add_address"/>) is sent on an existing subflow, informing (<xref target="sec_add_address" format="default" sectionFormat="of" derivedConte
nt="Section 3.4.1"/>) is sent on an existing subflow, informing
the receiver of the sender's alternative address(es). The recipient can use the receiver of the sender's alternative address(es). The recipient can use
this information to open a new subflow to the sender's additional address. this information to open a new subflow to the sender's additional address(es).
In our example, A will send ADD_ADDR option informing B of address/port A2. In our example, A will send the ADD_ADDR option informing B of address⁠/port A2.
The mix of using the SYN-based option and the ADD_ADDR option, including The mix of using the SYN‑based option and the ADD_ADDR option, including
timeouts, is implementation specific and can be tailored to agree with local pol timeouts, is implementation specific and can be tailored to agree with local pol
icy.</t> icy.</li>
<li pn="section-3.4-4.3">If subflow A2-B1 is successfully set up, Host
<t>If subflow A2-B1 is successfully set up, Host B can use the Addre B can use the Address ID in
ss ID in the MP_JOIN option to correlate this source address with the ADD_ADDR option tha
the Join option to correlate this with the ADD_ADDR option that will also arrive t will also arrive on
on
an existing subflow; now B knows not to open A2-B1, ignoring the ADD_ADDR. an existing subflow; now B knows not to open A2-B1, ignoring the ADD_ADDR.
Otherwise, if B has not received the A2-B1 MP_JOIN SYN but received the ADD_ADDR, Otherwise, if B has not received the A2-B1 MP_JOIN SYN but received the ADD_ADDR,
it can try to initiate a new subflow from one or more of its addresses to address it can try to initiate a new subflow from one or more of its addresses to address
A2. This permits new sessions to be opened if one host is behind a NAT.</t> A2. This permits new sessions to be opened if one host is behind a NAT.</li>
</list> </ul>
<t pn="section-3.4-5">
Other ways of using the two signaling mechanisms are possible; for instance, Other ways of using the two signaling mechanisms are possible; for instance,
signaling addresses in other address families can only be done explicitly using signaling addresses in other address families can only be done explicitly
the Add Address option. using the Add Address (ADD_ADDR) option.
</t> </t>
<section anchor="sec_add_address" numbered="true" toc="include" removeIn
<section title="Address Advertisement" anchor="sec_add_address"> RFC="false" pn="section-3.4.1">
<t>The Add Address (ADD_ADDR) MPTCP option announces additional addresse <name slugifiedName="name-address-advertisement">Address Advertisement
s (and optionally, ports) on which a </name>
host can be reached (<xref target="tcpm_address"/>). <t pn="section-3.4.1-1">The ADD_ADDR MPTCP option announces additional
addresses (and, optionally, ports) on which a
host can be reached (<xref target="tcpm_address" format="default" sectionFormat=
"of" derivedContent="Figure 12"/>).
This option can be used at any time during a connection, depending on when the This option can be used at any time during a connection, depending on when the
sender wishes to enable multiple paths and/or when paths become available. As wi sender wishes to enable multiple paths and⁠/or when paths become available. As w
th all MPTCP ith all MPTCP
signals, the receiver MUST undertake standard TCP validity checks, e.g. <xref ta signals, the receiver <bcp14>MUST</bcp14> undertake standard TCP validity
rget="RFC5961"/>, before acting upon it.</t> checks, e.g., per <xref target="RFC5961" format="default" sectionForma
t="of" derivedContent="RFC5961"/>, before
<t>Every address has an Address ID that can be used for uniquely identif acting upon it.</t>
ying the address within a connection for address removal. The Address ID is also <figure anchor="tcpm_address" align="left" suppress-title="false" pn="
used to identify MP_JOIN options (see <xref target="sec_join"/>) relating to figure-12">
the same address, even when address translators are in use. The Address ID MUST <name slugifiedName="name-add-address-add_addr-option">Add Address (
uniquely ADD_ADDR) Option</name>
identify the address for the sender of the option (within the scope of the conne <artwork align="left" name="" type="" alt="" pn="section-3.4.1-2.1">
ction), but the mechanism for 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-------+---------------+
| Kind | Length |Subtype|(rsv)|E| Address ID |
+---------------+---------------+-------+-------+---------------+
| Address (IPv4: 4 octets / IPv6: 16 octets) |
+-------------------------------+-------------------------------+
| Port (2 octets, optional) | |
+-------------------------------+ |
| Truncated HMAC (8 octets, if E=0) |
| +-------------------------------+
| |
+-------------------------------+ </artwork>
</figure>
<t pn="section-3.4.1-3">Every address has an Address ID that can be us
ed for uniquely identifying the address within a connection for address removal.
The Address ID is also
used to identify MP_JOIN options (see <xref target="sec_join" format="default" s
ectionFormat="of" derivedContent="Section 3.2"/>) relating to
the same address, even when address translators are in use. The Address ID <bcp1
4>MUST</bcp14> uniquely
identify the address for the sender of the option (within the scope of the conne
ction); the mechanism for
allocating such IDs is implementation specific.</t> allocating such IDs is implementation specific.</t>
<t pn="section-3.4.1-4">All Address IDs learned via either MP_JOIN or
<t>All address IDs learned via either MP_JOIN or ADD_ADDR ADD_ADDR
SHOULD be stored by the receiver in a data structure that gathers all th <bcp14>SHOULD</bcp14> be stored by the receiver in a data structure
e Address ID that gathers all the Address-ID-to-address mappings for a connection
to address mappings for a connection (identified by a token pair). In th (identified by a token pair). In this way, there is
is way, there is a stored mapping between the Address ID, observed source address, and to
a stored mapping between Address ID, observed source address, and token ken pair for
pair for
future processing of control information for a connection. Note that an implementation future processing of control information for a connection. Note that an implementation
MAY discard incoming address advertisements at will, for example, for av <bcp14>MAY</bcp14> discard incoming address advertisements at will -- fo
oiding updating r example, to avoid updating
mapping state, or because advertised addresses are of no use to it (for mapping state or because advertised addresses are of no use to it (for
example, IPv6 addresses when it has IPv4 only). Therefore, a host MUST t example, IPv6 addresses when it has IPv4 only). Therefore, a host <bcp14
reat address >MUST</bcp14> treat address
advertisements as soft state, and it MAY choose to refresh advertisement advertisements as soft state, and it <bcp14>MAY</bcp14> choose to refres
s periodically. h advertisements periodically.
Note also that an implementation MAY choose to cache these address adver Note also that an implementation <bcp14>MAY</bcp14> choose to cache thes
tisements even e address advertisements even
if they are not currently relevant but may be relevant in the future, such as IPv4 if they are not currently relevant but may be relevant in the future, such as IPv4
addresses when IPv6 connectivity is available but IPv4 is awaiting DHCP. </t> addresses when IPv6 connectivity is available but IPv4 is awaiting DHCP. </t>
<t pn="section-3.4.1-5">This option is shown in <xref target="tcpm_add
<t>This option is shown in <xref target="tcpm_address"/>. The illustrati ress" format="default" sectionFormat="of" derivedContent="Figure 12"/>. The illu
on is sized for stration is sized for
IPv4 addresses. For IPv6, the length of the address will be 16 octets (i IPv4 addresses. For IPv6, the length of the address will be 16 octets (i
nstead of 4).</t> nstead of 4).</t>
<t pn="section-3.4.1-6">The 2 octets that specify the TCP port number
<t>The 2 octets that specify the TCP port number to use are optional and to use are optional, and their presence
their presence
can be inferred from the length of the option. Although it is expected that the majority of can be inferred from the length of the option. Although it is expected that the majority of
use cases will use the same port pairs as used for the initial subflow (e.g., port use cases will use the same port pairs as those used for the initial subflow (e.g., port
80 remains port 80 on all subflows, as does the ephemeral port at the client), there 80 remains port 80 on all subflows, as does the ephemeral port at the client), there
may be cases (such as port-based load balancing) where the explicit specification of may be cases (such as port-based load balancing) where the explicit specification of
a different port is required. If no port is specified, MPTCP SHOULD atte a different port is required. If no port is specified, MPTCP <bcp14>SHOU
mpt to LD</bcp14> attempt to
connect to the specified address on the same port as is already in use b connect to the specified address on the same port as the port that is al
y the subflow ready in use by the subflow
on which the ADD_ADDR signal was sent; this is discussed in more detail on which the ADD_ADDR signal was sent; this is discussed in more detail
in <xref target="heuristics"/>.</t> in <xref target="heuristics" format="default" sectionFormat="of" derivedContent=
"Section 3.9"/>.</t>
<t>The Truncated HMAC present in this Option is the rightmost 64 bits of <t pn="section-3.4.1-7">The Truncated HMAC parameter present in this o
an HMAC, negotiated and ption is the rightmost 64 bits of an HMAC, negotiated and
calculated in the same way as for MP_JOIN as described in <xref target=" calculated in the same way as for MP_JOIN as described in <xref target="
sec_join"/>. For this sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>. Fo
r this
specification of MPTCP, as there is only one hash algorithm option specified, this will be HMAC as specification of MPTCP, as there is only one hash algorithm option specified, this will be HMAC as
defined in <xref target="RFC2104"/>, using the SHA-256 hash algorithm <xref target="RFC6234"/>. defined in <xref target="RFC2104" format="default" sectionFormat="of" derivedContent="RFC2104"/>, using the SHA-256 hash algorithm <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>.
In the same way as for MP_JOIN, the key for the HMAC In the same way as for MP_JOIN, the key for the HMAC
algorithm, in the case of the message transmitted by Host A, will be Key-A followed by Key-B, and in algorithm, in the case of the message transmitted by Host A, will be Key-A followed by Key-B, and in
the case of Host B, Key-B followed by Key-A. These are the keys that were exchanged in the original the case of Host B, Key-B followed by Key-A. These are the keys that were exchanged in the original
MP_CAPABLE handshake. The message for the HMAC is the Address ID, IP Address, and Port which precede MP_CAPABLE handshake. The message for the HMAC is the Address ID, IP address, and port that precede
the HMAC in the ADD_ADDR option. If the port is not present in the ADD_ADDR option, the HMAC message the HMAC in the ADD_ADDR option. If the port is not present in the ADD_ADDR option, the HMAC message
will nevertheless include two octets of value zero. The rationale for the HMAC is to will nevertheless include 2 octets of value zero. The rationale for the HMAC is to
prevent unauthorized entities from injecting ADD_ADDR signals in an attempt to hijack a connection. prevent unauthorized entities from injecting ADD_ADDR signals in an attempt to hijack a connection.
Note that additionally the presence of this HMAC prevents the address be Note that, additionally, the presence of this HMAC prevents the
ing changed in flight unless address from being changed in flight unless
the key is known by an intermediary. If a host receives an ADD_ADDR option for which it cannot the key is known by an intermediary. If a host receives an ADD_ADDR option for which it cannot
validate the HMAC, it SHOULD silently ignore the option.</t> validate the HMAC, it <bcp14>SHOULD</bcp14> silently ignore the option.<
/t>
<t>A set of four flags are present after the subtype and before the Addr <t pn="section-3.4.1-8">A set of four flags is present after the subty
ess ID. Only the rightmost pe and before the Address ID. Only the rightmost
bit - labelled 'E' - is assigned in this specification. The other bits a bit -- labeled "E" -- is assigned in this specification. The other
re currently unassigned and MUST bits are currently unassigned; they <bcp14>MUST</bcp14>
be set to zero by a sender and MUST be ignored by the receiver.</t> be set to 0 by a sender and <bcp14>MUST</bcp14> be ignored by the receiv
er.</t>
<t>The 'E' flag exists to provide reliability for this option. Because t <t pn="section-3.4.1-9">The "E" flag exists to provide reliability for
his option will often be sent this option. Because this option will often be sent
on pure ACKs, there is no guarantee of reliability. Therefore, a receiver receiving a fresh ADD_ADDR on pure ACKs, there is no guarantee of reliability. Therefore, a receiver receiving a fresh ADD_ADDR
option (where E=0), will send the same option back to the sender, but no option (where E=0) will send the same option back to the sender, but not
t including the HMAC, and including the HMAC and
with E=1, to indicate receipt. The lack of this echo can be used by the with E=1, to indicate receipt. According to local policy, the lack of
initial ADD_ADDR sender to this type of "echo" can indicate to the initial ADD_ADDR sender that the
retransmit the ADD_ADDR according to local policy.</t> ADD_ADDR needs to be retransmitted.</t>
<t pn="section-3.4.1-10">Due to the proliferation of NATs, it is reaso
<?rfc needLines='11'?> nably likely that
<figure align="center" anchor="tcpm_address" title="Add Address (ADD_ADD one host may attempt to advertise private addresses <xref target="RFC1
R) Option"> 918" format="default" sectionFormat="of" derivedContent="RFC1918"/>. It is not d
<artwork align="left"><![CDATA[ esirable to prohibit
1 2 3 this behavior, since there may be cases where both hosts have additional
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 interfaces on the same private network, and a host
+---------------+---------------+-------+-------+---------------+ <bcp14>MAY</bcp14> advertise such addresses. The MP_JOIN handshake
| Kind | Length |Subtype|(rsv)|E| Address ID | to create a new subflow (<xref target="sec_join" format="default" sect
+---------------+---------------+-------+-------+---------------+ ionFormat="of" derivedContent="Section 3.2"/>)
| Address (IPv4 - 4 octets / IPv6 - 16 octets) | provides mechanisms to minimize security risks. The MP_JOIN message
+-------------------------------+-------------------------------+ contains a 32-bit token that uniquely identifies the connection to
| Port (2 octets, optional) | | the receiving host. If the token is unknown, the host will respond
+-------------------------------+ | with a RST. In the unlikely event that the token is valid at the
| Truncated HMAC (8 octets, if E=0) | receiving host, subflow setup will continue, but the HMAC exchange
| +-------------------------------+ must occur for authentication. The HMAC exchange
| | will fail and will provide
+-------------------------------+ sufficient protection against two unconnected hosts accidentally
]]></artwork> setting up a new subflow upon the signal of a private address.
</figure> Further security considerations around the issue of ADD_ADDR messages that acci
dentally misdirect, or maliciously direct, new MP_JOIN attempts are discussed in
<t>Due to the proliferation of NATs, it is reasonably likely that one ho <xref target="sec_security" format="default" sectionFormat="of" derivedContent=
st may attempt to advertise private addresses <xref target="RFC1918"/>. It is no "Section 5"/>.</t>
t desirable to prohibit this, since there may be cases where both hosts have add <t pn="section-3.4.1-11">A host that receives an ADD_ADDR but finds th
itional interfaces on the same private network, and a host MAY advertise such ad at a connection set up to that IP address and port number is unsuccessful <bcp14
dresses. The MP_JOIN handshake to create a new subflow (<xref target="sec_join"/ >SHOULD NOT</bcp14> perform further connection attempts to this address⁠/port co
>) provides mechanisms to minimize security risks. The MP_JOIN message contains mbination for this connection. A sender that wants to trigger a new incoming con
a 32-bit token that uniquely identifies the connection to the receiving host. If nection attempt on a previously advertised address⁠/port combination can therefo
the token is unknown, the host will return with a RST. In the unlikely event th re refresh ADD_ADDR information by sending the option again.</t>
at the token is valid at the receiving host, subflow setup will continue, but th <t pn="section-3.4.1-12">A host can therefore send an ADD_ADDR message
e HMAC exchange must occur for authentication. This will fail, and will provide with an
sufficient protection against two unconnected hosts accidentally setting up a ne already-assigned Address ID, but the address <bcp14>MUST</bcp14> be
w subflow upon the signal of a private address. Further security considerations the same as the address previously assigned to this Address ID. A
around the issue of ADD_ADDR messages that accidentally misdirect, or maliciousl new ADD_ADDR may have the same port number or a different port number.
y direct, new MP_JOIN attempts are discussed in <xref target="sec_security"/>.</ If the port number is different, the receiving host <bcp14>SHOULD</bcp14> try t
t> o set up a new subflow to this new address⁠/port combination.</t>
<t pn="section-3.4.1-13">A host wishing to replace an existing Address
<t>A host that receives an ADD_ADDR but finds a connection set up to tha ID <bcp14>MUST</bcp14> first remove the existing one (<xref target="sec_remove_
t IP address and port number is unsuccessful SHOULD NOT perform further connecti addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>).</t>
on attempts to this address/port combination for this connection. A sender that <t pn="section-3.4.1-14">During normal MPTCP operation, it is unlikely
wants to trigger a new incoming connection attempt on a previously advertised ad that there will be sufficient TCP option space for ADD_ADDR to be included alon
dress/port combination can therefore refresh ADD_ADDR information by sending the g with those for data sequence numbering (<xref target="sec_dsn" format="default
option again.</t> " sectionFormat="of" derivedContent="Section 3.3.1"/>). Therefore, it is expecte
d that an MPTCP implementation will send the ADD_ADDR option on separate ACKs. A
<t>A host can therefore send an ADD_ADDR message with an already assigne s discussed earlier, however, an MPTCP implementation <bcp14>MUST NOT</bcp14> tr
d Address ID, but the Address MUST be the same as previously assigned to this Ad eat duplicate ACKs with any MPTCP option, with the exception of the DSS option,
dress ID. A new ADD_ADDR may have the same, or different, port number. If the po as indications of congestion <xref target="RFC5681" format="default" sectionForm
rt number is different, the receiving host SHOULD try to set up a new subflow to at="of" derivedContent="RFC5681"/>, and an MPTCP implementation <bcp14>SHOULD NO
this new address/port combination.</t> T</bcp14> send more than two duplicate ACKs in a row for signaling purposes.</t>
</section>
<t>A host wishing to replace an existing Address ID MUST first remove th <section anchor="sec_remove_addr" numbered="true" toc="include" removeIn
e existing one (<xref target="sec_remove_addr"/>).</t> RFC="false" pn="section-3.4.2">
<name slugifiedName="name-remove-address">Remove Address</name>
<t>During normal MPTCP operation, it is unlikely that there will be suff <t pn="section-3.4.2-1">If, during the lifetime of an MPTCP connection
icient TCP option space for ADD_ADDR to be included along with those for data se , a previously
quence numbering (<xref target="sec_dsn"/>). Therefore, it is expected that an M announced address becomes invalid (e.g., if the interface
PTCP implementation will send the ADD_ADDR option on separate ACKs. As discussed disappears or an IPv6 address is no longer preferred), the affected
earlier, however, an MPTCP implementation MUST NOT treat duplicate ACKs with an host <bcp14>SHOULD</bcp14> announce this situation so that the peer ca
y MPTCP option, with the exception of the DSS option, as indications of congesti n remove
on <xref target="RFC5681"/>, and an MPTCP implementation SHOULD NOT send more th subflows related to this address. Even if an address is not in use
an two duplicate ACKs in a row for signaling purposes.</t> by an MPTCP connection, if it has been previously announced, an
implementation <bcp14>SHOULD</bcp14> announce its removal. A host
</section> <bcp14>MAY</bcp14> also choose to announce that a valid IP address
<section title="Remove Address" anchor="sec_remove_addr"> should not be used any longer -- for example, for make‑before-break se
<t>If, during the lifetime of an MPTCP connection, a previously announce ssion continuity.</t>
d address becomes invalid (e.g., if the interface disappears, or an IPv6 address <t pn="section-3.4.2-2">This is achieved through the Remove Address (R
is no longer preferred), the affected host SHOULD announce this so that the pee EMOVE_ADDR) option
r can remove subflows related to this address. Even if an address is not in use (<xref target="tcpm_remove" format="default" sectionFormat="of" derive
by a MPTCP connection, if it has been previously announced, an implementation SH dContent="Figure 13"/>), which will remove a
OULD announce its removal. A host MAY also choose to announce that a valid IP ad previously added address (or list of addresses) from a connection
dress should not be used any longer, for example for make-before-break session c and terminate any subflows currently using that address.</t>
ontinuity.</t> <figure anchor="tcpm_remove" align="left" suppress-title="false" pn="f
<t>This is achieved through the Remove Address (REMOVE_ADDR) option (<xr igure-13">
ef target="tcpm_remove"/>), which will remove a previously added address (or lis <name slugifiedName="name-remove-address-remove_addr-">Remove Addres
t of addresses) from a connection and terminate any subflows currently using tha s (REMOVE_ADDR) Option</name>
t address.</t> <artwork align="left" name="" type="" alt="" pn="section-3.4.2-3.1">
<t>For security purposes, if a host receives a REMOVE_ADDR option, it mu 1 2 3
st ensure the affected path(s) are no longer in use before it instigates closure 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
. The receipt of REMOVE_ADDR SHOULD first trigger the sending of a TCP keepalive +---------------+---------------+-------+-------+---------------+
<xref target="RFC1122"/> on the path, and if a response is received the path SH | Kind |Length = 3 + n |Subtype|(resvd)| Address ID | ...
OULD NOT be removed. If the path is found to still be alive, the receiving host +---------------+---------------+-------+-------+---------------+
SHOULD no longer use the specified address for future connections, but it is the (followed by n-1 Address IDs, if required) </artwork>
responsibility of the host which sent the REMOVE_ADDR to shut down the subflow. </figure>
The requesting host MAY also use MP_PRIO (<xref target="sec_policy"/>) to reque <t pn="section-3.4.2-4">For security purposes, if a host receives a RE
st a path is no longer used, before removal. Typical TCP validity tests on the s MOVE_ADDR option,
ubflow (e.g., ensuring sequence and ACK numbers are correct) MUST also be undert it must ensure that the affected path or paths are no longer in use
aken. An implementation can use indications of these test failures as part of in before it instigates closure. The receipt of REMOVE_ADDR
trusion detection or error logging.</t> <bcp14>SHOULD</bcp14> first trigger the sending of a TCP keepalive
<t>The sending and receipt (if no keepalive response was received) of th <xref target="RFC1122" format="default" sectionFormat="of" derivedCont
is message SHOULD trigger the sending of RSTs by both hosts on the affected subf ent="RFC1122"/> on the path, and if a
low(s) (if possible), as a courtesy to cleaning up middlebox state, before clean response is received, the path <bcp14>SHOULD NOT</bcp14> be
ing up any local state.</t> removed. If the path is found to still be alive, the receiving host
<t>Address removal is undertaken by ID, so as to permit the use of NATs <bcp14>SHOULD</bcp14> no longer use the specified address for future
and other middleboxes that rewrite source addresses. If there is no address at t connections, but it is the responsibility of the host that sent the
he requested ID, the receiver will silently ignore the request.</t> REMOVE_ADDR to shut down the subflow. Before the address is removed,
<t>A subflow that is still functioning MUST be closed with a FIN exchang the requesting host
e as in regular TCP, rather than using this option. For more information, see <x <bcp14>MAY</bcp14> also use MP_PRIO (<xref target="sec_policy" format=
ref target="sec_close"/>.</t> "default" sectionFormat="of" derivedContent="Section 3.3.8"/>) to request that a
<?rfc needLines='8'?> path no longer be used. Typical TCP validity tests on the subflow (e.g., ensuri
<figure align="center" anchor="tcpm_remove" title="Remove Address (REMOV ng
E_ADDR) Option"> that sequence and ACK numbers are correct) <bcp14>MUST</bcp14> also be
<artwork align="left"><![CDATA[ undertaken. An implementation can use indications of these test failures as par
1 2 3 t of intrusion detection or error logging.</t>
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 <t pn="section-3.4.2-5">The sending and receipt (if no keepalive respo
+---------------+---------------+-------+-------+---------------+ nse was received)
| Kind | Length = 3+n |Subtype|(resvd)| Address ID | ... of this message <bcp14>SHOULD</bcp14> trigger the sending of RSTs by
+---------------+---------------+-------+-------+---------------+ both hosts on the affected subflow(s) (if possible), as a courtesy,
(followed by n-1 Address IDs, if required) to allow the cleanup of middlebox state before cleaning up any local s
]]></artwork> tate.</t>
</figure> <t pn="section-3.4.2-6">Address removal is undertaken according to the
</section> Address ID, so as to
permit the use of NATs and other middleboxes that rewrite source
addresses. If an Address ID is not known, the receiver will
silently ignore the request.</t>
<t pn="section-3.4.2-7">A subflow that is still functioning <bcp14>MUS
T</bcp14> be closed with a FIN exchange as in regular TCP, rather than using thi
s option. For more information, see <xref target="sec_close" format="default" se
ctionFormat="of" derivedContent="Section 3.3.3"/>.</t>
</section>
</section> </section>
<section anchor="sec_fastclose" numbered="true" toc="include" removeInRFC=
<section title="Fast Close" anchor="sec_fastclose"> "false" pn="section-3.5">
<t>Regular TCP has the means of sending a reset (RST) signal to abruptly <name slugifiedName="name-fast-close">Fast Close</name>
close a connection. With MPTCP, a regular RST only has the scope of the <t pn="section-3.5-1">Regular TCP has the means of sending a RST signal
subflow to abruptly
and will only close the concerned subflow but not affect the remaining close a connection. With MPTCP, a regular RST only has the scope of
the subflow; it
will only close the applicable subflow and will not affect the remaining
subflows. MPTCP's connection will stay alive at the data level, in order subflows. MPTCP's connection will stay alive at the data level, in order
to permit break-before-make handover between subflows. It is therefore to permit break-before-make handover between subflows. It is therefore
necessary to provide an MPTCP-level "reset" to allow the abrupt closure necessary to provide an MPTCP-level "reset" to allow the abrupt closure
of the whole MPTCP connection, and this is the MP_FASTCLOSE option.</t> of the whole MPTCP connection; this is done via the MP_FASTCLOSE option.
</t>
<t>MP_FASTCLOSE is used to indicate to the peer that the connection will <t pn="section-3.5-2">MP_FASTCLOSE is used to indicate to the peer that
be the connection will be
abruptly closed and no data will be accepted anymore. The reasons for abruptly closed and no data will be accepted anymore. The reasons for
triggering an MP_FASTCLOSE are implementation specific. Regular TCP does triggering an MP_FASTCLOSE are implementation specific. Regular TCP does
not allow sending a RST while the connection is in a synchronized not allow the sending of a RST while the connection is in a synchronized
state <xref target="RFC0793"/>. Nevertheless, implementations allow state <xref target="RFC0793" format="default" sectionFormat="of" derived
the sending of a RST in this state, if, for example, the operating Content="RFC0793"/>. Nevertheless, implementations allow
the sending of a RST in this state if, for example, the operating
system is running out of resources. In these cases, MPTCP should send system is running out of resources. In these cases, MPTCP should send
the MP_FASTCLOSE. This option is illustrated in <xref target="tcpm_fastc the MP_FASTCLOSE. This option is illustrated in <xref target="tcpm_fastc
lose"/>.</t> lose" format="default" sectionFormat="of" derivedContent="Figure 14"/>.</t>
<figure anchor="tcpm_fastclose" align="left" suppress-title="false" pn="
<?rfc needLines='12'?> figure-14">
<figure align="center" anchor="tcpm_fastclose" title="Fast Close (MP_FAS <name slugifiedName="name-fast-close-mp_fastclose-opt">Fast Close (MP_
TCLOSE) Option"> FASTCLOSE) Option</name>
<artwork align="left"><![CDATA[ <artwork align="left" name="" type="" alt="" pn="section-3.5-3.1">
1 2 3 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----------------------+ +---------------+---------------+-------+-----------------------+
| Kind | Length |Subtype| (reserved) | | Kind | Length |Subtype| (reserved) |
+---------------+---------------+-------+-----------------------+ +---------------+---------------+-------+-----------------------+
| Option Receiver's Key | | Option Receiver's Key |
| (64 bits) | | (64 bits) |
| | | |
+---------------------------------------------------------------+ +---------------------------------------------------------------+ </artwork>
]]></artwork>
</figure> </figure>
<t pn="section-3.5-4">If Host A wants to force the closure of an MPTCP c
<t>If Host A wants to force the closure of an MPTCP connection, it has t onnection, it can
wo do so via two
different options: options:
<list style="symbols"> </t>
<t>Option A (ACK) : Host A sends an ACK containing the MP_FASTCLOSE <ul spacing="normal" bare="false" empty="false" pn="section-3.5-5">
option on one subflow, containing the key of Host B as declared in <li pn="section-3.5-5.1">Option A (ACK): Host A sends an ACK containin
the initial connection handshake. On all the other subflows, Host A g the MP_FASTCLOSE
sends a regular TCP RST to close these subflows, and tears them down. option on one subflow, containing the key of Host B as declared in
Host A now enters FASTCLOSE_WAIT state.</t> the initial connection handshake. On all the other subflows, Host A
sends a regular TCP RST to close these subflows and tears them down.
<t>Option R (RST) : Host A sends a RST containing the MP_FASTCLOSE Host A now enters FASTCLOSE_WAIT state.</li>
option on all subflows, containing the key of Host B as declared in <li pn="section-3.5-5.2">Option R (RST): Host A sends a RST containing
the initial connection handshake. Host A can tear the subflows and the MP_FASTCLOSE
the connection down immediately.</t> option on all subflows, containing the key of Host B as declared in
</list> the initial connection handshake. Host A can tear down the subflows
</t> and
the connection immediately.</li>
<t>If host A decides to force the closure by using Option A and sending </ul>
an ACK with the MP_FASTCLOSE option, the connection shall proceed as foll <t pn="section-3.5-6">If Host A decides to force the closure by using Op
ows: tion A and sending
<list style="symbols"> an ACK with the MP_FASTCLOSE option, the connection shall proceed as fol
<t>Upon receipt of an ACK with MP_FASTCLOSE by Host B, containing th lows:
e valid key, Host B answers </t>
on the same subflow with a TCP RST and tears down all subflows also <ul spacing="normal" bare="false" empty="false" pn="section-3.5-7">
through sending TCP RST signals. Host B can <li pn="section-3.5-7.1">Upon receipt of an ACK with MP_FASTCLOSE by H
now close the whole MPTCP connection (it transitions directly to CLO ost B, containing the valid key, Host B answers
SED state).</t> on the same subflow with a TCP RST and tears down all subflows
also through sending TCP RST signals. Host B can
<t>As soon as Host A has received the TCP RST on the remaining subfl now close the whole MPTCP connection (it transitions directly to CLO
ow, it SED state).</li>
<li pn="section-3.5-7.2">As soon as Host A has received the TCP RST on
the remaining subflow, it
can close this subflow and tear down the whole connection (transition from can close this subflow and tear down the whole connection (transition from
FASTCLOSE_WAIT to CLOSED states). If Host A receives an MP_FASTCLOSE instead FASTCLOSE_WAIT state to CLOSED state). If Host A receives an MP_FASTCLOSE instead
of a TCP RST, both hosts attempted fast closure simultaneously. Host A should of a TCP RST, both hosts attempted fast closure simultaneously. Host A should
reply with a TCP RST and tear down the connection.</t> reply with a TCP RST and tear down the connection.</li>
<li pn="section-3.5-7.3">If Host A does not receive a TCP RST in reply
<t>If Host A does not receive a TCP RST in reply to its MP_FASTCLOSE to its MP_FASTCLOSE after one
after one retransmission timeout (RTO) (the RTO of the subflow where the MP_FA
retransmission timeout (RTO) (the RTO of the subflow where the MP_FA STCLOSE has been sent), it <bcp14>SHOULD</bcp14>
STCLOSE has been sent), it SHOULD retransmit the MP_FASTCLOSE. To keep this connection from being
retransmit the MP_FASTCLOSE. The number of retransmissions SHOULD be retained for a long time, the number of retransmissions <bcp14>SHOUL
limited to avoid this connection from being retained for a long time D</bcp14> be
, but limited;
this limit is implementation specific. A RECOMMENDED number is 3. If this limit is implementation specific. A <bcp14>RECOMMENDED</bcp14>
no TCP RST number is 3. If no TCP RST
is received in response, Host A SHOULD send a TCP RST with the MP_FA is received in response, Host A <bcp14>SHOULD</bcp14> send a TCP RST
STCLOSE option with the MP_FASTCLOSE option
itself when it releases state in order to clear any remaining state a itself when it releases state in order to clear any remaining state
t middleboxes.</t> at middleboxes.</li>
</list> </ul>
</t> <t pn="section-3.5-8">If, however, Host A decides to force the closure b
y using Option R and
<t>If however host A decides to force the closure by using Option R and sending a RST with the MP_FASTCLOSE option, Host B will act as follows:
sending a RST with the MP_FASTCLOSE option, Host B will act as follows: upon receipt of a RST with MP_FASTCLOSE, containing the valid key,
Upon receipt of a RST with MP_FASTCLOSE, containing the valid key, Host B tears down all subflows by sending a TCP RST. Host B can now clos
Host B tears down all subflows by sending a TCP RST. Host B can now close e the whole MPTCP
the whole MPTCP connection (it transitions directly to CLOSED state).</t>
connection (it transitions directly to CLOSED state).</t>
</section> </section>
<section anchor="sec_reset" numbered="true" toc="include" removeInRFC="fal
<section title="Subflow Reset" anchor="sec_reset"> se" pn="section-3.6">
<t>An implementation of MPTCP may also need to send a regular TCP RST to <name slugifiedName="name-subflow-reset">Subflow Reset</name>
force <t pn="section-3.6-1">An implementation of MPTCP may also need to send a
the closure of a subflow. A host sends a TCP RST in order to close a subf regular TCP RST to force
low the closure of a subflow. A host sends a TCP RST in order to close a sub
or reject an attempt to open a subflow (MP_JOIN). In order to inform the flow
receiving host why a subflow is being closed or rejected, the TCP RST pac or reject an attempt to open a subflow (MP_JOIN). In order to let the
ket receiving host know why a subflow is being closed or rejected, the TCP R
MAY include the MP_TCPRST Option. The host MAY use this information to ST packet
decide, for example, whether it tries to re-establish the subflow <bcp14>MAY</bcp14> include the MP_TCPRST option (<xref target="tcpm_rese
immediately, later, or never.</t> t" format="default" sectionFormat="of" derivedContent="Figure 15"/>). The host <
bcp14>MAY</bcp14> use this information to
<?rfc needLines='8'?> decide, for example, whether it tries to re-establish the subflow
<figure align="center" anchor="tcpm_reset" title="TCP RST Reason (MP_TCP immediately, later, or never.</t>
RST) Option"> <figure anchor="tcpm_reset" align="left" suppress-title="false" pn="figu
<artwork align="left"><![CDATA[ re-15">
1 2 3 <name slugifiedName="name-tcp-rst-reason-mp_tcprst-op">TCP RST Reason
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 (MP_TCPRST) Option</name>
+---------------+---------------+-------+-----------------------+ <artwork align="left" name="" type="" alt="" pn="section-3.6-2.1">
| Kind | Length |Subtype|U|V|W|T| Reason | 1 2 3
+---------------+---------------+-------+-----------------------+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
]]></artwork> +---------------+---------------+-------+-----------------------+
| Kind | Length |Subtype|U|V|W|T| Reason |
+---------------+---------------+-------+-----------------------+ </artwork>
</figure> </figure>
<t pn="section-3.6-3">The MP_TCPRST option contains a reason code that a
<t>The MP_TCPRST option contains a reason code that allows the llows the
sender of the option to provide more information about the reason for sender of the option to provide more information about the reason for
the termination of the subflow. Using 12 bits of option space, the the termination of the subflow. Using 12 bits of option space, the
first four bits are reserved for flags (only one of which is currently first 4 bits are reserved for flags (only one of which is currently
defined), and the remaining octet is used to express a reason code for defined), and the remaining octet is used to express a reason code for
this subflow termination, from which a receiver MAY infer information this subflow termination, from which a receiver <bcp14>MAY</bcp14> infer information
about the usability of this path.</t> about the usability of this path.</t>
<t pn="section-3.6-4">The "T" flag is used by the sender to indicate whe
<t>The "T" flag is used by the sender to indicate whether the error ther the error
condition that is reported is Transient (T bit set to 1) or Permanent condition that is reported is Transient ("T" bit set to 1) or Permanent
(T bit set to 0). If the error condition is considered to be ("T" bit set to 0). If the error condition is considered to be
Transient by the sender of the RST segment, the recipient of this Transient by the sender of the RST segment, the recipient of this
segment MAY try to reestablish a subflow for this connection over the segment <bcp14>MAY</bcp14> try to re-establish a subflow for this connec
failed path. The time at which a receiver may try to re-establish this tion over the
is implementation-specific, but SHOULD take into account the properties failed path. The time at which a receiver may try to
of the failure defined by the following reason code. If the error condi re‑establish this subflow
tion is implementation specific but <bcp14>SHOULD</bcp14> take into account t
is considered to be permanent, the receiver of the RST segment SHOULD NO he properties
T try of the failure as defined by the provided reason code. If the error con
to reestablish a subflow for this connection over this path. The "U", " dition
V" is considered to be Permanent, the receiver of the RST segment <bcp14>SH
OULD NOT</bcp14> try
to re‑establish a subflow for this connection over this path. The "U",
"V",
and "W" flags are not defined by this specification and are reserved for and "W" flags are not defined by this specification and are reserved for
future use. An implementation of this specification MUST set these flags future use. An implementation of this specification <bcp14>MUST</bcp14>
to 0, and a receiver MUST ignore them.</t> set these flags
to 0, and a receiver <bcp14>MUST</bcp14> ignore them.</t>
<t>The "Reason" code is an 8-bit field that indicates the reason for <t pn="section-3.6-5">"Reason" is an 8-bit field that indicates the reas
on code for
the termination of the subflow. The following codes are defined in the termination of the subflow. The following codes are defined in
this document: this document:
<list style="symbols"> </t>
<t>Unspecified error (code 0x0). This is the default error implying <ul spacing="normal" bare="false" empty="false" pn="section-3.6-6">
the <li pn="section-3.6-6.1">Unspecified error (code 0x00). This is the d
efault error;
it implies that the
subflow is no longer available. The presence of this option shows subflow is no longer available. The presence of this option shows
that the RST was generated by a MPTCP-aware device.</t> that the RST was generated by an MPTCP-aware device.</li>
<li pn="section-3.6-6.2">MPTCP-specific error (code 0x01). An error h
<t>MPTCP specific error (code 0x01). An error has been detected in as been detected in the
the
processing of MPTCP options. This is the usual reason code to return processing of MPTCP options. This is the usual reason code to return
in the cases where a RST is being sent to close a subflow for reason in the cases where a RST is being sent to close a subflow because
s of an invalid response.</li>
of an invalid response.</t> <li pn="section-3.6-6.3">Lack of resources (code 0x02). This code ind
icates that the
<t>Lack of resources (code 0x02). This code indicates that the
sending host does not have enough resources to support the sending host does not have enough resources to support the
terminated subflow.</t> terminated subflow.</li>
<li pn="section-3.6-6.4">Administratively prohibited (code 0x03). Thi
<t>Administratively prohibited (code 0x03). This code indicates tha s code indicates that
t
the requested subflow is prohibited by the policies of the sending the requested subflow is prohibited by the policies of the sending
host.</t> host.</li>
<li pn="section-3.6-6.5">Too much outstanding data (code 0x04). This
<t>Too much outstanding data (code 0x04). This code indicates that code indicates that
there is an excessive amount of data that need to be transmitted there is an excessive amount of data that needs to be transmitted
over the terminated subflow while having already been acknowledged over the terminated subflow while having already been acknowledged
over one or more other subflows. This may occur if a path has been over one or more other subflows. This may occur if a path has been
unavailable for a short period and it is more efficient to reset and unavailable for a short period and it is more efficient to reset and
start again than it is to retransmit the queued data.</t> start again than it is to retransmit the queued data.</li>
<li pn="section-3.6-6.6">Unacceptable performance (code 0x05). This c
<t>Unacceptable performance (code 0x05). This code indicates that ode indicates that
the performance of this subflow was too low compared to the other the performance of this subflow was too low compared to the other
subflows of this Multipath TCP connection.</t> subflows of this Multipath TCP connection.</li>
<li pn="section-3.6-6.7">Middlebox interference (code 0x06). Middlebo
<t>Middlebox interference (code 0x06). Middlebox interference has x interference has
been detected over this subflow making MPTCP signaling invalid. For been detected over this subflow, making MPTCP signaling invalid. Fo
example, this may be sent if the checksum does not validate.</t> r
</list> example, this may be sent if the checksum does not validate.</li>
</t> </ul>
</section> </section>
<section anchor="sec_fallback" numbered="true" toc="include" removeInRFC="
<section title="Fallback" anchor="sec_fallback"> false" pn="section-3.7">
<t>Sometimes, middleboxes will exist on a path that could prevent the op <name slugifiedName="name-fallback">Fallback</name>
eration of MPTCP. MPTCP has been designed in order to cope with many middlebox m <t pn="section-3.7-1">Sometimes, middleboxes will exist on a path that c
odifications (see <xref target="sec_middleboxes"/>), but there are still some ca ould prevent the
ses where a subflow could fail to operate within the MPTCP requirements. These c operation of MPTCP. MPTCP has been designed to cope with many
ases are notably the following: the loss of MPTCP options on a path, and the mod middlebox modifications (see <xref target="sec_middleboxes" format="defa
ification of payload data. If such an event occurs, it is necessary to "fall bac ult" sectionFormat="of" derivedContent="Section 6"/>), but there are still some
k" to the previous, safe operation. This may be either falling back to regular T cases where a subflow
CP or removing a problematic subflow.</t> could fail to operate within the MPTCP requirements. Notably, these case
s are the following: the loss of MPTCP options on a path and the modification of
<t>At the start of an MPTCP connection (i.e., the first subflow), it is payload data. If such an event occurs, it is necessary to "fall back" to the pr
important to ensure that the path is fully MPTCP capable and the necessary MPTCP evious, safe operation. This may be either falling back to regular TCP or removi
options can reach each host. The handshake as described in <xref target="sec_in ng a problematic subflow.</t>
it"/> SHOULD fall back to regular TCP if either of the SYN messages do not have <t pn="section-3.7-2">At the start of an MPTCP connection (i.e., the fir
the MPTCP options: this is the same, and desired, behavior in the case where a h st subflow), it is important to ensure that the path is fully MPTCP capable and
ost is not MPTCP capable, or the path does not support the MPTCP options. When a the necessary MPTCP options can reach each host. The handshake as described in <
ttempting to join an existing MPTCP connection (<xref target="sec_join"/>), if a xref target="sec_init" format="default" sectionFormat="of" derivedContent="Secti
path is not MPTCP capable and the MPTCP options do not get through on the SYNs, on 3.1"/> <bcp14>SHOULD</bcp14> fall back to regular TCP if either of the SYN me
the subflow will be closed according to the MP_JOIN logic.</t> ssages does not have the MPTCP options: this is the same, and desired, behavior
in the case where a host is not MPTCP capable or the path does not support the M
<t>There is, however, another corner case that should be addressed. That PTCP options. When attempting to join an existing MPTCP connection (<xref target
is one of MPTCP options getting through on the SYN, but not on regular packets. ="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>),
This can be resolved if the subflow is the first subflow, and thus all data in if a path is not MPTCP capable and the MPTCP options do not get through on the
flight is contiguous, using the following rules.</t> SYNs, the subflow will be closed according to the MP_JOIN logic.</t>
<t pn="section-3.7-3">There is, however, another corner case that should
<t>A sender MUST include a DSS option with data sequence mapping in ever be addressed:
y segment until one of the sent segments has been acknowledged with a DSS option the case where MPTCP options get through on the SYN but not on regular
containing a Data ACK. Upon reception of the acknowledgment, the sender has the packets. If the subflow is the first subflow and thus all data in
confirmation that the DSS option passes in both directions and may choose to se flight is contiguous, this situation can be resolved by using the follow
nd fewer DSS options than once per segment.</t> ing rules:</t>
<ul spacing="normal" bare="false" empty="false" pn="section-3.7-4">
<t>If, however, an ACK is received for data (not just for the SYN) witho <li pn="section-3.7-4.1">A sender <bcp14>MUST</bcp14> include a DSS op
ut a DSS option containing a Data ACK, the sender determines the path is not MPT tion with Data Sequence Mapping in every segment until one of the sent segments
CP capable. In the case of this occurring on an additional subflow (i.e., one st has been acknowledged with a DSS option containing a Data ACK. Upon reception of
arted with MP_JOIN), the host MUST close the subflow with a RST, which SHOULD co the acknowledgment, the sender has the confirmation that the DSS option passes
ntain a MP_TCPRST option (<xref target="sec_reset"/>) with a "Middlebox interfer in both directions and may choose to send fewer DSS options than once per segmen
ence" reason code.</t> t.</li>
<li pn="section-3.7-4.2">If, however, an ACK is received for data (not
<t>In the case of such an ACK being received on the first subflow (i.e., just for the SYN)
that started with MP_CAPABLE), before any additional subflows are added, the im without a DSS option containing a Data ACK, the sender determines that t
plementation MUST drop out of an MPTCP mode, back to regular TCP. The sender wil he path is not MPTCP capable. In the case of this occurring on an additional sub
l send one final data sequence mapping, with the Data-Level Length value of 0 in flow (i.e., one started with MP_JOIN), the host <bcp14>MUST</bcp14> close the su
dicating an infinite mapping (to inform the other end in case the path drops opt bflow with a RST, which <bcp14>SHOULD</bcp14> contain an MP_TCPRST option (<xref
ions in one direction only), and then revert to sending data on the single subfl target="sec_reset" format="default" sectionFormat="of" derivedContent="Section
ow without any MPTCP options.</t> 3.6"/>) with a "Middlebox interference" reason code.</li>
<li pn="section-3.7-4.3">In the case of such an ACK being received on
<t>If a subflow breaks during operation, e.g. if it is re-routed and MPT the first subflow
CP options are no longer permitted, then once this is detected (by the subflow-l (i.e., that started with MP_CAPABLE), before any additional subflows
evel receive buffer filling up, since there is no mapping available in order to are added, the implementation <bcp14>MUST</bcp14> drop out of MPTCP
DATA_ACK this data), the subflow SHOULD be treated as broken and closed with a R mode and fall back to regular TCP. The sender will send one final Data S
ST, since no data can be delivered to the application layer, and no fallback sig equence Mapping, with the Data-Level Length value of 0 indicating an infinite ma
nal can be reliably sent. This RST SHOULD include the MP_TCPRST option (<xref ta pping (to inform the other end in case the path drops options in one direction o
rget="sec_reset"/>) with a "Middlebox interference" reason code.</t> nly), and then revert to sending data on the single subflow without any MPTCP op
tions.</li>
<t>These rules should cover all cases where such a failure could happen: <li pn="section-3.7-4.4">If a subflow breaks during operation, e.g., i
whether it's on the forward or reverse path and whether the server or the clien f it is rerouted and
t first sends data.</t> MPTCP options are no longer permitted, then once this is detected (by
the subflow-level receive buffer filling up, since there is no mapping
<t>So far this section has discussed the loss of MPTCP options, either i available in order to DATA_ACK this data), the subflow
nitially, or during the course of the connection. As described in <xref target=" <bcp14>SHOULD</bcp14> be treated as broken and closed with a RST,
sec_generalop"/>, each portion of data for which there is a mapping is protected since no data can be delivered to the application layer and no
by a checksum, if checksums have been negotiated. This mechanism is used to det fallback signal can be reliably sent. This RST <bcp14>SHOULD</bcp14>
ect if middleboxes have made any adjustments to the payload (added, removed, or include the MP_TCPRST option (<xref target="sec_reset" format="default"
changed data). A checksum will fail if the data has been changed in any way. Thi sectionFormat="of" derivedContent="Section 3.6"/>) with a "Middlebox interferenc
s will also detect if the length of data on the subflow is increased or decrease e" reason code.</li>
d, and this means the data sequence mapping is no longer valid. The sender no lo </ul>
nger knows what subflow-level sequence number the receiver is genuinely operatin <t pn="section-3.7-5">These rules should cover all cases where such a fa
g at (the middlebox will be faking ACKs in return), and it cannot signal any fur ilure could
ther mappings. Furthermore, in addition to the possibility of payload modificati happen -- whether it's on the forward or reverse path and whether the se
ons that are valid at the application layer, there is the possibility that such rver or the client first sends data.</t>
modifications could be triggered across MPTCP segment boundaries, corrupting the <t pn="section-3.7-6">So far, this section has discussed the loss of MPT
data. Therefore, all data from the start of the segment that failed the checksu CP options,
m onwards is not trustworthy.</t> either initially or during the course of the connection. As described
in <xref target="sec_generalop" format="default" sectionFormat="of" deri
<t>Note that if checksum usage has not been negotiated, this fallback me vedContent="Section 3.3"/>, each portion of
chanism cannot be used unless there is some higher or lower layer signal to info data for which there is a mapping is protected by a checksum, if
rm the MPTCP implementation that the payload has been tampered with.</t> checksums have been negotiated. This mechanism is used to detect if
middleboxes have made any adjustments to the payload (added, removed,
<t>When multiple subflows are in use, the data in flight on a subflow wi or changed data). A checksum will fail if the data has been changed in
ll likely involve data that is not contiguously part of the connection-level str any way. The use of a checksum will also detect whether the length of da
eam, since segments will be spread across the multiple subflows. Due to the prob ta on the subflow is
lems identified above, it is not possible to determine what adjustment has done increased or decreased, and this means the Data Sequence Mapping is no
to the data (notably, any changes to the subflow sequence numbering). Therefore, longer valid. The sender no longer knows what subflow-level sequence
it is not possible to recover the subflow, and the affected subflow must be imm number the receiver is genuinely operating at (the middlebox will be
ediately closed with a RST, featuring an MP_FAIL option (<xref target="tcpm_fall faking ACKs in return), and it cannot signal any further
back"/>), which defines the data sequence number at the start of the segment (de mappings. Furthermore, in addition to the possibility of payload
fined by the data sequence mapping) that had the checksum failure. Note that the modifications that are valid at the application layer, it is possible th
MP_FAIL option requires the use of the full 64-bit sequence number, even if 32- at such modifications could be triggered across MPTCP segment boundaries, corrup
bit sequence numbers are normally in use in the DSS signals on the path.</t> ting the data. Therefore, all data from the start of the segment that failed the
checksum onward is not trustworthy.</t>
<?rfc needLines='8'?> <t pn="section-3.7-7">Note that if checksum usage has not been negotiate
<figure align="center" anchor="tcpm_fallback" title="Fallback (MP_FAIL) d, this fallback mechanism cannot be used unless there is some higher-layer or l
Option"> ower‑layer signal to inform the MPTCP implementation that the payload has been t
<artwork align="left"><![CDATA[ ampered with.</t>
1 2 3 <t pn="section-3.7-8">When multiple subflows are in use, the data in fli
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ght on a subflow
+---------------+---------------+-------+----------------------+ will likely involve data that is not contiguously part of the
| Kind | Length=12 |Subtype| (reserved) | connection-level stream, since segments will be spread across the
+---------------+---------------+-------+----------------------+ multiple subflows. Due to the problems identified above, it is not
| | possible to determine what adjustments have been done to the data (notab
| Data Sequence Number (8 octets) | ly,
| | any changes to the subflow sequence numbering). Therefore, it is not
+--------------------------------------------------------------+ possible to recover the subflow, and the affected subflow must be
immediately closed with a RST that includes an MP_FAIL option (<xref tar
]]></artwork> get="tcpm_fallback" format="default" sectionFormat="of" derivedContent="Figure 1
6"/>), which defines the data sequence number at the start of the segment (defin
ed by the Data Sequence Mapping) that had the checksum failure. Note that the MP
_FAIL option requires the use of the full 64-bit sequence number, even if 32-bit
sequence numbers are normally in use in the DSS signals on the path.</t>
<figure anchor="tcpm_fallback" align="left" suppress-title="false" pn="f
igure-16">
<name slugifiedName="name-fallback-mp_fail-option">Fallback (MP_FAIL)
Option</name>
<artwork align="left" name="" type="" alt="" pn="section-3.7-9.1">
1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+----------------------+
| Kind | Length=12 |Subtype| (reserved) |
+---------------+---------------+-------+----------------------+
| |
| Data Sequence Number (8 octets) |
| |
+--------------------------------------------------------------+ </artwork>
</figure> </figure>
<t pn="section-3.7-10">The receiver of this option <bcp14>MUST</bcp14> d
iscard all data following the data sequence number specified.
Failed data <bcp14>MUST NOT</bcp14> be DATA_ACKed and so will be retrans
mitted on other subflows (<xref target="sec_retransmit" format="default" section
Format="of" derivedContent="Section 3.3.6"/>). </t>
<t pn="section-3.7-11">A special case is when there is a single subflow
and it fails with a checksum error. If it is known that all unacknowledged data
in
flight is contiguous (which will usually be the case with a single
subflow), an infinite mapping can be applied to the subflow without
the need to close it first, essentially turning off all further
MPTCP signaling.
<t>The receiver of this option MUST discard all data following the data In this case, if a receiver identifies a checksum failure
sequence number specified.
Failed data MUST NOT be DATA_ACKed and so will be retransmitted on other
subflows (<xref target="sec_retransmit"/>). </t>
<t>A special case is when there is a single subflow and it fails with a
checksum error.
If it is known that all unacknowledged data in flight is
contiguous (which will usually be the case with a single subflow), an infinite m
apping can be applied to the subflow without the need to close it first, and
essentially turn off all further MPTCP signaling. In this case, if a receiver id
entifies a checksum failure
when there is only one path, it will send back an MP_FAIL option on the subflow-level ACK, referring to the data-level sequence number of the start of the when there is only one path, it will send back an MP_FAIL option on the subflow-level ACK, referring to the data-level sequence number of the start of the
segment on which the checksum error was detected. The sender will receive segment on which the checksum error was detected. The sender will receive
this, and if all unacknowledged data in flight is contiguous, will signal an inf this information and, if all unacknowledged data in flight is contiguous, will s
inite mapping. ignal an infinite mapping.
This infinite mapping will be a DSS option (<xref target="sec_generalop"/>) This infinite mapping will be a DSS option (<xref target="sec_generalop" format=
on the first new packet, containing a data sequence mapping that acts retroactiv "default" sectionFormat="of" derivedContent="Section 3.3"/>)
ely, referring to the start of the subflow sequence on the first new packet, containing a Data Sequence Mapping that acts retroactiv
number of the most recent segment that was known to be delivered intact (i.e. wa ely, referring to the start of the subflow sequence
s successfully DATA_ACKed). From that point onwards, data can be altered number of the most recent segment that was known to be delivered intact (i.e., w
as successfully DATA_ACKed). From that point onward, data can be altered
by a middlebox without affecting MPTCP, as the data stream is equivalent to a regular, legacy TCP session. by a middlebox without affecting MPTCP, as the data stream is equivalent to a regular, legacy TCP session.
Whilst in theory paths may only be damaged in one direction, and the MP_FAIL sig While in theory paths may only be damaged in one direction -- and the MP_FAIL
nal affects only one direction of traffic, signal affects only one direction of traffic --
for implementation simplicity, the receiver of an MP_FAIL MUST also respond with for simplicity of implementation, the receiver of an MP_FAIL <bcp14>MUST</bcp14>
an MP_FAIL in the reverse direction and entirely revert to a regular TCP sessio also respond with an MP_FAIL in the reverse direction and entirely revert to a
n.</t> regular TCP session.</t>
<t pn="section-3.7-12">In the rare case that the data is not contiguous
<t>In the rare case that the data is not contiguous (which could happen (which could happen when there is only one subflow but it is retransmitting data
when there is only one subflow but it is retransmitting data from a subflow from a subflow
that has recently been uncleanly closed), the receiver MUST close the subflow wi that has recently been uncleanly closed), the receiver <bcp14>MUST</bcp14> close
th a RST with MP_FAIL. The receiver MUST discard all data that follows the the subflow with a RST with MP_FAIL. The receiver <bcp14>MUST</bcp14> discard a
data sequence number specified. The sender MAY attempt to create a new subflow b ll data that follows the
elonging to the same connection, and, if it chooses to do so, SHOULD place data sequence number specified. The sender <bcp14>MAY</bcp14> attempt to
the single subflow immediately in single-path mode by setting an infinite data s create a new subflow belonging to the same connection and, if it chooses to do
equence mapping. This mapping will begin from the data-level sequence number so, <bcp14>SHOULD</bcp14> immediately place
the single subflow in single-path mode by setting an infinite Data Sequence Mapp
ing. This mapping will begin from the data-level sequence number
that was declared in the MP_FAIL.</t> that was declared in the MP_FAIL.</t>
<t pn="section-3.7-13">After a sender signals an infinite mapping, it <b
<t>After a sender signals an infinite mapping, it MUST only use subflow cp14>MUST</bcp14> only use subflow ACKs to clear its send buffer.
ACKs to clear its send buffer.
This is because Data ACKs may become misaligned with the subflow ACKs when middleboxes insert or delete data. This is because Data ACKs may become misaligned with the subflow ACKs when middleboxes insert or delete data.
The receive SHOULD stop generating Data ACKs after it receives an infinite mappi The receiver <bcp14>SHOULD</bcp14> stop generating Data ACKs after it receives
ng. </t> an infinite mapping.</t>
<t pn="section-3.7-14">When a connection has fallen back with an infinit
<t>When a connection has fallen back with an infinite mapping, only one e mapping, only one subflow can send data; otherwise, the receiver would not kno
subflow can send data; otherwise, the receiver would not know how to reorder the w how to reorder the data. In practice, this means that all MPTCP subflows will
data. In practice, this means that all MPTCP subflows will have to be terminate have to be terminated except one. Once MPTCP falls back to regular TCP, it <bcp1
d except one. Once MPTCP falls back to regular TCP, it MUST NOT revert to MPTCP 4>MUST NOT</bcp14> revert to MPTCP later in the connection.</t>
later in the connection.</t> <t pn="section-3.7-15">It should be emphasized that MPTCP is not attempt
ing to prevent the use of middleboxes that want to adjust the payload. An MPTCP-
<t>It should be emphasized that MPTCP is not attempting to prevent the u aware middlebox could provide such functionality by also rewriting checksums.</t
se of middleboxes that want to adjust the payload. An MPTCP-aware middlebox coul >
d provide such functionality by also rewriting checksums.</t>
</section> </section>
<section anchor="sec_errors" numbered="true" toc="include" removeInRFC="fa
<section title="Error Handling" anchor="sec_errors"> lse" pn="section-3.8">
<t>In addition to the fallback mechanism as described above, the standar <name slugifiedName="name-error-handling">Error Handling</name>
d classes of TCP errors may need to be handled in an MPTCP-specific way. Note th <t pn="section-3.8-1">In addition to the fallback mechanism described ab
at changing semantics -- such as the relevance of a RST -- are covered in <xref ove, the standard classes of TCP errors may need to be handled in an MPTCP‑speci
target="sec_semantics"/>. Where possible, we do not want to deviate from regular fic way. Note that changing semantics -- such as the relevance of a RST -- are c
TCP behavior.</t> overed in <xref target="sec_semantics" format="default" sectionFormat="of" deriv
<t>The following list covers possible errors and the appropriate MPTCP b edContent="Section 4"/>. Where possible, we do not want to deviate from regular
ehavior: TCP behavior.</t>
<list style="symbols"> <t pn="section-3.8-2">The following list covers possible errors and the
<t>Unknown token in MP_JOIN (or HMAC failure in MP_JOIN ACK, or miss appropriate MPTCP behavior:
ing MP_JOIN in SYN/ACK response): send RST (analogous to TCP's behavior on an un
known port)</t>
<t>DSN out of window (during normal operation): drop the data, do no
t send Data ACKs</t>
<t>Remove request for unknown address ID: silently ignore</t>
</list>
</t> </t>
<ul spacing="normal" bare="false" empty="false" pn="section-3.8-3">
<li pn="section-3.8-3.1">Unknown token in MP_JOIN (or HMAC failure in
MP_JOIN ACK, or missing MP_JOIN in SYN/ACK response): send RST (analogous to TCP
's behavior on an unknown port)</li>
<li pn="section-3.8-3.2">DSN out of window (during normal operation):
drop the data; do not send Data ACKs</li>
<li pn="section-3.8-3.3">Remove request for unknown Address ID: silent
ly ignore</li>
</ul>
</section> </section>
<section anchor="heuristics" numbered="true" toc="include" removeInRFC="fa
<section title="Heuristics" anchor="heuristics"> lse" pn="section-3.9">
<name slugifiedName="name-heuristics">Heuristics</name>
<t>There are a number of heuristics that are needed for <t pn="section-3.9-1">There are a number of heuristics that are needed f
or
performance or deployment but that are not required for performance or deployment but that are not required for
protocol correctness. In this section, we detail such protocol correctness. In this section, we detail such
heuristics. Note that discussion of buffering and certain heuristics. Note that discussions of buffering and certain
sender and receiver window behaviors are presented in Sections sender and receiver window behaviors are presented in Sections
<xref target="sec_rwin" format="counter"/> and <xref target="sec_sender" <xref target="sec_rwin" format="counter" sectionFormat="of" derivedConte
format="counter"/>, nt="3.3.4"/> and <xref target="sec_sender" format="counter" sectionFormat="of" d
as well as retransmission in <xref target="sec_retransmit"/>.</t> erivedContent="3.3.5"/>,
and retransmission is discussed in <xref target="sec_retransmit" format=
<section title="Port Usage"> "default" sectionFormat="of" derivedContent="Section 3.3.6"/>.</t>
<t>Under typical operation, an MPTCP implementation SHOULD use <section numbered="true" toc="include" removeInRFC="false" pn="section-3
the same ports as already in use. In other words, the .9.1">
destination port of a SYN containing an MP_JOIN option SHOULD <name slugifiedName="name-port-usage">Port Usage</name>
<t pn="section-3.9.1-1">Under typical operation, an MPTCP implementati
on <bcp14>SHOULD</bcp14> use
the same ports as the ports that are already in use. In other words, t
he
destination port of a SYN containing an MP_JOIN option <bcp14>SHOULD</
bcp14>
be the same as the remote port of the first subflow in the be the same as the remote port of the first subflow in the
connection. The local port for such SYNs SHOULD also be the connection. The local port for such SYNs <bcp14>SHOULD</bcp14> also b
same as for the first subflow (and as such, an e the
implementation SHOULD reserve ephemeral ports across all same as the port for the first subflow (and as such, an
implementation <bcp14>SHOULD</bcp14> reserve ephemeral ports across al
l
local IP addresses), although there may be cases where this local IP addresses), although there may be cases where this
is infeasible. This strategy is intended to maximize the is infeasible. This strategy is intended to maximize the
probability of the SYN being permitted by a firewall or NAT probability of the SYN being permitted by a firewall or NAT
at the recipient and to avoid confusing any network at the recipient and to avoid confusing any network-monitoring softwar
monitoring software.</t> e.</t>
<t pn="section-3.9.1-2">There may also be cases, however, where a host
<t>There may also be cases, however, where a host wishes to wishes to
signal that a specific port should be used, and this facility signal that a specific port should be used; this facility
is provided in the ADD_ADDR option as documented in is provided in the ADD_ADDR option as documented in
<xref target="sec_add_address"/>. It is therefore feasible <xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>. It is therefore feasible
to allow multiple subflows between the same two addresses to allow multiple subflows between the same two addresses
but using different port pairs, and but using different port pairs, and
such a facility could be used to allow load balancing within such a facility could be used to allow load balancing within
the network based on 5-tuples (e.g., some ECMP implementations <xref target="RFC2992"/>).</t> the network based on 5-tuples (e.g., some ECMP implementations <xref target="RFC2992" format="default" sectionFormat="of" derivedContent="RFC2992"/>). </t>
</section> </section>
<section numbered="true" toc="include" removeInRFC="false" pn="section-3
<section title="Delayed Subflow Start and Subflow Symmetry"> .9.2">
<t>Many TCP connections are short-lived and consist only of a few <name slugifiedName="name-delayed-subflow-start-and-s">Delayed Subflow
segments, and so the overheads Start and Subflow Symmetry</name>
of using MPTCP outweigh any benefits. A heuristic is required, <t pn="section-3.9.2-1">Many TCP connections are short-lived and consi
st only of a few
segments, and so the overhead
of using MPTCP outweighs any benefits. A heuristic is required,
therefore, to decide when to start using additional subflows in therefore, to decide when to start using additional subflows in
an MPTCP connection. Experimental deployments have shown that an MPTCP connection. Experimental deployments have shown that
MPTCP can be applied in a range of scenarios so an implementation MPTCP can be applied in a range of scenarios, so an implementation
is likely to need to take into account factors including the type of will likely need to take into account such factors as the type of
traffic being sent and duration of session, and this information traffic being sent and the duration of the session; this information
MAY be signalled by the application layer.</t> <bcp14>MAY</bcp14> be signaled by the application layer.</t>
<t pn="section-3.9.2-2">However, for standard TCP traffic, a suggested
<t>However, for standard TCP traffic, a suggested general-purpose general-purpose
heuristic that an implementation MAY choose to employ is as follows.</ heuristic that an implementation <bcp14>MAY</bcp14> choose to employ i
t> s as follows.</t>
<t pn="section-3.9.2-3">If a host has data buffered for its peer (whic
<t>If a host has data buffered for its peer (which implies that the h implies that the
application has received a request for data), the host opens one application has received a request for data), the host opens one
subflow for each initial window's worth of data that is buffered.</t> subflow for each initial window's worth of data that is buffered.</t>
<t pn="section-3.9.2-4">Consideration should also be given to limiting
<t>Consideration should also be given to limiting the rate of adding the rate of adding
new subflows, as well as limiting the total number of subflows open new subflows, as well as limiting the total number of subflows open
for a particular connection. A host may choose to vary these values for a particular connection. A host may choose to vary these values
based on its load or knowledge of traffic and path characteristics.</t> based on its load or knowledge of traffic and path characteristics.</t>
<t pn="section-3.9.2-5">Note that this heuristic alone is probably ins
<t>Note that this heuristic alone is probably insufficient. Traffic ufficient. Traffic
for many common applications, such as downloads, is highly asymmetric for many common applications, such as downloads, is highly asymmetric,
and and
the host that is multihomed may well be the client that will never fill the host that is multihomed may well be the client that will never fill
its buffers, and thus never use MPTCP according to this heuristic. Advanced APIs that allow an its buffers and thus never use MPTCP according to this heuristic. Advanced APIs that allow an
application to signal its traffic requirements would aid in these decisions.</t> application to signal its traffic requirements would aid in these decisions.</t>
<t pn="section-3.9.2-6">An additional time-based heuristic could be ap
<t>An additional time-based heuristic could be applied, opening additi plied, opening additional
onal
subflows after a given period of time has passed. This would alleviate the subflows after a given period of time has passed. This would alleviate the
above issue, and also provide resilience for low-bandwidth but long-lived above issue and also provide resilience for low‑bandwidth but long-lived
applications.</t> applications.</t>
<t pn="section-3.9.2-7">Another issue is that both communicating hosts
<t>Another issue is that both communicating hosts may simultaneously t may simultaneously try to
ry to set up a subflow between the same pair of addresses. This leads to an
set up a subflow between the same pair of addresses. This leads to an inefficient use of resources.</t>
inefficient use of resources.</t> <t pn="section-3.9.2-8">If the same ports are used on all subflows, as
recommended above,
<t>If the same ports are used on all subflows, as recommended above, then standard TCP simultaneous-open logic should take care of this sit
then standard TCP simultaneous open logic should take care of this sit uation
uation
and only one subflow will be established between the address pairs. However, and only one subflow will be established between the address pairs. However,
this relies on the same ports being used at both end hosts. If a host does this relies on the same ports being used at both end hosts. If a host does
not support TCP simultaneous open, it is RECOMMENDED that some element not support TCP simultaneous open, it is <bcp14>RECOMMENDED</bcp14> th
of randomization is applied to the time to wait before opening new sub at some element
flows, of randomization be applied to the time to wait before opening new sub
flows,
so that only one subflow is created between a given address pair. If, however, so that only one subflow is created between a given address pair. If, however,
hosts signal additional ports to use (for example, for leveraging ECMP on-path), hosts signal additional ports to use (for example, for leveraging ECMP on-path),
this heuristic is not appropriate.</t> this heuristic is not appropriate.</t>
<t pn="section-3.9.2-9">This section has shown some of the factors tha
<t>This section has shown some of the considerations that an implement t an implementer
er should consider when developing MPTCP heuristics, but it is not intend
should give when developing MPTCP heuristics, but is not intended to b ed to be
e
prescriptive.</t> prescriptive.</t>
</section> </section>
<section numbered="true" toc="include" removeInRFC="false" pn="section-3
<section title="Failure Handling"> .9.3">
<t>Requirements for MPTCP's handling of unexpected signals have been <name slugifiedName="name-failure-handling">Failure Handling</name>
given in <xref target="sec_errors"/>. There are other failure cases, <t pn="section-3.9.3-1">Requirements for MPTCP's handling of unexpecte
however, where a hosts can choose appropriate behavior.</t> d signals are
given in <xref target="sec_errors" format="default" sectionFormat="of"
<t>For example, <xref target="sec_init"/> suggests that a host SHOULD derivedContent="Section 3.8"/>. There are other failure cases,
however, where hosts can choose appropriate behavior.</t>
<t pn="section-3.9.3-2">For example, <xref target="sec_init" format="d
efault" sectionFormat="of" derivedContent="Section 3.1"/> suggests that a host <
bcp14>SHOULD</bcp14>
fall back to trying regular TCP SYNs after one or more failures of MPTCP fall back to trying regular TCP SYNs after one or more failures of MPTCP
SYNs for a connection. A host may keep a system-wide cache of such SYNs for a connection. A host may keep a system-wide cache of such
information, so that it can back off from using MPTCP, firstly for that information, so that it can back off from using MPTCP, firstly for that
particular destination host, and eventually on a whole interface, if particular destination host and, eventually, on a whole interface, if
MPTCP connections continue failing. The duration of such a cache would MPTCP connections continue to fail. The duration of such a cache would
be implementation-specific.</t> be implementation specific.</t>
<t pn="section-3.9.3-3">Another failure could occur when the MP_JOIN h
<t>Another failure could occur when the MP_JOIN handshake fails. andshake fails.
<xref target="sec_errors"/> specifies that an incorrect handshake MUST <xref target="sec_errors" format="default" sectionFormat="of" derivedC
ontent="Section 3.8"/> specifies that an incorrect handshake <bcp14>MUST</bcp14>
lead to the subflow being closed with a RST. A host operating an active lead to the subflow being closed with a RST. A host operating an active
intrusion detection system may choose to start blocking MP_JOIN packets intrusion-detection system may choose to start blocking MP_JOIN packets
from the source host if multiple failed MP_JOIN attempts are seen. From from the source host if multiple failed MP_JOIN attempts are seen. From
the connection initiator's point of view, if an MP_JOIN fails, it SHOU the connection initiator's point of view, if an MP_JOIN fails, it
LD <bcp14>SHOULD NOT</bcp14>
NOT attempt to connect to the same IP address and port during the life attempt to connect to the same IP address and port during the lifetime
time
of the connection, unless the other host refreshes the information with of the connection, unless the other host refreshes the information with
another ADD_ADDR option. Note that the ADD_ADDR option is informational another ADD_ADDR option. Note that the ADD_ADDR option is informational
only, and does not guarantee the other host will attempt a connection. only and does not guarantee that the other host will attempt a connect
</t> ion.</t>
<t pn="section-3.9.3-4">In addition, an implementation may learn, over
<t>In addition, an implementation may learn, over a number of connecti a number of connections,
ons,
that certain interfaces or destination addresses consistently fail and that certain interfaces or destination addresses consistently fail and
may default to not trying to use MPTCP for these. Behavior could also may default to not trying to use MPTCP for such interfaces or
be learned for particularly badly performing subflows or subflows that addresses. The behavior of subflows that perform particularly badly
regularly fail during use, in order to temporarily choose not to use or subflows that regularly fail during use could also
be learned, so that an implementation can temporarily choose not to us
e
these paths.</t> these paths.</t>
</section> </section>
</section> </section>
</section> </section>
<section anchor="sec_semantics" numbered="true" toc="include" removeInRFC="f
<section title="Semantic Issues" anchor="sec_semantics"> alse" pn="section-4">
<t>In order to support multipath operation, the semantics of some TCP comp <name slugifiedName="name-semantic-issues">Semantic Issues</name>
onents have changed. To aid clarity, this section collects these semantic change <t pn="section-4-1">In order to support multipath operation, the semantics
s as a reference. of some TCP
<list style="hanging"> components have changed. To help clarify, this section lists these
<t hangText="Sequence number:"> The (in-header) TCP sequence semantic changes as a point of reference.
</t>
<dl newline="false" spacing="normal" indent="3" pn="section-4-2">
<dt pn="section-4-2.1">Sequence number:</dt>
<dd pn="section-4-2.2"> The (in-header) TCP sequence
number is specific to the subflow. To allow the receiver to number is specific to the subflow. To allow the receiver to
reorder application data, an additional data-level reorder application data, an additional data-level
sequence space is used. In this data-level sequence space, the initi sequence space is used. In this data‑level sequence space, the initi
al SYN and al SYN and
the final DATA_FIN occupy 1 octet of sequence space. This is to ensu the final DATA_FIN occupy 1 octet of sequence space. This is done to
re these ensure that these
signals are acknowledged at the connection level. There is an explicit signals are acknowledged at the connection level. There is an explicit
mapping of data sequence space to subflow sequence space, mapping of data sequence space to subflow sequence space,
which is signaled through TCP options in data which is signaled through TCP options in data
packets.</t> packets.</dd>
<dt pn="section-4-2.3">ACK:</dt>
<t hangText="ACK:"> The ACK field in the TCP header <dd pn="section-4-2.4"> The ACK field in the TCP header
acknowledges only the subflow sequence number, not the acknowledges only the subflow sequence number -- not the
data-level sequence space. Implementations SHOULD NOT data-level sequence space. Implementations <bcp14>SHOULD NOT</bcp14>
attempt to infer a data-level acknowledgment from the attempt to infer a data-level acknowledgment from the
subflow ACKs. subflow ACKs.
This separates subflow- and connection-level processing This separates subflow-level and connection-level processing
at an end host.</t> at an end host.</dd>
<dt pn="section-4-2.5">Duplicate ACK:</dt>
<t hangText="Duplicate ACK:"> A duplicate ACK that includes any MPTCP <dd pn="section-4-2.6"> A duplicate ACK that includes any MPTCP signalin
signaling g
(with the exception of the DSS option) MUST NOT be treated as a sign (with the exception of the DSS option) <bcp14>MUST NOT</bcp14> be tr
al of congestion. eated as a signal of congestion.
To limit the chances of non-MPTCP-aware entities mistakenly interpreting duplicate To limit the chances of non-MPTCP-aware entities mistakenly interpreting duplicate
ACKs as a signal of congestion, MPTCP SHOULD NOT send more than two ACKs as a signal of congestion, MPTCP <bcp14>SHOULD NOT</bcp14> send
duplicate ACKs more than two duplicate ACKs
containing (non-DSS) MPTCP signals in a row.</t> containing (non-DSS) MPTCP signals in a row.</dd>
<dt pn="section-4-2.7">Receive Window:</dt>
<t hangText="Receive Window:">The receive window in the TCP <dd pn="section-4-2.8">The receive window in the TCP
header indicates the amount of free buffer space for the header indicates the amount of free buffer space for the
whole data-level connection (as opposed to for this whole data-level connection (as opposed to the amount of space for t
subflow) that is available at the receiver. This is the his
same semantics as regular TCP, but to maintain these subflow) that is available at the receiver. The
semantics are the same as for regular TCP, but to maintain these
semantics the receive window must be interpreted at the semantics the receive window must be interpreted at the
sender as relative to the sequence number given in the sender as relative to the sequence number given in the
DATA_ACK rather than the subflow ACK in the TCP header. DATA_ACK rather than the subflow ACK in the TCP header.
In this way, the original flow control role is preserved. In this way, the original role of flow control is preserved.
Note that some middleboxes may change the receive window, Note that some middleboxes may change the receive window,
and so a host SHOULD use the maximum value of those recently and so a host <bcp14>SHOULD</bcp14> use the maximum value of those recently
seen on the constituent subflows for the connection-level seen on the constituent subflows for the connection-level
receive window, and also needs to maintain a subflow-level receive window and also needs to maintain a subflow-level
window for subflow-level processing.</t> window for subflow-level processing.</dd>
<dt pn="section-4-2.9">FIN:</dt>
<t hangText="FIN:"> The FIN flag in the TCP header applies <dd pn="section-4-2.10"> The FIN flag in the TCP header applies
only to the subflow it is sent on, not to the whole only to the subflow it is sent on -- not to the whole
connection. For connection-level FIN semantics, the connection. For connection-level FIN semantics, the
DATA_FIN option is used.</t> DATA_FIN option is used.</dd>
<dt pn="section-4-2.11">RST:</dt>
<t hangText="RST:"> The RST flag in the TCP header applies <dd pn="section-4-2.12"> The RST flag in the TCP header applies
only to the subflow it is sent on, not to the whole only to the subflow it is sent on -- not to the whole
connection. The MP_FASTCLOSE option provides the fast close connection. The MP_FASTCLOSE option provides the Fast Close
functionality of a RST at the MPTCP connection level.</t> functionality of a RST at the MPTCP connection level.</dd>
<dt pn="section-4-2.13">Address List:</dt>
<t hangText="Address List:"> Address list management (i.e., <dd pn="section-4-2.14"> Address list management (i.e.,
knowledge of the local and remote hosts' lists of knowledge of the local and remote hosts' lists of
available IP addresses) is handled available IP addresses) is handled
on a per-connection basis (as opposed to per subflow, per on a per-connection basis (as opposed to per subflow, per
host, or per pair of communicating hosts). This permits host, or per pair of communicating hosts). This permits
the application of per-connection local policy. Adding an the application of per-connection local policy. Adding an
address to one connection (either explicitly through an Add address to one connection (either explicitly through an
Address message, or implicitly through a Join) has no implication ADD_ADDR message or implicitly through an MP_JOIN) has no implicatio
for other connections between the same pair of hosts.</t> ns
for other connections between the same pair of hosts.</dd>
<t hangText="5-tuple:"> The 5-tuple (protocol, local <dt pn="section-4-2.15">5-tuple:</dt>
<dd pn="section-4-2.16"> The 5-tuple (protocol, local
address, local port, remote address, remote port) address, local port, remote address, remote port)
presented by kernel APIs to the application layer in a presented by kernel APIs to the application layer in a
non-multipath-aware application is that of the first non-multipath-aware application is that of the first
subflow, even if the subflow has since been closed and subflow, even if the subflow has since been closed and
removed from the connection. This decision, and other removed from the connection. This decision, and other
related API issues, are discussed in more detail in related API issues, are discussed in more detail in
<xref target="RFC6897"/>.</t> <xref target="RFC6897" format="default" sectionFormat="of" derivedCo
</list> ntent="RFC6897"/>.</dd>
</t> </dl>
</section> </section>
<section anchor="sec_security" numbered="true" toc="include" removeInRFC="fa
<section title="Security Considerations" anchor="sec_security"> lse" pn="section-5">
<t>As identified in <xref target="RFC6181"/>, the addition of multipath ca <name slugifiedName="name-security-considerations">Security Considerations
pability to TCP will bring with it a number of new classes of threat. In order t </name>
o prevent these, <xref target="RFC6182"/> presents a set of requirements for a s <t pn="section-5-1">As identified in <xref target="RFC6181" format="defaul
ecurity solution for MPTCP. The fundamental goal is for the security of MPTCP to t" sectionFormat="of" derivedContent="RFC6181"/>, the
be "no worse" than regular TCP today, and the key security requirements are: addition of multipath capability to TCP will bring with it a number of
<list style="symbols"> new classes of threats. In order to prevent these threats, <xref target="R
<t>Provide a mechanism to confirm that the parties in a subflow handsh FC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> presents
ake are the same as in the original connection setup.</t> a set of requirements for a security
<t>Provide verification that the peer can receive traffic at a new add solution for MPTCP. The fundamental goal is for the security of MPTCP to
ress before using it as part of a connection.</t> be "no worse" than regular TCP today. The key security requirements
<t>Provide replay protection, i.e., ensure that a request to add/remov are as follows:
e a subflow is 'fresh'.</t> </t>
</list> <ul spacing="normal" bare="false" empty="false" pn="section-5-2">
<li pn="section-5-2.1">Provide a mechanism to confirm that the parties i
In order to achieve these goals, MPTCP includes a hash-based handshake a n a subflow
lgorithm documented in Sections <xref target="sec_init" format="counter"/> and < handshake are the same as the parties in the original connection setup.<
xref target="sec_join" format="counter"/>.</t> /li>
<li pn="section-5-2.2">Provide verification that the peer can receive tr
<t>The security of the MPTCP connection hangs on the use of keys that are affic at a new address before using it as part of a connection.</li>
shared once at the start of the first subflow, and are never sent again over the <li pn="section-5-2.3">Provide replay protection, i.e., ensure that a re
network (unless used in the fast close mechanism, <xref target="sec_fastclose"/ quest to add⁠/remove a subflow is "fresh".</li>
>). To ease demultiplexing while not giving away any cryptographic material, fu </ul>
ture subflows use a truncated cryptographic hash of this key as the connection i <t pn="section-5-3">
dentification "token". The keys are concatenated and used as keys for creating In order to achieve these goals, MPTCP includes a hash-based handshake
Hash-based Message Authentication Codes (HMACs) used on subflow setup, in order algorithm, as documented in Sections <xref target="sec_init" format="count
to verify that the parties in the handshake are the same as in the original conn er" sectionFormat="of" derivedContent="3.1"/> and <xref target="sec_join" format
ection setup. It also provides verification that the peer can receive traffic a ="counter" sectionFormat="of" derivedContent="3.2"/>.</t>
t this new address. Replay attacks would still be possible when only keys are u <t pn="section-5-4">The security of the MPTCP connection hangs on the use
sed; therefore, the handshakes use single-use random numbers (nonces) at both en of keys that
ds -- this ensures the HMAC will never be the same on two handshakes. Guidance o are shared once at the start of the first subflow and are never sent
n generating random numbers suitable for use as keys is given in <xref target="R again over the network (unless used in the Fast Close mechanism (<xref tar
FC4086"/> and discussed in <xref target="sec_init"/>. The nonces are valid for t get="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section
he lifetime of the TCP connection attempt. HMAC is also used to secure the ADD_A 3.5"/>)). To ease demultiplexing
DDR option, due to the threats identified in <xref target="RFC7430"/>.</t> while not giving away any cryptographic material, future subflows use a
<t>The use of crypto capability bits in the initial connection handshake t truncated cryptographic hash of this key as the connection
o negotiate use of a particular algorithm allows the deployment of additional cr identification "token". The keys are concatenated and used as keys for
ypto mechanisms in the future. This negotiation would nevertheless be susceptib creating Hash-based Message Authentication Codes (HMACs) used on subflow
le to a bid-down attack by an on-path active attacker who could modify the crypt setup, in order to verify that the parties in the handshake are the same
o capability bits in the response from the receiver to use a less secure crypto as the parties in the original connection setup. It also provides verific
mechanism. The security mechanism presented in this document should therefore pr ation that
otect against all forms of flooding and hijacking attacks discussed in <xref tar the peer can receive traffic at this new address. Replay attacks would
get="RFC6181"/>.</t> still be possible when only keys are used; therefore, the handshakes use
single-use random numbers (nonces) at both ends -- this ensures that the H
<t>The version negotiation specified in <xref target="sec_init"/>, if diff MAC will never be the same on two handshakes. Guidance on generating random numb
ering MPTCP versions shared a common negotiation format, would allow an on-path ers suitable for use as keys is given in <xref target="RFC4086" format="default"
attacker to apply a theoretical bid-down attack. Since the v1 and v0 protocols h sectionFormat="of" derivedContent="RFC4086"/> and discussed in <xref target="se
ave a different handshake, such an attack would require the client to re-establi c_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>. The
sh the connection using v0, and this being supported by the server. Note that an nonces are valid for the lifetime of the TCP connection attempt. HMAC is also us
on-path attacker would have access to the raw data, negating any other TCP-leve ed to secure the ADD_ADDR option, due to the threats identified in <xref target=
l security mechanisms. "RFC7430" format="default" sectionFormat="of" derivedContent="RFC7430"/>.</t>
Also a change from RFC6824 has removed the subflow identifier from the MP_ <t pn="section-5-5">The use of crypto capability bits in the initial conne
PRIO option (<xref target="sec_policy"/>), to remove the theoretical attack wher ction handshake
e a subflow could be placed in "backup" mode by an attacker.</t> to negotiate the use of a particular algorithm allows the deployment of ad
ditional crypto mechanisms in the future. This negotiation would nevertheless b
<t>During normal operation, regular TCP protection mechanisms (such as ens e susceptible to a bid-down attack by an on-path active attacker who could modif
uring sequence numbers are in-window) will provide the same level of protection y the crypto capability bits in the response from the receiver to use a less sec
against attacks on individual TCP subflows as exists for regular TCP today. Impl ure crypto mechanism. The security mechanism presented in this document should t
ementations will introduce additional buffers compared to regular TCP, to reasse herefore protect against all forms of flooding and hijacking attacks discussed i
mble data at the connection level. The application of window sizing will minimiz n <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC
e the risk of denial-of-service attacks consuming resources.</t> 6181"/>.</t>
<t pn="section-5-6">The version negotiation specified in <xref target="sec
<t>As discussed in <xref target="sec_add_address"/>, a host may advertise _init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>, if di
its private addresses, but these might point to different hosts in the receiver' ffering MPTCP versions shared a common
s network. The MP_JOIN handshake (<xref target="sec_join"/>) will ensure that th negotiation format, would allow an on-path attacker to apply a
is does not succeed in setting up a subflow to the incorrect host. However, it c theoretical bid-down attack. Since the v1 and v0 protocols have a
ould still create unwanted TCP handshake traffic. This feature of MPTCP could be different handshake, such an attack would require that the client
a target for denial-of-service exploits, with malicious participants in MPTCP c re-establish the connection using v0 and that the server support v0.
onnections encouraging the recipient to target other hosts in the network. There Note that an on-path attacker would have access to the raw data, negating any o
fore, implementations should consider heuristics (<xref target="heuristics"/>) a ther TCP-level security mechanisms. As also noted in <xref target="app_changelog
t both the sender and receiver to reduce the impact of this.</t> " format="default" sectionFormat="of" derivedContent="Appendix E"/>, this docume
nt specifies the removal of the AddrID field <xref target="RFC6824" format="defa
<t>To further protect against malicious ADD_ADDR messages sent by an off-p ult" sectionFormat="of" derivedContent="RFC6824"/> in the MP_PRIO option (<xref
ath attacker, the ADD_ADDR includes an HMAC using the keys negotiated during the target="sec_policy" format="default" sectionFormat="of" derivedContent="Section
handshake. This effectively prevents an attacker from diverting an MPTCP connec 3.3.8"/>).
tion through an off-path ADD_ADDR injection into the stream.</t> This change eliminates the possibility of a theoretical attack where
a subflow could be placed in "backup" mode by an attacker.</t>
<t>A small security risk could theoretically exist with key reuse, but in <t pn="section-5-7">During normal operation, regular TCP protection mechan
order to accomplish a replay attack, both the sender and receiver keys, and the isms (such as
sender and receiver random numbers, in the MP_JOIN handshake (<xref target="sec_ ensuring that sequence numbers are in-window) will provide the same
join"/>) would have to match.</t> level of protection against attacks on individual TCP subflows as the
level of protection that exists for regular TCP today. Implementations wil
<t>Whilst this specification defines a "medium" security solution, meeting l introduce additional buffers compared to regular TCP, to reassemble data at th
the criteria specified at the start of this section and the threat analysis (<x e connection level. The application of window sizing will minimize the risk of d
ref target="RFC6181"/>), since attacks only ever get worse, it is likely that a enial-of-service attacks consuming resources.</t>
future version of MPTCP would need to be able to support stronger security. Ther <t pn="section-5-8">As discussed in <xref target="sec_add_address" format=
e are several ways the security of MPTCP could potentially be improved; some of "default" sectionFormat="of" derivedContent="Section 3.4.1"/>, a host may advert
these would be compatible with MPTCP as defined in this document, whilst others ise its private addresses, but these might point to different hosts in the recei
may not be. For now, the best approach is to get experience with the current app ver's network. The MP_JOIN handshake (<xref target="sec_join" format="default" s
roach, establish what might work, and check that the threat analysis is still ac ectionFormat="of" derivedContent="Section 3.2"/>) will ensure that this does not
curate.</t> succeed in setting up a subflow to the incorrect host. However, it could still
create unwanted TCP handshake traffic. This feature of MPTCP could be a target f
<t>Possible ways of improving MPTCP security could include:<list style="symbols" or denial-of-service exploits, with malicious participants in MPTCP connections
> encouraging the recipient to target other hosts in the network. Therefore, imple
<t>defining a new MPCTP cryptographic algorithm, as negotiated in MP_CAPABLE. A mentations should consider heuristics (<xref target="heuristics" format="default
sub-case could be to include an additional deployment assumption, such as statef " sectionFormat="of" derivedContent="Section 3.9"/>) at both the sender and rece
ul servers, in order to allow a more powerful algorithm to be used.</t> iver to reduce the impact of this.</t>
<t>defining how to secure data transfer with MPTCP, whilst not changing the sign <t pn="section-5-9">To further protect against malicious ADD_ADDR messages
aling part of the protocol.</t> sent by an off-path attacker, the ADD_ADDR includes an HMAC using the keys nego
<t>defining security that requires more option space, perhaps in conjunction wit tiated during the handshake. This effectively prevents an attacker from divertin
h a "long options" proposal for extending the TCP options space (such as those s g an MPTCP connection through an off-path ADD_ADDR injection into the stream.</t
urveyed in <xref target="TCPLO"/>), or perhaps building on the current approach >
with a second stage of MPTCP-option-based security.</t> <t pn="section-5-10">A small security risk could theoretically exist with
<t>revisiting the working group's decision to exclusively use TCP options for MP key reuse, but in order to accomplish a replay attack, both the sender and recei
TCP signaling, and instead look at also making use of the TCP payloads.</t> ver keys, and the sender and receiver random numbers, in the MP_JOIN handshake (
</list></t> <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Sect
ion 3.2"/>) would have to match.</t>
<t>MPTCP has been designed with several methods available to indicate a new secu <t pn="section-5-11">While this specification defines a "medium" security
rity mechanism, including: solution,
<list style="symbols"> meeting the criteria specified at the start of this section and in the
<t>available flags in MP_CAPABLE (<xref target="tcpm_capable"/>);</t> threat analysis document <xref target="RFC6181" format="default" sectionFo
<t>available subtypes in the MPTCP option (<xref target="fig_option"/>);</t> rmat="of" derivedContent="RFC6181"/>, since attacks
<t>the version field in MP_CAPABLE (<xref target="tcpm_capable"/>);</t> only ever get worse, it is likely that a future version of MPTCP would
</list></t> need to be able to support stronger security.
There are several ways the security of MPTCP could potentially be improved; som
e of these would be compatible with MPTCP as defined in this document, while oth
ers may not be. For now, the best approach is to gain experience with the curren
t approach, establish what might work, and check that the threat analysis is sti
ll accurate.</t>
<t pn="section-5-12">Possible ways of improving MPTCP security could inclu
de:</t>
<ul spacing="normal" bare="false" empty="false" pn="section-5-13">
<li pn="section-5-13.1">defining a new MPTCP cryptographic algorithm, as
negotiated in
MP_CAPABLE. If an implementation was being deployed in a controlled
environment where additional assumptions could be made, such as the
ability for the servers to store state during the TCP handshake, then
it may be possible to use a stronger cryptographic algorithm than
would otherwise be possible.</li>
<li pn="section-5-13.2">defining how to secure data transfer with MPTCP,
while not changing the signaling part of the protocol.</li>
<li pn="section-5-13.3">defining security that requires more option spac
e, perhaps in
conjunction with a "long options" proposal for extending the TCP
option space (such as those surveyed in <xref target="I-D.ananth-tcpm-tc
poptext" format="default" sectionFormat="of" derivedContent="TCPLO"/>), or perha
ps
building on the current approach with a second stage of
security based on MPTCP options.</li>
<li pn="section-5-13.4">revisiting the working group's decision to exclu
sively use TCP
options for MPTCP signaling and instead looking at the
possibility of using TCP payloads as well.</li>
</ul>
<t pn="section-5-14">MPTCP has been designed with several methods availabl
e to indicate a new security mechanism, including:
</t>
<ul spacing="normal" bare="false" empty="false" pn="section-5-15">
<li pn="section-5-15.1">available flags in MP_CAPABLE (<xref target="tcp
m_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</li
>
<li pn="section-5-15.2">available subtypes in the MPTCP option (<xref ta
rget="fig_option" format="default" sectionFormat="of" derivedContent="Figure 3"/
>).</li>
<li pn="section-5-15.3">the Version field in MP_CAPABLE (<xref target="t
cpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</
li>
</ul>
</section> </section>
<section anchor="sec_middleboxes" numbered="true" toc="include" removeInRFC=
<section title="Interactions with Middleboxes" anchor="sec_middleboxes"> "false" pn="section-6">
<name slugifiedName="name-interactions-with-middlebox">Interactions with M
<t>Multipath TCP was designed to be deployable in the present world. Its iddleboxes</name>
design takes into account "reasonable" <t pn="section-6-1">Multipath TCP was designed to be deployable in the pre
sent world. Its design takes into account "reasonable"
existing middlebox behavior. In this section, we outline a few representative mi ddlebox-related failure scenarios and existing middlebox behavior. In this section, we outline a few representative mi ddlebox-related failure scenarios and
show how Multipath TCP handles them. Next, we list the design decisions multipat show how Multipath TCP handles them. Next, we list the design decisions
h has made to accommodate the different Multipath TCP has made to accommodate the different
middleboxes.</t> middleboxes.</t>
<t pn="section-6-2">A primary concern is our use of a new TCP option. Midd
<t>A primary concern is our use of a new TCP option. Middleboxes should leboxes should forward packets
forward packets with unknown options unchanged, yet there are some that don't. We expect these
with unknown options unchanged, yet there are some that don't. These we expect w middleboxes to strip options and pass the data,
ill either strip options and pass the data,
drop packets with new options, copy the same option into multiple segments (e.g. , when doing segmentation), or drop drop packets with new options, copy the same option into multiple segments (e.g. , when doing segmentation), or drop
options during segment coalescing.</t> options during segment coalescing.</t>
<t pn="section-6-3">MPTCP uses a single new TCP option called "Kind", and
all message types are defined by "subtype" values (see <xref target="IANA" forma
t="default" sectionFormat="of" derivedContent="Section 7"/>). This should reduce
the chances of only some types of MPTCP options being passed; instead, the key
differing characteristics are different paths and the presence of the SYN flag.<
/t>
<t pn="section-6-4">MPTCP SYN packets on the first subflow of a connection
contain the MP_CAPABLE option (<xref target="sec_init" format="default" section
Format="of" derivedContent="Section 3.1"/>). If this is dropped, MPTCP <bcp14>SH
OULD</bcp14> fall back to regular TCP. If packets with the MP_JOIN option (<xref
target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3
.2"/>) are dropped, the paths will simply not be used.</t>
<t pn="section-6-5">If a middlebox strips options but otherwise passes the
packets
unchanged, MPTCP will behave safely. If an MP_CAPABLE option is dropped
on either the outgoing path or the return path, the initiating host can
fall back to regular TCP, as illustrated in <xref target="fig_syn" format=
"default" sectionFormat="of" derivedContent="Figure 17"/> and discussed in <xref
target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3
.1"/>.</t>
<figure anchor="fig_syn" align="left" suppress-title="false" pn="figure-17
">
<name slugifiedName="name-connection-setup-with-middl">Connection Setup
with Middleboxes That Strip Options from Packets</name>
<artwork align="left" name="" type="" alt="" pn="section-6-6.1">
Host A Host B
| Middlebox M |
| | |
| SYN (MP_CAPABLE) | SYN |
|-------------------|----------------&gt;|
| SYN/ACK |
|&lt;------------------------------------|
a) MP_CAPABLE option stripped on outgoing path
<t>MPTCP uses a single new TCP option "Kind", and all message types are Host A Host B
defined by "subtype" values (see <xref target="IANA"/>). This should reduce the | SYN (MP_CAPABLE) |
chances of only some types of MPTCP options being passed, and instead the key di |--------------------------------------&gt;|
ffering characteristics are different paths, and the presence of the SYN flag.</ | Middlebox M |
t> | | |
| SYN/ACK |SYN/ACK (MP_CAPABLE)|
<t>MPTCP SYN packets on the first subflow of a connection contain the MP |&lt;-----------------|--------------------|
_CAPABLE option (<xref target="sec_init"/>). If this is dropped, MPTCP SHOULD fa b) MP_CAPABLE option stripped on return path </artwork>
ll back to regular TCP. If packets with the MP_JOIN option (<xref target="sec_jo </figure>
in"/>) are dropped, the paths will simply not be used.</t> <t pn="section-6-7">Subflow SYNs contain the MP_JOIN option. If this optio
n is stripped on the outgoing path,
<t>If a middlebox strips options but otherwise passes the packets unchan the SYN will appear to be a regular SYN to Host B. Depending on whether there i
ged, MPTCP will behave safely. If an MP_CAPABLE option is dropped on either the s a listening socket on
outgoing or the return path, the initiating host can fall back to regular TCP, a the target port, Host B will reply with either a SYN/ACK or a RST (subflow conne
s illustrated in <xref target="fig_syn"/> and discussed in <xref target="sec_ini ction fails). When Host A
t"/>.</t> receives the SYN/ACK, it sends a RST because the SYN/ACK does not contain the MP
_JOIN option and its token.
<t>Subflow SYNs contain the MP_JOIN option. If this option is stripped Either way, the subflow setup fails but otherwise does not affect the MPTCP conn
on the outgoing path, ection as a whole.</t>
the SYN will appear to be a regular SYN to Host B.&nbsp; Depending on whether th <t pn="section-6-8">We now examine data flow with MPTCP, assuming that the
ere is a listening socket on flow is
the target port, Host B will reply either with SYN/ACK or RST (subflow connectio correctly set up, which implies that the options in the SYN
n fails). When Host A
receives the SYN/ACK it sends a RST because the SYN/ACK does not contain the MP_
JOIN option and its token.
Either way, the subflow setup fails, but otherwise does not affect the MPTCP con
nection as a whole.</t>
<figure align="center" anchor="fig_syn" title="Connection Setup with Mid
dleboxes that Strip Options from Packets">
<artwork align="left"><![CDATA[
Host A Host B
| Middlebox M |
| | |
| SYN(MP_CAPABLE) | SYN |
|-------------------|---------------->|
| SYN/ACK |
|<------------------------------------|
a) MP_CAPABLE option stripped on outgoing path
Host A Host B
| SYN(MP_CAPABLE) |
|------------------------------------>|
| Middlebox M |
| | |
| SYN/ACK |SYN/ACK(MP_CAPABLE)|
|<----------------|-------------------|
b) MP_CAPABLE option stripped on return path
]]></artwork>
</figure>
<t>We now examine data flow with MPTCP, assuming the flow is correctly s
et up, which implies the options in the SYN
packets were allowed through by the relevant middleboxes. If options are allowed through and there is no resegmentation or packets were allowed through by the relevant middleboxes. If options are allowed through and there is no resegmentation or
coalescing to TCP segments, Multipath TCP flows can proceed without problems.</t > coalescing to TCP segments, Multipath TCP flows can proceed without problems.</t >
<t pn="section-6-9">The case when options get stripped on data packets is
<t>The case when options get stripped on data packets has been discussed discussed
in the Fallback section. in <xref target="sec_fallback" format="default" sectionFormat="of" derived
If only some MPTCP options are stripped, behavior is not deterministic. Content="Section 3.7"/>.
If some data sequence mappings are lost, the connection can continue so long as If only some MPTCP options are stripped, behavior is not deterministic.
mappings exist for the subflow-level data (e.g., if multiple maps have been sent If some Data Sequence Mappings are lost, the connection can continue so long as
that reinforce each other). If some subflow-level space is left unmapped, howev mappings exist for the subflow-level data (e.g., if multiple maps have been sent
er, the subflow is treated as broken and is closed, through the process describe that reinforce each other). If some subflow-level space is left unmapped, howev
d in <xref target="sec_fallback"/>. MPTCP should survive with a loss of some Dat er, the subflow is treated as broken and is closed, using the process described
a ACKs, but performance will degrade as the fraction of stripped options increas in <xref target="sec_fallback" format="default" sectionFormat="of" derivedConten
es. t="Section 3.7"/>. MPTCP should survive with a loss of some Data ACKs, but perfo
rmance will degrade as the fraction of stripped options increases.
We do not expect such cases to appear in practice, though: most We do not expect such cases to appear in practice, though: most
middleboxes will either strip all options or let them all through.</t> middleboxes will either strip all options or let them all through.</t>
<t pn="section-6-10">We end this section with a list of middlebox classes,
<t>We end this section with a list of middlebox classes, their behavior, their behavior, and the elements in the MPTCP design
and the elements in the MPTCP design
that allow operation through such middleboxes. Issues surrounding dropping packe ts with options that allow operation through such middleboxes. Issues surrounding dropping packe ts with options
or stripping options were discussed above, and are not included here: or stripping options were discussed above and are not included here:
<list style="symbols"> </t>
<t>NATs <xref target="RFC3022"/> (Network Address (and Port) Translato <ul spacing="normal" bare="false" empty="false" pn="section-6-11">
rs) change the source address (and often source port) of packets. This means tha <li pn="section-6-11.1">NATs (Network Address (and port) Translators) <x
t a host will not know its ref target="RFC3022" format="default" sectionFormat="of" derivedContent="RFC3022
"/> change the source address (and
often the source port) of packets. This means that a host will not know
its
public-facing address for signaling in MPTCP. Therefore, MPTCP permits impli cit address addition via the MP_JOIN option, public-facing address for signaling in MPTCP. Therefore, MPTCP permits impli cit address addition via the MP_JOIN option,
and the handshake mechanism ensures that connection attempts to private addr and the handshake mechanism ensures that connection attempts to private addr
esses <xref target="RFC1918"/>, since they are authenticated, will only set up s esses <xref target="RFC1918" format="default" sectionFormat="of" derivedContent=
ubflows to the correct hosts. "RFC1918"/>, since they are authenticated, will only set up subflows to the corr
Explicit address removal is undertaken by an Address ID to allow no knowledg ect hosts.
e of the source address.</t> Explicit address removal is undertaken by an Address ID to allow no knowledg
e of the source address.</li>
<t>Performance Enhancing Proxies (PEPs) <xref target="RFC3135"/> might <li pn="section-6-11.2">Performance Enhancing Proxies (PEPs) <xref targe
proactively ACK data to increase performance. MPTCP, however, relies on accurat t="RFC3135" format="default" sectionFormat="of" derivedContent="RFC3135"/> might
e congestion control signals from the end host, and non-MPTCP-aware PEPs will no proactively ACK data to increase performance. MPTCP, however, relies on accurat
t be able to provide such signals. MPTCP will, therefore, fall back to single-pa e congestion control signals from the end host, and non‑MPTCP-aware PEPs will no
th TCP, or close the problematic subflow (see <xref target="sec_fallback"/>).</t t be able to provide such signals. MPTCP will, therefore, fall back to single-pa
> th TCP or close the problematic subflow (see <xref target="sec_fallback" format=
"default" sectionFormat="of" derivedContent="Section 3.7"/>).</li>
<t>Traffic Normalizers <xref target="norm"/> may not allow holes in se <li pn="section-6-11.3">Traffic normalizers <xref target="norm" format="
quence numbers, and may cache packets and retransmit the same data. default" sectionFormat="of" derivedContent="norm"/> may not
MPTCP looks like standard TCP on the wire, and will not retransmit different dat allow holes in sequence numbers, and they may cache packets and retransm
a on the same subflow sequence number. In the event of a retransmission, the sam it the same data.
e data will be retransmitted on the original TCP subflow even if it is additiona MPTCP looks like standard TCP on the wire and will not retransmit different data
lly retransmitted at the connection level on a different subflow.</t> on the same subflow sequence number. In the event of a retransmission, the same
data will be retransmitted on the original TCP subflow even if it is additional
<t>Firewalls <xref target="RFC2979"/> might perform initial sequence n ly retransmitted at the connection level on a different subflow.</li>
umber randomization on TCP connections. MPTCP uses relative <li pn="section-6-11.4">Firewalls <xref target="RFC2979" format="default
sequence numbers in data sequence mapping to cope with this. Like NATs, firewall " sectionFormat="of" derivedContent="RFC2979"/> might perform
s will not permit many incoming connections, so Initial Sequence Number (ISN) randomization on TCP connections. MPTCP us
es relative
sequence numbers in Data Sequence Mappings to cope with this. Like NATs, firewal
ls will not permit many incoming connections, so
MPTCP supports address signaling (ADD_ADDR) so that a multiaddressed host can in vite its peer behind the firewall/NAT to connect MPTCP supports address signaling (ADD_ADDR) so that a multiaddressed host can in vite its peer behind the firewall/NAT to connect
out to its additional interface.</t> out to its additional interface.</li>
<li pn="section-6-11.5">Intrusion Detection Systems / Intrusion Preventi
<t>Intrusion Detection/Prevention Systems (IDS/IPS) observe packet str on Systems (IDSs⁠/IPSs) observe packet streams for patterns and content that cou
eams for patterns and content that could threaten a network. MPTCP may require t ld threaten a network. MPTCP may require the
he instrumentation of additional paths, and an MPTCP-aware IDS or IPS would need to
instrumentation of additional paths, and an MPTCP-aware IDS/IPS would need to re read MPTCP tokens to correlate data from multiple subflows to maintain comparab
ad MPTCP tokens to correlate data from multiple subflows to maintain comparable le visibility into all of the traffic between devices. Without such changes, an
visibility into all of the traffic between devices. Without such changes, an IDS IDS would get an incomplete view of the traffic, increasing the risk of missing
would get an incomplete view of the traffic, increasing the risk of missing tra traffic of interest (false negatives) and increasing the chances of erroneously
ffic of interest (false negatives), and increasing the chances of erroneously id identifying a subflow as a risk due to only seeing partial data (false positives
entifying a subflow as a risk due to only seeing partial data (false positives). ).</li>
</t> <li pn="section-6-11.6">Application-level middleboxes such as content-aw
are firewalls may
<t>Application-level middleboxes such as content-aware firewalls may a alter the payload within a subflow -- for example, rewriting URIs in
lter the payload within a subflow, such as rewriting URIs in HTTP traffic. MPTCP HTTP traffic. MPTCP will detect such changes using the checksum
will detect these using the checksum and close the affected subflow(s), if there are other subflows that can be used.
and close the affected subflow(s), if there are other subflows that can be used. If all subflows are affected, MPTCP
If all subflows are affected, multipath will fall back to TCP, allowing such middleboxes to change the payload. MPTCP-aw
will fall back to TCP, allowing such middleboxes to change the payload. MPTCP-aw are middleboxes should be able to adjust the payload and MPTCP metadata in order
are middleboxes should be able to adjust the payload and MPTCP metadata in order not to break the connection.</li>
not to break the connection.</t> </ul>
</list> <t pn="section-6-12">
In addition, all classes of middleboxes may affect TCP traffic in the fo llowing ways: In addition, all classes of middleboxes may affect TCP traffic in the fo llowing ways:
<list style="symbols">
<t>TCP options may be removed, or packets with unknown options dropped
, by many classes of middleboxes. It is intended
that the initial SYN exchange, with a TCP option, will be sufficient to identify
the path capabilities. If such a packet does
not get through, MPTCP will end up falling back to regular TCP.</t>
<t>Segmentation/Coalescing (e.g., TCP segmentation offloading) might c
opy options between packets and might
strip some options. MPTCP's data sequence mapping includes the relative subflow
sequence number instead of using the sequence
number in the segment. In this way, the mapping is independent of the packets th
at carry it.</t>
<t>The receive window may be shrunk by some middleboxes at the subflow
level. MPTCP will use the maximum window at data level, but will also obey
subflow-specific windows.</t>
</list>
</t> </t>
<ul spacing="normal" bare="false" empty="false" pn="section-6-13">
</section> <li pn="section-6-13.1">TCP options may be removed, or packets with unkn
own options dropped, by many classes of middleboxes. It is intended
<section anchor="Acknowledgments" title="Acknowledgments"> that the initial SYN exchange, with a TCP option, will be sufficient to identify
<!-- <t>The authors were originally supported by Trilogy (http://www.trilo the path's capabilities. If such a packet does
gy-project.org), a research project (ICT-216372) partially funded by the Europea not get through, MPTCP will end up falling back to regular TCP.</li>
n Community under its Seventh Framework Program.</t> <li pn="section-6-13.2">Segmentation/coalescing (e.g., TCP segmentation
<t>Alan Ford was originally supported by Roke Manor Research and later Cis offloading) might copy options between packets and might
co Systems.</t> --> strip some options. MPTCP's Data Sequence Mapping includes the relative subflow
<t>The authors gratefully acknowledge significant input into this document sequence number instead of using the sequence
from S&eacute;bastien Barr&eacute; and Andrew McDonald.</t> number in the segment. In this way, the mapping is independent of the packets th
<t>The authors also wish to acknowledge reviews and contributions from Ilj at carry it.</li>
itsch van Beijnum, Lars Eggert, Marcelo Bagnulo, Robert Hancock, Pasi Sarolahti, <li pn="section-6-13.3">The receive window may be shrunk by some middleb
Toby Moncaster, Philip Eardley, Sergio Lembo, Lawrence Conroy, Yoshifumi Nishid oxes at the
a, Bob Briscoe, Stein Gjessing, Andrew McGregor, Georg Hampel, Anumita Biswas, W subflow level. MPTCP will use the maximum window at the data level but w
es Eddy, Alexey Melnikov, Francis Dupont, Adrian Farrel, Barry Leiba, Robert Spa ill also obey
rks, Sean Turner, Stephen Farrell, Martin Stiemerling, Gregory Detal, Fabien Duc subflow-specific windows.</li>
hene, Xavier de Foy, Rahul Jadhav, Klemens Schragel, Mirja Kuehlewind, Sheng Jia </ul>
ng, Alissa Cooper, Ines Robles, Roman Danyliw, Adam Roach, Barry Leiba, Alexey M
elnikov, Eric Vyncke, and Ben Kaduk.</t>
</section>
<section anchor="IANA" title="IANA Considerations">
<t>This document obsoletes RFC6824 and as such IANA is requested to update
the TCP option space registry to point to this document for Multipath TCP, as f
ollows:</t>
<texttable anchor="table_tcpo" title="TCP Option Kind Numbers">
<ttcol align="center">Kind</ttcol>
<ttcol align="center">Length</ttcol>
<ttcol align="center">Meaning</ttcol>
<ttcol align="center">Reference</ttcol>
<c>30</c>
<c>N</c>
<c>Multipath TCP (MPTCP)</c>
<c>This document</c>
</texttable>
<section anchor="IANA_subtypes" title="MPTCP Option Subtypes">
<t>The 4-bit MPTCP subtype sub-registry ("MPTCP Option Subtypes" under the
"Transmission Control Protocol (TCP) Parameters" registry) was defined in RFC68
24. Since RFC6824 was an Experimental RFC and not a Standards Track RFC, and since no furt
her entries have occurred beyond those pointing to RFC6824, IANA is requested to
replace the existing registry with <xref target="table_iana"/> and with the fol
lowing explanatory note.</t>
<t>Note: This registry specifies the MPTCP Option Subtypes for MPTCP v1, w
hich obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please refe
r to RFC6824.</t>
<texttable anchor="table_iana" title="MPTCP Option Subtypes">
<ttcol align="center">Value</ttcol>
<ttcol align="center">Symbol</ttcol>
<ttcol align="center">Name</ttcol>
<ttcol align="center">Reference</ttcol>
<c>0x0</c>
<c>MP_CAPABLE</c>
<c>Multipath Capable</c>
<c>This document, <xref target="sec_init"/></c>
<c>0x1</c>
<c>MP_JOIN</c>
<c>Join Connection</c>
<c>This document, <xref target="sec_join"/></c>
<c>0x2</c>
<c>DSS</c>
<c>Data Sequence Signal (Data ACK and data sequence mapping)</c>
<c>This document, <xref target="sec_generalop"/></c>
<c>0x3</c>
<c>ADD_ADDR</c>
<c>Add Address</c>
<c>This document, <xref target="sec_add_address"/></c>
<c>0x4</c>
<c>REMOVE_ADDR</c>
<c>Remove Address</c>
<c>This document, <xref target="sec_remove_addr"/></c>
<c>0x5</c>
<c>MP_PRIO</c>
<c>Change Subflow Priority</c>
<c>This document, <xref target="sec_policy"/></c>
<c>0x6</c>
<c>MP_FAIL</c>
<c>Fallback</c>
<c>This document, <xref target="sec_fallback"/></c>
<c>0x7</c>
<c>MP_FASTCLOSE</c>
<c>Fast Close</c>
<c>This document, <xref target="sec_fastclose"/></c>
<c>0x8</c>
<c>MP_TCPRST</c>
<c>Subflow Reset</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0xf</c>
<c>MP_EXPERIMENTAL</c>
<c>Reserved for private experiments</c>
<c></c>
</texttable>
<t>Values 0x9 through 0xe are currently unassigned. Option 0xf is reserved
for use by private experiments. Its use may be formalized in a future specifica
tion. Future assignments in this registry are to be defined by Standards Action
as defined by <xref target="RFC8126"/>. Assignments consist of the MPTCP subtyp
e's symbolic name and its associated value, and a reference to its specification
.</t>
</section> </section>
<section anchor="IANA" numbered="true" toc="include" removeInRFC="false" pn=
<section anchor="IANA_handshake" title="MPTCP Handshake Algorithms"> "section-7">
<name slugifiedName="name-iana-considerations">IANA Considerations</name>
<t>The "MPTCP Handshake Algorithms" sub-registry under the "Transmission C <t pn="section-7-1">This document obsoletes <xref target="RFC6824" format=
ontrol Protocol (TCP) Parameters" registry was defined in RFC6824. Since RFC6824 "default" sectionFormat="of" derivedContent="RFC6824"/>. As such, IANA has updat
was an Experimental RFC and not a Standards Track RFC, and since no further entries have ed
occurred beyond those pointing to RFC6824, IANA is requested to replace the exis several registries to point to this document. In addition, this document
ting registry with <xref target="table_crypto"/> and with the following explanat creates one new registry. These topics are described in the following sub
ory note.</t> sections.</t>
<section anchor="IANA-TCP-Option-Kind" numbered="true" toc="include" remov
<t>Note: This registry specifies the MPTCP Handshake Algorithms for MPTCP eInRFC="false" pn="section-7.1">
v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please <name slugifiedName="name-tcp-option-kind-numbers">TCP Option Kind Numbe
refer to RFC6824.</t> rs</name>
<t pn="section-7.1-1">IANA has
<texttable anchor="table_crypto" title="MPTCP Handshake Algorithms"> updated the "TCP Option Kind Numbers" registry to point to this document
<ttcol align="center">Flag Bit</ttcol> for Multipath TCP, as shown in <xref target="table_tcpo" format="default"
<ttcol align="center">Meaning</ttcol> sectionFormat="of" derivedContent="Table 1"/>:</t>
<ttcol align="center">Reference</ttcol> <table anchor="table_tcpo" align="center" pn="table-1">
<name slugifiedName="name-tcp-option-kind-numbers-2">TCP Option Kind N
<c>A</c> umbers</name>
<c>Checksum required</c> <thead>
<c>This document, <xref target="sec_init"/></c> <tr>
<th align="center" colspan="1" rowspan="1">Kind</th>
<c>B</c> <th align="center" colspan="1" rowspan="1">Length</th>
<c>Extensibility</c> <th align="center" colspan="1" rowspan="1">Meaning</th>
<c>This document, <xref target="sec_init"/></c> <th align="center" colspan="1" rowspan="1">Reference</th>
</tr>
<c>C</c> </thead>
<c>Do not attempt to establish new subflows to the source address.</c> <tbody>
<c>This document, <xref target="sec_init"/></c> <tr>
<td align="center" colspan="1" rowspan="1">30</td>
<c>D-G</c> <td align="center" colspan="1" rowspan="1">N</td>
<c>Unassigned</c> <td align="center" colspan="1" rowspan="1">Multipath TCP (MPTCP)</
<c></c> td>
<td align="center" colspan="1" rowspan="1">RFC 8684</td>
<c>H</c> </tr>
<c>HMAC-SHA256</c> </tbody>
<c>This document, <xref target="sec_join"/></c> </table>
</texttable> </section>
<section anchor="IANA_subtypes" numbered="true" toc="include" removeInRFC=
<t>Note that the meanings of bits D through H can be dependent upon bit B, "false" pn="section-7.2">
depending on how Extensibility is defined in future specifications; see <name slugifiedName="name-mptcp-option-subtypes">MPTCP Option Subtypes</
<xref target="sec_init"/> for more information.</t> name>
<t pn="section-7.2-1">The 4-bit MPTCP subtype in the "MPTCP Option Subty
<t>Future assignments in this registry are also pes"
to be defined by Standards Action as defined by <xref target="RFC8126"/>. subregistry under the "Transmission Control Protocol (TCP) Parameters"
registry was defined in <xref target="RFC6824" format="default" sectionF
ormat="of" derivedContent="RFC6824"/>. Since <xref target="RFC6824" format="defa
ult" sectionFormat="of" derivedContent="RFC6824"/> is an
Experimental RFC and not a Standards Track RFC, and since no further
entries have occurred beyond those pointing to <xref target="RFC6824" fo
rmat="default" sectionFormat="of" derivedContent="RFC6824"/>, IANA has
replaced the existing registry with the contents of
<xref target="table_iana" format="default" sectionFormat="of" derivedCon
tent="Table 2"/> and with the following
explanatory note.</t>
<t pn="section-7.2-2">Note: This registry specifies the MPTCP Option Sub
types for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0
subtypes, please refer to <xref target="RFC6824" format="default" sectionFormat=
"of" derivedContent="RFC6824"/>.</t>
<table anchor="table_iana" align="center" pn="table-2">
<name slugifiedName="name-mptcp-option-subtypes-2">MPTCP Option Subtyp
es</name>
<thead>
<tr>
<th align="center" colspan="1" rowspan="1">Value</th>
<th align="center" colspan="1" rowspan="1">Symbol</th>
<th align="center" colspan="1" rowspan="1">Name</th>
<th align="center" colspan="1" rowspan="1">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" colspan="1" rowspan="1">0x0</td>
<td align="center" colspan="1" rowspan="1">MP_CAPABLE</td>
<td align="center" colspan="1" rowspan="1">Multipath Capable</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t
d>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x1</td>
<td align="center" colspan="1" rowspan="1">MP_JOIN</td>
<td align="center" colspan="1" rowspan="1">Join Connection</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></t
d>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x2</td>
<td align="center" colspan="1" rowspan="1">DSS</td>
<td align="center" colspan="1" rowspan="1">Data Sequence Signal (D
ata ACK and Data Sequence Mapping)</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"
/></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x3</td>
<td align="center" colspan="1" rowspan="1">ADD_ADDR</td>
<td align="center" colspan="1" rowspan="1">Add Address</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.
4.1"/></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x4</td>
<td align="center" colspan="1" rowspan="1">REMOVE_ADDR</td>
<td align="center" colspan="1" rowspan="1">Remove Address</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.
4.2"/></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x5</td>
<td align="center" colspan="1" rowspan="1">MP_PRIO</td>
<td align="center" colspan="1" rowspan="1">Change Subflow Priority
</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/
></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x6</td>
<td align="center" colspan="1" rowspan="1">MP_FAIL</td>
<td align="center" colspan="1" rowspan="1">Fallback</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/
></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x7</td>
<td align="center" colspan="1" rowspan="1">MP_FASTCLOSE</td>
<td align="center" colspan="1" rowspan="1">Fast Close</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"
/></td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x8</td>
<td align="center" colspan="1" rowspan="1">MP_TCPRST</td>
<td align="center" colspan="1" rowspan="1">Subflow Reset</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0xf</td>
<td align="center" colspan="1" rowspan="1">MP_EXPERIMENTAL</td>
<td align="center" colspan="1" rowspan="1">Reserved for Private Use</td>
<td align="center" colspan="1" rowspan="1"/>
</tr>
</tbody>
</table>
<t pn="section-7.2-4">Values 0x9 through 0xe are currently unassigned.
Option 0xf is reserved for use by private experiments. Its use may be
formalized in a future specification. Future assignments in this registry
are to be defined by Standards Action as defined by
<xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>.
Assignments consist of the MPTCP subtype's symbolic name, its associated
value, and a reference to its specification.</t>
</section>
<section anchor="IANA_handshake" numbered="true" toc="include" removeInRFC
="false" pn="section-7.3">
<name slugifiedName="name-mptcp-handshake-algorithms">MPTCP Handshake Algorithms</name>
<t pn="section-7.3-1">The "MPTCP Handshake Algorithms" subregistry under
the
"Transmission Control Protocol (TCP) Parameters" registry was defined
in <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>.
Since <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>
is an Experimental RFC and not
a Standards Track RFC, and since no further entries have occurred
beyond those pointing to
<xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>,
IANA has replaced
the existing registry with the contents of
<xref target="table_crypto" format="default" sectionFormat="of" derivedContent="Table 3"/>
and with the following explanatory note.</t>
<t pn="section-7.3-2">Note: This registry specifies the MPTCP Handshake
Algorithms for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the
MPTCP v0 subtypes, please refer to
<xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>.</t>
<table anchor="table_crypto" align="center" pn="table-3">
<name slugifiedName="name-mptcp-handshake-algorithms-2">MPTCP Handshake Algorithms</name>
<thead>
<tr>
<th align="center" colspan="1" rowspan="1">Flag Bit</th>
<th align="center" colspan="1" rowspan="1">Meaning</th>
<th align="center" colspan="1" rowspan="1">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" colspan="1" rowspan="1">A</td>
<td align="center" colspan="1" rowspan="1">Checksum required</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t
d>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">B</td>
<td align="center" colspan="1" rowspan="1">Extensibility</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t
d>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">C</td>
<td align="center" colspan="1" rowspan="1">Do not attempt to establish new subflows to the source address.</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></t
d>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">D-G</td>
<td align="center" colspan="1" rowspan="1">Unassigned</td>
<td align="center" colspan="1" rowspan="1"/>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">H</td>
<td align="center" colspan="1" rowspan="1">HMAC-SHA256</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></t
d>
</tr>
</tbody>
</table>
<t pn="section-7.3-4">Note that the meanings of bits "D" through "H" can
be dependent upon bit "B",
depending on how the Extensibility parameter is defined in future
specifications; see
<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>
for more information.</t>
<t pn="section-7.3-5">Future assignments in this registry are also
to be defined by Standards Action as defined by
<xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>.
Assignments consist of the value of the flags, a symbolic name for the algorithm,
and a reference to its specification.</t>
</section>
<section anchor="IANA_rst" title="MP_TCPRST Reason Codes">
<t>IANA is requested to create a further sub-registry, "MPTCP MP_TCPRST
Reason Codes" under the "Transmission Control Protocol (TCP) Parameters"
registry, based on the reason code in MP_TCPRST (<xref target="sec_reset"/>)
message. Initial values for this registry are given in
<xref target="table_rstcodes"/>; future assignments are to be defined by
Specification Required as defined by <xref target="RFC8126"/>. Assignments
consist of the value of the code, a short description of its meaning, and a
reference to its specification. The maximum value is 0xff.</t>
<t>As guidance to the Designated Expert <xref target="RFC8126"/>,
assignments should not normally be refused unless codepoint space is becoming
scarce, providing that there is a clear distinction from other,
already-existing codes, and also providing there is sufficient guidance for
implementors both sending and receiving these codes.</t>
<texttable anchor="table_rstcodes" title="MPTCP MP_TCPRST Reason Codes">
<ttcol align="center">Code</ttcol>
<ttcol align="center">Meaning</ttcol>
<ttcol align="center">Reference</ttcol>
<c>0x00</c>
<c>Unspecified TCP error</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x01</c>
<c>MPTCP specific error</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x02</c>
<c>Lack of resources</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x03</c>
<c>Administratively prohibited</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x04</c>
<c>Too much outstanding data</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x05</c>
<c>Unacceptable performance</c>
<c>This document, <xref target="sec_reset"/></c>
<c>0x06</c>
<c>Middlebox interference</c>
<c>This document, <xref target="sec_reset"/></c>
</texttable>
</section> </section>
<section anchor="IANA_rst" numbered="true" toc="include" removeInRFC="fals
e" pn="section-7.4">
<name slugifiedName="name-mp_tcprst-reason-codes">MP_TCPRST Reason Codes
</name>
<t pn="section-7.4-1">IANA has created a further subregistry, "MPTCP
MP_TCPRST
Reason Codes" under the "Transmission Control Protocol (TCP)
Parameters" registry, based on the reason code in the MP_TCPRST
(<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>)
message. Initial values for this registry are given in
<xref target="table_rstcodes" format="default" sectionFormat="of" derivedContent="Table 4"/>;
future assignments are to be defined by Specification Required as defined by
<xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>.
Assignments consist of the value of the code, a short description of its meaning,
and a reference to its specification. The maximum value is 0xff.</t>
<table anchor="table_rstcodes" align="center" pn="table-4">
<name slugifiedName="name-mptcp-mp_tcprst-reason-code">MPTCP MP_TCPRST
Reason Codes</name>
<thead>
<tr>
<th align="center" colspan="1" rowspan="1">Code</th>
<th align="center" colspan="1" rowspan="1">Meaning</th>
<th align="center" colspan="1" rowspan="1">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" colspan="1" rowspan="1">0x00</td>
<td align="center" colspan="1" rowspan="1">Unspecified error</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x01</td>
<td align="center" colspan="1" rowspan="1">MPTCP-specific error</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x02</td>
<td align="center" colspan="1" rowspan="1">Lack of resources</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x03</td>
<td align="center" colspan="1" rowspan="1">Administratively prohib
ited</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x04</td>
<td align="center" colspan="1" rowspan="1">Too much outstanding da
ta</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x05</td>
<td align="center" colspan="1" rowspan="1">Unacceptable performanc
e</td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
<tr>
<td align="center" colspan="1" rowspan="1">0x06</td>
<td align="center" colspan="1" rowspan="1">Middlebox interference<
/td>
<td align="center" colspan="1" rowspan="1">RFC 8684, <xref target=
"sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></
td>
</tr>
</tbody>
</table>
<t pn="section-7.4-3">As guidance to the designated expert
<xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>,
assignments should not normally be refused unless
codepoint space is becoming scarce, provided that there is a clear
distinction from other, already-existing codes and also provided that
there is sufficient guidance for implementers both sending and receiving these
codes.</t>
</section>
</section> </section>
</middle> </middle>
<!-- *****BACK MATTER ***** -->
<back> <back>
<displayreference target="I-D.ananth-tcpm-tcpoptext" to="TCPLO"/>
<references title="Normative References"> <references pn="section-8">
&RFC0793; <name slugifiedName="name-references">References</name>
&RFC2104; <references pn="section-8.1">
&RFC2119; <name slugifiedName="name-normative-references">Normative References</na
&RFC5961; me>
&RFC6234; <reference anchor="RFC0793" target="https://www.rfc-editor.org/info/rfc7
&RFC8174; 93" quoteTitle="true" derivedAnchor="RFC0793">
<front>
<title>Transmission Control Protocol</title>
<author initials="J." surname="Postel" fullname="J. Postel">
<organization showOnFrontPage="true"/>
</author>
<date year="1981" month="September"/>
</front>
<seriesInfo name="STD" value="7"/>
<seriesInfo name="RFC" value="793"/>
<seriesInfo name="DOI" value="10.17487/RFC0793"/>
</reference>
<reference anchor="RFC2104" target="https://www.rfc-editor.org/info/rfc2
104" quoteTitle="true" derivedAnchor="RFC2104">
<front>
<title>HMAC: Keyed-Hashing for Message Authentication</title>
<author initials="H." surname="Krawczyk" fullname="H. Krawczyk">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Bellare" fullname="M. Bellare">
<organization showOnFrontPage="true"/>
</author>
<author initials="R." surname="Canetti" fullname="R. Canetti">
<organization showOnFrontPage="true"/>
</author>
<date year="1997" month="February"/>
<abstract>
<t>This document describes HMAC, a mechanism for message authentication
using cryptographic hash functions. HMAC can be used with any iterative
cryptographic hash function, e.g., MD5, SHA-1, in combination with a secret
shared key. The cryptographic strength of HMAC depends on the properties of
the underlying hash function. This memo provides information for the Internet
community. This memo does not specify an Internet standard of any kind.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="2104"/>
<seriesInfo name="DOI" value="10.17487/RFC2104"/>
</reference>
<reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2
119" quoteTitle="true" derivedAnchor="RFC2119">
<front>
<title>Key words for use in RFCs to Indicate Requirement Levels</title>
<author initials="S." surname="Bradner" fullname="S. Bradner">
<organization showOnFrontPage="true"/>
</author>
<date year="1997" month="March"/>
<abstract>
<t>In many standards track documents several words are used to signify
the requirements in the specification. These words are often capitalized.
This document defines these words as they should be interpreted in IETF
documents. This document specifies an Internet Best Current Practices for the
Internet Community, and requests discussion and suggestions for improvements.</t>
</abstract>
</front>
<seriesInfo name="BCP" value="14"/>
<seriesInfo name="RFC" value="2119"/>
<seriesInfo name="DOI" value="10.17487/RFC2119"/>
</reference>
<reference anchor="RFC5961" target="https://www.rfc-editor.org/info/rfc5
961" quoteTitle="true" derivedAnchor="RFC5961">
<front>
<title>Improving TCP's Robustness to Blind In-Window Attacks</title>
<author initials="A." surname="Ramaiah" fullname="A. Ramaiah">
<organization showOnFrontPage="true"/>
</author>
<author initials="R." surname="Stewart" fullname="R. Stewart">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Dalal" fullname="M. Dalal">
<organization showOnFrontPage="true"/>
</author>
<date year="2010" month="August"/>
<abstract>
<t>TCP has historically been considered to be protected against sp
oofed off-path packet injection attacks by relying on the fact that it is diffic
ult to guess the 4-tuple (the source and destination IP addresses and the source
and destination ports) in combination with the 32-bit sequence number(s). A co
mbination of increasing window sizes and applications using longer-term connecti
ons (e.g., H-323 or Border Gateway Protocol (BGP) [STANDARDS-TRACK]</t>
</abstract>
</front>
<seriesInfo name="RFC" value="5961"/>
<seriesInfo name="DOI" value="10.17487/RFC5961"/>
</reference>
<reference anchor="RFC6234" target="https://www.rfc-editor.org/info/rfc6
234" quoteTitle="true" derivedAnchor="RFC6234">
<front>
<title>US Secure Hash Algorithms (SHA and SHA-based HMAC and HKDF)</
title>
<author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3
rd">
<organization showOnFrontPage="true"/>
</author>
<author initials="T." surname="Hansen" fullname="T. Hansen">
<organization showOnFrontPage="true"/>
</author>
<date year="2011" month="May"/>
<abstract>
<t>Federal Information Processing Standard, FIPS</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6234"/>
<seriesInfo name="DOI" value="10.17487/RFC6234"/>
</reference>
<reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8
174" quoteTitle="true" derivedAnchor="RFC8174">
<front>
<title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</ti
tle>
<author initials="B." surname="Leiba" fullname="B. Leiba">
<organization showOnFrontPage="true"/>
</author>
<date year="2017" month="May"/>
<abstract>
<t>RFC 2119 specifies common key words that may be used in protoco
l specifications. This document aims to reduce the ambiguity by clarifying tha
t only UPPERCASE usage of the key words have the defined special meanings.</t>
</abstract>
</front>
<seriesInfo name="BCP" value="14"/>
<seriesInfo name="RFC" value="8174"/>
<seriesInfo name="DOI" value="10.17487/RFC8174"/>
</reference>
</references>
<references pn="section-8.2">
<name slugifiedName="name-informative-references">Informative References
</name>
<reference anchor="deployments" target="https://www.ietfjournal.org/mult
ipath-tcp-deployments/" quoteTitle="true" derivedAnchor="deployments">
<front>
<title abbrev="MPTCP Deployments">Multipath TCP Deployments</title>
<seriesInfo name="IETF Journal" value="2016"/>
<author initials="O." surname="Bonaventure" fullname="Olivier Bonave
nture">
<organization showOnFrontPage="true">Universite Catholique de Louv
ain</organization>
</author>
<author initials="S." surname="Seo" fullname="SungHoon Seo"/>
<date month="November" year="2016"/>
</front>
</reference>
<reference anchor="howhard" target="https://www.usenix.org/conference/ns
di12/technical-sessions/presentation/raiciu" quoteTitle="true" derivedAnchor="ho
whard">
<front>
<title abbrev="How Hard Can It Be? Designing and Implementing a Depl
oyable Multipath TCP">How Hard Can It Be? Designing and Implementing a Deployabl
e Multipath TCP</title>
<seriesInfo name="Usenix Symposium on Networked Systems Design and I
mplementation" value="2012"/>
<author initials="C." surname="Raiciu" fullname="Costin Raiciu">
<organization showOnFrontPage="true">Universitatea Politehnica Buc
uresti</organization>
</author>
<author initials="C." surname="Paasch" fullname="Christoph Paasch">
<organization showOnFrontPage="true">Universite Catholique de Louv
ain</organization>
</author>
<author initials="S." surname="Barre" fullname="Sebastien Barre">
<organization showOnFrontPage="true">Universite Catholique de Louv
ain</organization>
</author>
<author initials="A." surname="Ford" fullname="Alan Ford">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Honda" fullname="Michio Honda">
<organization showOnFrontPage="true">Keio University</organization
>
</author>
<author initials="F." surname="Duchene" fullname="Fabien Duchene">
<organization showOnFrontPage="true">Universite Catholique de Louv
ain</organization>
</author>
<author initials="O." surname="Bonaventure" fullname="Olivier Bonave
nture">
<organization showOnFrontPage="true">Universite Catholique de Louv
ain</organization>
</author>
<author initials="M." surname="Handley" fullname="Mark Handley">
<organization showOnFrontPage="true">University College London</or
ganization>
</author>
<date month="April" year="2012"/>
</front>
</reference>
<reference anchor="norm" target="https://www.usenix.org/legacy/events/se
c01/full_papers/handley/handley.pdf" quoteTitle="true" derivedAnchor="norm">
<front>
<title abbrev="Network Intrusion Detection: Evasion, Traffic Normali
zation, and End-to-End Protocol Semantics">Network Intrusion Detection: Evasion,
Traffic Normalization, and End-to-End Protocol Semantics</title>
<seriesInfo name="Usenix Security Symposium" value="2001"/>
<author initials="M." surname="Handley" fullname="Mark Handley">
<organization showOnFrontPage="true">ACIRI</organization>
</author>
<author initials="V." surname="Paxson" fullname="Vern Paxson">
<organization showOnFrontPage="true">ACIRI</organization>
</author>
<author initials="C." surname="Kreibich" fullname="Christian Kreibic
h">
<organization showOnFrontPage="true">Technische Universitat Munche
n</organization>
</author>
<date month="August" year="2001"/>
</front>
</reference>
<reference anchor="RFC1122" target="https://www.rfc-editor.org/info/rfc1
122" quoteTitle="true" derivedAnchor="RFC1122">
<front>
<title>Requirements for Internet Hosts - Communication Layers</title
>
<author initials="R." surname="Braden" fullname="R. Braden" role="ed
itor">
<organization showOnFrontPage="true"/>
</author>
<date year="1989" month="October"/>
<abstract>
<t>This RFC is an official specification for the Internet communit
y. It incorporates by reference, amends, corrects, and supplements the primary
protocol standards documents relating to hosts. [STANDARDS-TRACK]</t>
</abstract>
</front>
<seriesInfo name="STD" value="3"/>
<seriesInfo name="RFC" value="1122"/>
<seriesInfo name="DOI" value="10.17487/RFC1122"/>
</reference>
<reference anchor="RFC1918" target="https://www.rfc-editor.org/info/rfc1
918" quoteTitle="true" derivedAnchor="RFC1918">
<front>
<title>Address Allocation for Private Internets</title>
<author initials="Y." surname="Rekhter" fullname="Y. Rekhter">
<organization showOnFrontPage="true"/>
</author>
<author initials="B." surname="Moskowitz" fullname="B. Moskowitz">
<organization showOnFrontPage="true"/>
</author>
<author initials="D." surname="Karrenberg" fullname="D. Karrenberg">
<organization showOnFrontPage="true"/>
</author>
<author initials="G. J." surname="de Groot" fullname="G. J. de Groot
">
<organization showOnFrontPage="true"/>
</author>
<author initials="E." surname="Lear" fullname="E. Lear">
<organization showOnFrontPage="true"/>
</author>
<date year="1996" month="February"/>
<abstract>
<t>This document describes address allocation for private internet
s. This document specifies an Internet Best Current Practices for the Internet
Community, and requests discussion and suggestions for improvements.</t>
</abstract>
</front>
<seriesInfo name="BCP" value="5"/>
<seriesInfo name="RFC" value="1918"/>
<seriesInfo name="DOI" value="10.17487/RFC1918"/>
</reference>
<reference anchor="RFC2018" target="https://www.rfc-editor.org/info/rfc2
018" quoteTitle="true" derivedAnchor="RFC2018">
<front>
<title>TCP Selective Acknowledgment Options</title>
<author initials="M." surname="Mathis" fullname="M. Mathis">
<organization showOnFrontPage="true"/>
</author>
<author initials="J." surname="Mahdavi" fullname="J. Mahdavi">
<organization showOnFrontPage="true"/>
</author>
<author initials="S." surname="Floyd" fullname="S. Floyd">
<organization showOnFrontPage="true"/>
</author>
<author initials="A." surname="Romanow" fullname="A. Romanow">
<organization showOnFrontPage="true"/>
</author>
<date year="1996" month="October"/>
<abstract>
<t>This memo proposes an implementation of SACK and discusses its
performance and related issues. [STANDARDS-TRACK]</t>
</abstract>
</front>
<seriesInfo name="RFC" value="2018"/>
<seriesInfo name="DOI" value="10.17487/RFC2018"/>
</reference>
<reference anchor="RFC2979" target="https://www.rfc-editor.org/info/rfc2
979" quoteTitle="true" derivedAnchor="RFC2979">
<front>
<title>Behavior of and Requirements for Internet Firewalls</title>
<author initials="N." surname="Freed" fullname="N. Freed">
<organization showOnFrontPage="true"/>
</author>
<date year="2000" month="October"/>
<abstract>
<t>This memo defines behavioral characteristics of and interoperab
ility requirements for Internet firewalls. This memo provides information for t
he Internet community.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="2979"/>
<seriesInfo name="DOI" value="10.17487/RFC2979"/>
</reference>
<reference anchor="RFC2992" target="https://www.rfc-editor.org/info/rfc2
992" quoteTitle="true" derivedAnchor="RFC2992">
<front>
<title>Analysis of an Equal-Cost Multi-Path Algorithm</title>
<author initials="C." surname="Hopps" fullname="C. Hopps">
<organization showOnFrontPage="true"/>
</author>
<date year="2000" month="November"/>
<abstract>
<t>Equal-cost multi-path (ECMP) is a routing technique for routing
packets along multiple paths of equal cost. The forwarding engine identifies p
aths by next-hop. When forwarding a packet the router must decide which next-ho
p (path) to use. This document gives an analysis of one method for making that
decision. The analysis includes the performance of the algorithm and the disrup
tion caused by changes to the set of next-hops. This memo provides information
for the Internet community.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="2992"/>
<seriesInfo name="DOI" value="10.17487/RFC2992"/>
</reference>
<reference anchor="RFC3022" target="https://www.rfc-editor.org/info/rfc3
022" quoteTitle="true" derivedAnchor="RFC3022">
<front>
<title>Traditional IP Network Address Translator (Traditional NAT)</
title>
<author initials="P." surname="Srisuresh" fullname="P. Srisuresh">
<organization showOnFrontPage="true"/>
</author>
<author initials="K." surname="Egevang" fullname="K. Egevang">
<organization showOnFrontPage="true"/>
</author>
<date year="2001" month="January"/>
<abstract>
<t>The NAT operation described in this document extends address tr
anslation introduced in RFC 1631 and includes a new type of network address and
TCP/UDP port translation. In addition, this document corrects the Checksum adju
stment algorithm published in RFC 1631 and attempts to discuss NAT operation and
limitations in detail. This memo provides information for the Internet communi
ty.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="3022"/>
<seriesInfo name="DOI" value="10.17487/RFC3022"/>
</reference>
<reference anchor="RFC3135" target="https://www.rfc-editor.org/info/rfc3
135" quoteTitle="true" derivedAnchor="RFC3135">
<front>
<title>Performance Enhancing Proxies Intended to Mitigate Link-Relat
ed Degradations</title>
<author initials="J." surname="Border" fullname="J. Border">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Kojo" fullname="M. Kojo">
<organization showOnFrontPage="true"/>
</author>
<author initials="J." surname="Griner" fullname="J. Griner">
<organization showOnFrontPage="true"/>
</author>
<author initials="G." surname="Montenegro" fullname="G. Montenegro">
<organization showOnFrontPage="true"/>
</author>
<author initials="Z." surname="Shelby" fullname="Z. Shelby">
<organization showOnFrontPage="true"/>
</author>
<date year="2001" month="June"/>
<abstract>
<t>This document is a survey of Performance Enhancing Proxies (PEP
s) often employed to improve degraded TCP performance caused by characteristics
of specific link environments, for example, in satellite, wireless WAN, and wire
less LAN environments. This memo provides information for the Internet communit
y.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="3135"/>
<seriesInfo name="DOI" value="10.17487/RFC3135"/>
</reference>
<reference anchor="RFC4086" target="https://www.rfc-editor.org/info/rfc4
086" quoteTitle="true" derivedAnchor="RFC4086">
<front>
<title>Randomness Requirements for Security</title>
<author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3
rd">
<organization showOnFrontPage="true"/>
</author>
<author initials="J." surname="Schiller" fullname="J. Schiller">
<organization showOnFrontPage="true"/>
</author>
<author initials="S." surname="Crocker" fullname="S. Crocker">
<organization showOnFrontPage="true"/>
</author>
<date year="2005" month="June"/>
<abstract>
<t>Security systems are built on strong cryptographic algorithms t
hat foil pattern analysis attempts. However, the security of these systems is d
ependent on generating secret quantities for passwords, cryptographic keys, and
similar quantities. The use of pseudo-random processes to generate secret quant
ities can result in pseudo-security. A sophisticated attacker may find it easier
to reproduce the environment that produced the secret quantities and to search
the resulting small set of possibilities than to locate the quantities in the wh
ole of the potential number space.</t>
<t>Choosing random quantities to foil a resourceful and motivated
adversary is surprisingly difficult. This document points out many pitfalls in
using poor entropy sources or traditional pseudo-random number generation techni
ques for generating such quantities. It recommends the use of truly random hard
ware techniques and shows that the existing hardware on many systems can be used
for this purpose. It provides suggestions to ameliorate the problem when a hard
ware solution is not available, and it gives examples of how large such quantiti
es need to be for some applications. This document specifies an Internet Best C
urrent Practices for the Internet Community, and requests discussion and suggest
ions for improvements.</t>
</abstract>
</front>
<seriesInfo name="BCP" value="106"/>
<seriesInfo name="RFC" value="4086"/>
<seriesInfo name="DOI" value="10.17487/RFC4086"/>
</reference>
<reference anchor="RFC4987" target="https://www.rfc-editor.org/info/rfc4
987" quoteTitle="true" derivedAnchor="RFC4987">
<front>
<title>TCP SYN Flooding Attacks and Common Mitigations</title>
<author initials="W." surname="Eddy" fullname="W. Eddy">
<organization showOnFrontPage="true"/>
</author>
<date year="2007" month="August"/>
<abstract>
<t>This document describes TCP SYN flooding attacks, which have be
en well-known to the community for several years. Various countermeasures again
st these attacks, and the trade-offs of each, are described. This document arch
ives explanations of the attack and common defense techniques for the benefit of
TCP implementers and administrators of TCP servers or networks, but does not ma
ke any standards-level recommendations. This memo provides information for the
Internet community.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="4987"/>
<seriesInfo name="DOI" value="10.17487/RFC4987"/>
</reference>
<reference anchor="RFC5681" target="https://www.rfc-editor.org/info/rfc5
681" quoteTitle="true" derivedAnchor="RFC5681">
<front>
<title>TCP Congestion Control</title>
<author initials="M." surname="Allman" fullname="M. Allman">
<organization showOnFrontPage="true"/>
</author>
<author initials="V." surname="Paxson" fullname="V. Paxson">
<organization showOnFrontPage="true"/>
</author>
<author initials="E." surname="Blanton" fullname="E. Blanton">
<organization showOnFrontPage="true"/>
</author>
<date year="2009" month="September"/>
<abstract>
<t>This document defines TCP's four intertwined congestion control
algorithms: slow start, congestion avoidance, fast retransmit, and fast recover
y. In addition, the document specifies how TCP should begin transmission after
a relatively long idle period, as well as discussing various acknowledgment gene
ration methods. This document obsoletes RFC 2581. [STANDARDS-TRACK]</t>
</abstract>
</front>
<seriesInfo name="RFC" value="5681"/>
<seriesInfo name="DOI" value="10.17487/RFC5681"/>
</reference>
<reference anchor="RFC6181" target="https://www.rfc-editor.org/info/rfc6
181" quoteTitle="true" derivedAnchor="RFC6181">
<front>
<title>Threat Analysis for TCP Extensions for Multipath Operation wi
th Multiple Addresses</title>
<author initials="M." surname="Bagnulo" fullname="M. Bagnulo">
<organization showOnFrontPage="true"/>
</author>
<date year="2011" month="March"/>
<abstract>
<t>Multipath TCP (MPTCP for short) describes the extensions propos
ed for TCP so that endpoints of a given TCP connection can use multiple paths to
exchange data. Such extensions enable the exchange of segments using different
source-destination address pairs, resulting in the capability of using multiple
paths in a significant number of scenarios. Some level of multihoming and mobi
lity support can be achieved through these extensions. However, the support for
multiple IP addresses per endpoint may have implications on the security of the
resulting MPTCP. This note includes a threat analysis for MPTCP. This document
is not an Internet Standards Track specification; it is published for informati
onal purposes.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6181"/>
<seriesInfo name="DOI" value="10.17487/RFC6181"/>
</reference>
<reference anchor="RFC6182" target="https://www.rfc-editor.org/info/rfc6
182" quoteTitle="true" derivedAnchor="RFC6182">
<front>
<title>Architectural Guidelines for Multipath TCP Development</title
>
<author initials="A." surname="Ford" fullname="A. Ford">
<organization showOnFrontPage="true"/>
</author>
<author initials="C." surname="Raiciu" fullname="C. Raiciu">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Handley" fullname="M. Handley">
<organization showOnFrontPage="true"/>
</author>
<author initials="S." surname="Barre" fullname="S. Barre">
<organization showOnFrontPage="true"/>
</author>
<author initials="J." surname="Iyengar" fullname="J. Iyengar">
<organization showOnFrontPage="true"/>
</author>
<date year="2011" month="March"/>
<abstract>
<t>Hosts are often connected by multiple paths, but TCP restricts
communications to a single path per transport connection. Resource usage within
the network would be more efficient were these multiple paths able to be used c
oncurrently. This should enhance user experience through improved resilience to
network failure and higher throughput.</t>
<t>This document outlines architectural guidelines for the develop
ment of a Multipath Transport Protocol, with references to how these architectur
al components come together in the development of a Multipath TCP (MPTCP). This
document lists certain high-level design decisions that provide foundations for
the design of the MPTCP protocol, based upon these architectural requirements.
This document is not an Internet Standards Track specification; it is publishe
d for informational purposes.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6182"/>
<seriesInfo name="DOI" value="10.17487/RFC6182"/>
</reference>
<reference anchor="RFC6356" target="https://www.rfc-editor.org/info/rfc6
356" quoteTitle="true" derivedAnchor="RFC6356">
<front>
<title>Coupled Congestion Control for Multipath Transport Protocols<
/title>
<author initials="C." surname="Raiciu" fullname="C. Raiciu">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Handley" fullname="M. Handley">
<organization showOnFrontPage="true"/>
</author>
<author initials="D." surname="Wischik" fullname="D. Wischik">
<organization showOnFrontPage="true"/>
</author>
<date year="2011" month="October"/>
<abstract>
<t>Often endpoints are connected by multiple paths, but communicat
ions are usually restricted to a single path per connection. Resource usage wit
hin the network would be more efficient were it possible for these multiple path
s to be used concurrently. Multipath TCP is a proposal to achieve multipath tra
nsport in TCP.</t>
<t>New congestion control algorithms are needed for multipath tran
sport protocols such as Multipath TCP, as single path algorithms have a series o
f issues in the multipath context. One of the prominent problems is that runnin
g existing algorithms such as standard TCP independently on each path would give
the multipath flow more than its fair share at a bottleneck link traversed by m
ore than one of its subflows. Further, it is desirable that a source with multi
ple paths available will transfer more traffic using the least congested of the
paths, achieving a property called "resource pooling" where a bundle of links ef
fectively behaves like one shared link with bigger capacity. This would increas
e the overall efficiency of the network and also its robustness to failure.</t>
<t>This document presents a congestion control algorithm that coup
les the congestion control algorithms running on different subflows by linking t
heir increase functions, and dynamically controls the overall aggressiveness of
the multipath flow. The result is a practical algorithm that is fair to TCP at
bottlenecks while moving traffic away from congested links. This document defin
es an Experimental Protocol for the Internet community.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6356"/>
<seriesInfo name="DOI" value="10.17487/RFC6356"/>
</reference>
<reference anchor="RFC6528" target="https://www.rfc-editor.org/info/rfc6
528" quoteTitle="true" derivedAnchor="RFC6528">
<front>
<title>Defending against Sequence Number Attacks</title>
<author initials="F." surname="Gont" fullname="F. Gont">
<organization showOnFrontPage="true"/>
</author>
<author initials="S." surname="Bellovin" fullname="S. Bellovin">
<organization showOnFrontPage="true"/>
</author>
<date year="2012" month="February"/>
<abstract>
<t>This document specifies an algorithm for the generation of TCP
Initial Sequence Numbers (ISNs), such that the chances of an off-path attacker g
uessing the sequence numbers in use by a target connection are reduced. This do
cument revises (and formally obsoletes) RFC 1948, and takes the ISN generation a
lgorithm originally proposed in that document to Standards Track, formally updat
ing RFC 793. [STANDARDS-TRACK]</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6528"/>
<seriesInfo name="DOI" value="10.17487/RFC6528"/>
</reference>
<reference anchor="RFC6824" target="https://www.rfc-editor.org/info/rfc6
824" quoteTitle="true" derivedAnchor="RFC6824">
<front>
<title>TCP Extensions for Multipath Operation with Multiple Addresse
s</title>
<author initials="A." surname="Ford" fullname="A. Ford">
<organization showOnFrontPage="true"/>
</author>
<author initials="C." surname="Raiciu" fullname="C. Raiciu">
<organization showOnFrontPage="true"/>
</author>
<author initials="M." surname="Handley" fullname="M. Handley">
<organization showOnFrontPage="true"/>
</author>
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure
">
<organization showOnFrontPage="true"/>
</author>
<date year="2013" month="January"/>
<abstract>
<t>TCP/IP communication is currently restricted to a single path p
er connection, yet multiple paths often exist between peers. The simultaneous u
se of these multiple paths for a TCP/IP session would improve resource usage wit
hin the network and, thus, improve user experience through higher throughput and
improved resilience to network failure.</t>
<t>Multipath TCP provides the ability to simultaneously use multip
le paths between peers. This document presents a set of extensions to tradition
al TCP to support multipath operation. The protocol offers the same type of ser
vice to applications as TCP (i.e., reliable bytestream), and it provides the com
ponents necessary to establish and use multiple TCP flows across potentially dis
joint paths. This document defines an Experimental Protocol for the Internet c
ommunity.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6824"/>
<seriesInfo name="DOI" value="10.17487/RFC6824"/>
</reference>
<reference anchor="RFC6897" target="https://www.rfc-editor.org/info/rfc6
897" quoteTitle="true" derivedAnchor="RFC6897">
<front>
<title>Multipath TCP (MPTCP) Application Interface Considerations</t
itle>
<author initials="M." surname="Scharf" fullname="M. Scharf">
<organization showOnFrontPage="true"/>
</author>
<author initials="A." surname="Ford" fullname="A. Ford">
<organization showOnFrontPage="true"/>
</author>
<date year="2013" month="March"/>
<abstract>
<t>Multipath TCP (MPTCP) adds the capability of using multiple pat
hs to a regular TCP session. Even though it is designed to be totally backward
compatible to applications, the data transport differs compared to regular TCP,
and there are several additional degrees of freedom that applications may wish t
o exploit. This document summarizes the impact that MPTCP may have on applicati
ons, such as changes in performance. Furthermore, it discusses compatibility is
sues of MPTCP in combination with non-MPTCP-aware applications. Finally, the doc
ument describes a basic application interface that is a simple extension of TCP'
s interface for MPTCP-aware applications.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="6897"/>
<seriesInfo name="DOI" value="10.17487/RFC6897"/>
</reference>
<reference anchor="RFC7323" target="https://www.rfc-editor.org/info/rfc7
323" quoteTitle="true" derivedAnchor="RFC7323">
<front>
<title>TCP Extensions for High Performance</title>
<author initials="D." surname="Borman" fullname="D. Borman">
<organization showOnFrontPage="true"/>
</author>
<author initials="B." surname="Braden" fullname="B. Braden">
<organization showOnFrontPage="true"/>
</author>
<author initials="V." surname="Jacobson" fullname="V. Jacobson">
<organization showOnFrontPage="true"/>
</author>
<author initials="R." surname="Scheffenegger" fullname="R. Scheffene
gger" role="editor">
<organization showOnFrontPage="true"/>
</author>
<date year="2014" month="September"/>
<abstract>
<t>This document specifies a set of TCP extensions to improve perf
ormance over paths with a large bandwidth * delay product and to provide reliabl
e operation over very high-speed paths. It defines the TCP Window Scale (WS) op
tion and the TCP Timestamps (TS) option and their semantics. The Window Scale o
ption is used to support larger receive windows, while the Timestamps option can
be used for at least two distinct mechanisms, Protection Against Wrapped Sequen
ces (PAWS) and Round-Trip Time Measurement (RTTM), that are also described herei
n.</t>
<t>This document obsoletes RFC 1323 and describes changes from it.
</t>
</abstract>
</front>
<seriesInfo name="RFC" value="7323"/>
<seriesInfo name="DOI" value="10.17487/RFC7323"/>
</reference>
<reference anchor="RFC7413" target="https://www.rfc-editor.org/info/rfc7
413" quoteTitle="true" derivedAnchor="RFC7413">
<front>
<title>TCP Fast Open</title>
<author initials="Y." surname="Cheng" fullname="Y. Cheng">
<organization showOnFrontPage="true"/>
</author>
<author initials="J." surname="Chu" fullname="J. Chu">
<organization showOnFrontPage="true"/>
</author>
<author initials="S." surname="Radhakrishnan" fullname="S. Radhakris
hnan">
<organization showOnFrontPage="true"/>
</author>
<author initials="A." surname="Jain" fullname="A. Jain">
<organization showOnFrontPage="true"/>
</author>
<date year="2014" month="December"/>
<abstract>
<t>This document describes an experimental TCP mechanism called TC
P Fast Open (TFO). TFO allows data to be carried in the SYN and SYN-ACK packets
and consumed by the receiving end during the initial connection handshake, and
saves up to one full round-trip time (RTT) compared to the standard TCP, which r
equires a three-way handshake (3WHS) to complete before data can be exchanged.
However, TFO deviates from the standard TCP semantics, since the data in the SYN
could be replayed to an application in some rare circumstances. Applications s
hould not use TFO unless they can tolerate this issue, as detailed in the Applic
ability section.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="7413"/>
<seriesInfo name="DOI" value="10.17487/RFC7413"/>
</reference>
<reference anchor="RFC7430" target="https://www.rfc-editor.org/info/rfc7
430" quoteTitle="true" derivedAnchor="RFC7430">
<front>
<title>Analysis of Residual Threats and Possible Fixes for Multipath
TCP (MPTCP)</title>
<author initials="M." surname="Bagnulo" fullname="M. Bagnulo">
<organization showOnFrontPage="true"/>
</author>
<author initials="C." surname="Paasch" fullname="C. Paasch">
<organization showOnFrontPage="true"/>
</author>
<author initials="F." surname="Gont" fullname="F. Gont">
<organization showOnFrontPage="true"/>
</author>
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure
">
<organization showOnFrontPage="true"/>
</author>
<author initials="C." surname="Raiciu" fullname="C. Raiciu">
<organization showOnFrontPage="true"/>
</author>
<date year="2015" month="July"/>
<abstract>
<t>This document analyzes the residual threats for Multipath TCP (
MPTCP) and explores possible solutions to address them.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="7430"/>
<seriesInfo name="DOI" value="10.17487/RFC7430"/>
</reference>
<reference anchor="RFC8041" target="https://www.rfc-editor.org/info/rfc8
041" quoteTitle="true" derivedAnchor="RFC8041">
<front>
<title>Use Cases and Operational Experience with Multipath TCP</titl
e>
<author initials="O." surname="Bonaventure" fullname="O. Bonaventure
">
<organization showOnFrontPage="true"/>
</author>
<author initials="C." surname="Paasch" fullname="C. Paasch">
<organization showOnFrontPage="true"/>
</author>
<author initials="G." surname="Detal" fullname="G. Detal">
<organization showOnFrontPage="true"/>
</author>
<date year="2017" month="January"/>
<abstract>
<t>This document discusses both use cases and operational experien
ce with Multipath TCP (MPTCP) in real networks. It lists several prominent use
cases where Multipath TCP has been considered and is being used. It also gives
insight to some heuristics and decisions that have helped to realize these use c
ases and suggests possible improvements.</t>
</abstract>
</front>
<seriesInfo name="RFC" value="8041"/>
<seriesInfo name="DOI" value="10.17487/RFC8041"/>
</reference>
<reference anchor="RFC8126" target="https://www.rfc-editor.org/info/rfc8
126" quoteTitle="true" derivedAnchor="RFC8126">
<front>
<title>Guidelines for Writing an IANA Considerations Section in RFCs
</title>
<author initials="M." surname="Cotton" fullname="M. Cotton">
<organization showOnFrontPage="true"/>
</author>
<author initials="B." surname="Leiba" fullname="B. Leiba">
<organization showOnFrontPage="true"/>
</author>
<author initials="T." surname="Narten" fullname="T. Narten">
<organization showOnFrontPage="true"/>
</author>
<date year="2017" month="June"/>
<abstract>
<t>Many protocols make use of points of extensibility that use con
stants to identify various protocol parameters. To ensure that the values in th
ese fields do not have conflicting uses and to promote interoperability, their a
llocations are often coordinated by a central record keeper. For IETF protocols
, that role is filled by the Internet Assigned Numbers Authority (IANA).</t>
<t>To make assignments in a given registry prudently, guidance des
cribing the conditions under which new values should be assigned, as well as whe
n and how modifications to existing values can be made, is needed. This documen
t defines a framework for the documentation of these guidelines by specification
authors, in order to assure that the provided guidance for the IANA Considerati
ons is clear and addresses the various issues that are likely in the operation o
f a registry.</t>
<t>This is the third edition of this document; it obsoletes RFC 52
26.</t>
</abstract>
</front>
<seriesInfo name="BCP" value="26"/>
<seriesInfo name="RFC" value="8126"/>
<seriesInfo name="DOI" value="10.17487/RFC8126"/>
</reference>
<reference anchor="I-D.ananth-tcpm-tcpoptext" quoteTitle="true" target="
https://tools.ietf.org/html/draft-ananth-tcpm-tcpoptext-00" derivedAnchor="TCPLO
">
<front>
<title>TCP option space extension</title>
<author initials="A" surname="Ramaiah" fullname="Anantha Ramaiah">
<organization showOnFrontPage="true"/>
</author>
<date month="March" day="26" year="2012"/>
<abstract>
<t>The document goals are as follows: Firstly, this document summa
rizes the motivations for extending TCP option space. Secondly, It tries to sum
marize the various known issues that needs to be taken into account while extend
ing the TCP option space. Thirdly, it briefly provides a short summary of the v
arious TCP option space proposals that has been proposed so far. Some additiona
l proposals which includes variations to the existing proposals are also present
ed. The goal of this document is to rejuvenate the discussions on this topic and
eventually to converge on a scheme for extending TCP option space.</t>
</abstract>
</front>
<seriesInfo name="Internet-Draft" value="draft-ananth-tcpm-tcpoptext-0
0"/>
<format type="TXT" target="http://www.ietf.org/internet-drafts/draft-a
nanth-tcpm-tcpoptext-00.txt"/>
<refcontent>Work in Progress</refcontent>
</reference>
</references>
</references> </references>
<section anchor="app_options" numbered="true" toc="include" removeInRFC="fal
se" pn="section-appendix.a">
<name slugifiedName="name-notes-on-use-of-tcp-options">Notes on Use of TCP
Options</name>
<t pn="section-appendix.a-1">The TCP option space is limited due to the le
ngth of the Data Offset field in the TCP header (4 bits), which defines the TCP
header length in 32-bit words. With the standard TCP header being 20 bytes, this
leaves a maximum of 40 bytes for options, and many of these may already be used
by options such as timestamp and SACK.</t>
<t pn="section-appendix.a-2">We performed a brief study on the commonly us
ed TCP options in SYN,
data, and pure ACK packets and found that there is enough room
to fit all the options discussed in this document.</t>
<t pn="section-appendix.a-3">SYN packets typically include the following o
ptions: Maximum Segment Size (MSS) (4 bytes),
window scale (3 bytes), SACK permitted (2 bytes), and timestamp
(10 bytes). The sum of these options is 19 bytes. Some operating
systems appear to pad each option up to a word boundary, thus using 24
bytes (a brief survey suggests that Windows XP and Mac OS X do this, where
as Linux does not).
<references title="Informative References"> Optimistically, therefore, we have 21 bytes available, or 16 if options ha
&RFC1122; ve to be
&RFC7323; word-aligned. In either case, however, the SYN versions of
&RFC1918; MP_CAPABLE (12 bytes) and MP_JOIN (12 or 16 bytes) will fit in this remain
&RFC2018; ing space.</t>
&RFC5681; <t pn="section-appendix.a-4">Note that due to the use of a 64-bit data-lev
&RFC2979; el sequence space, it is
&RFC2992; feasible that MPTCP will not require the timestamp option for
&RFC3022; protection against wrapped sequence numbers (per the Protection
&RFC3135; Against Wrapped Sequences (PAWS) mechanism, as described in <xref target="
&RFC4086; RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>), since
&RFC4987; the data-level sequence space has far less
&RFC8126; chance of wrapping. Confirmation of the validity of this optimization is
&RFC6181; left for further study.</t>
&RFC6356; <t pn="section-appendix.a-5">TCP data packets typically carry timestamp op
&RFC6897; tions in every packet,
&RFC6182; taking 10 bytes (or 12, with padding). That leaves 30 bytes (or 28, if
&RFC6528; word-aligned). The DSS option varies in length, depending on (1) whether t
&RFC7413; he
&RFC7430; Data Sequence Mapping, DATA_ACK, or both are included, (2) whether the
&RFC8041; sequence numbers in use are 4 or 8 octets, and (3) whether the
checksum is present. The maximum size of the DSS option is 28 bytes, so ev
<!-- &TCPLO; draft-ananth-tcpm-tcpoptext-00; Expired--> en that will fit in the available space. But unless a connection is both bidirec
tional and high-bandwidth, it is unlikely that all that option space will be req
<reference anchor='TCPLO'> uired on each DSS option.</t>
<front> <t pn="section-appendix.a-6">Within the DSS option, it is not necessary to
<title>TCP option space extension</title> include the Data Sequence Mapping and DATA_ACK in each packet, and in many case
s it may be possible to alternate their presence (so long as the mapping covers
<author initials='A' surname='Ramaiah' fullname='Anantha Ramaiah'> the data being sent in the subsequent packet). It would also be possible to alte
<organization /> rnate between 4-byte and 8-byte sequence numbers in each option.</t>
</author> <t pn="section-appendix.a-7">On subflow and connection setup, an MPTCP opt
ion is also set on the third packet (an ACK). These are 20 bytes (for MP_CAPABLE
<date month='March' day='26' year='2012' /> ) and 24 bytes (for MP_JOIN), both of which will fit in the available option spa
ce.</t>
<abstract><t>The document goals are as follows: Firstly, this document summarize <t pn="section-appendix.a-8">Pure ACKs in TCP typically contain only times
s the motivations for extending TCP option space. Secondly, It tries to summari tamps (10 bytes). Here, Multipath TCP typically
ze the various known issues that needs to be taken into account while extending
the TCP option space. Thirdly, it briefly provides a short summary of the vario
us TCP option space proposals that has been proposed so far. Some additional pr
oposals which includes variations to the existing proposals are also presented.
The goal of this document is to rejuvenate the discussions on this topic and eve
ntually to converge on a scheme for extending TCP option space.</t></abstract>
</front>
<seriesInfo name='Work in' value='Progress' />
</reference>
<reference anchor='norm' target="http://www.usenix.org/events/sec01/full_papers/
handley/handley.pdf"><front><title abbrev="Network Intrusion Detection: Evasion,
Traffic Normalization, and End-to-End Protocol Semantics ">Network Intrusion De
tection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics</titl
e><author initials='M.' surname='Handley' fullname='Mark Handley'><organization>
ACIRI</organization></author><author initials='V.' surname='Paxson' fullname='Ve
rn Paxson'><organization>ACIRI</organization></author><author initials='C.' surn
ame='Kreibich' fullname='Christian Kreibich'><organization>Technische Universita
t Munchen</organization></author><date year="2001"/></front><seriesInfo name="Us
enix Security" value="2001"/></reference>
<reference anchor='howhard' target="https://www.usenix.org/conference/nsdi12/how
-hard-can-it-be-designing-and-implementing-deployable-multipath-tcp">
<front><title abbrev="How Hard Can It Be? Designing and Implementing a Deployabl
e Multipath TCP">How Hard Can It Be? Designing and Implementing a Deployable Mul
tipath TCP</title>
<author initials='C.' surname='Raiciu' fullname='Costin Raiciu'><organization>Un
iversitatea Politehnica Bucuresti</organization></author>
<author initials='C.' surname='Paasch' fullname='Christoph Paasch'><organization
>Universite Catholique de Louvain</organization></author>
<author initials='S.' surname='Barre' fullname='Sebastien Barre'><organization>U
niversite Catholique de Louvain</organization></author>
<author initials='A.' surname='Ford' fullname='Alan Ford'><organization/></autho
r>
<author initials='M.' surname='Honda' fullname='Michio Honda'><organization>Keio
University</organization></author>
<author initials='F.' surname='Duchene' fullname='Fabien Duchene'><organization>
Universite Catholique de Louvain</organization></author>
<author initials='O.' surname='Bonaventure' fullname='Olivier Bonaventure'><orga
nization>Universite Catholique de Louvain</organization></author>
<author initials='M.' surname='Handley' fullname='Mark Handley'><organization>Un
iversity College London</organization></author>
<date year="2012" />
</front>
<seriesInfo name="Usenix Symposium on Networked Systems Design and Implementatio
n" value="2012"/>
</reference>
<reference anchor='deployments' target="https://www.ietfjournal.org/multipath-tc
p-deployments/"><front><title abbrev="MPTCP Deployments">Multipath TCP Deploymen
ts</title><author initials='O.' surname='Bonaventure' fullname='Olivier Bonavent
ure'><organization>Universite Catholique de Louvain</organization></author><auth
or initials='S.' surname='Seo' fullname='SungHoon Seo'></author><date day="1" mo
nth="November" year="2016"/></front><seriesInfo name="IETF Journal" value="2016"
/></reference>
</references>
<section title="Notes on Use of TCP Options" anchor="app_options">
<t>The TCP option space is limited due to the length of the Data Offset fi
eld in the TCP header (4 bits), which defines the TCP header length in 32-bit wo
rds. With the standard TCP header being 20 bytes, this leaves a maximum of 40 by
tes for options, and many of these may already be used by options such as timest
amp and SACK.</t>
<t>We have performed a brief study on the commonly used TCP options in SYN
, data, and pure ACK packets, and found that there is enough room to fit all the
options we propose using in this document.</t>
<t>SYN packets typically include Maximum Segment Size (MSS) (4 bytes), win
dow scale (3 bytes), SACK permitted (2 bytes), and timestamp (10 bytes) options.
Together these sum to 19 bytes. Some operating systems appear to pad each optio
n up to a word boundary, thus using 24 bytes (a brief survey suggests Windows XP
and Mac OS X do this, whereas Linux does not).
Optimistically, therefore, we have 21 bytes spare, or 16 if it has to be w
ord-aligned. In either case, however, the SYN versions of Multipath Capable (12
bytes) and Join (12 or 16 bytes) options will fit in this remaining space.</t>
<t>Note that due to the use of a 64-bit data-level sequence space, it is f
easible that MPTCP will not require the timestamp option for protection against
wrapped sequence numbers (PAWS <xref target="RFC7323"/>), since the data-level s
equence space has far less chance of wrapping. Confirmation of the validity of t
his optimisation is for further study.</t>
<t>TCP data packets typically carry timestamp options in every packet, tak
ing 10 bytes (or 12 with padding). That leaves 30 bytes (or 28, if word-aligned)
. The Data Sequence Signal (DSS) option varies in length depending on whether th
e data sequence mapping and DATA_ACK are included, and whether the sequence numb
ers in use are 4 or 8 octets. The maximum size of the DSS option is 28 bytes, so
even that will fit in the available space. But unless a connection is both bidi
rectional and high-bandwidth, it is unlikely that all that option space will be
required on each DSS option.</t>
<t>Within the DSS option, it is not necessary to include the data sequence
mapping and DATA_ACK in each packet, and in many cases it may be possible to al
ternate their presence (so long as the mapping covers the data being sent in the
following packet). It would also be possible to alternate between 4- and 8-byte
sequence numbers in each option.</t>
<t>On subflow and connection setup, an MPTCP option is also set on the thi
rd packet (an ACK). These are 20 bytes (for Multipath Capable) and 24 bytes (for
Join), both of which will fit in the available option space.</t>
<t>Pure ACKs in TCP typically contain only timestamps (10 bytes). Here, Mu
ltipath TCP typically
needs to encode only the DATA_ACK (maximum of 12 bytes). Occasionally, ACKs will contain SACK information. Depending needs to encode only the DATA_ACK (maximum of 12 bytes). Occasionally, ACKs will contain SACK information. Depending
on the number of lost packets, SACK may utilize the entire option space. If a DATA_ACK had to be on the number of lost packets, SACK may utilize the entire option space. If a DATA_ACK had to be
included, then it is probably necessary to reduce the number of SACK blocks to accommodate the included, then it is probably necessary to reduce the number of SACK blocks to accommodate the
DATA_ACK. However, the presence of the DATA_ACK is unlikely to be necessary in a case where SACK is DATA_ACK. However, the presence of the DATA_ACK is unlikely to be necessary in a case where SACK is
in use, since until at least some of the SACK blocks have been retransmitted, the cumulative in use, since until at least some of the SACK blocks have been retransmitted, the cumulative
data-level ACK will not be moving forward (or if it does, due to retransmissions on another path, data-level ACK will not be moving forward (or if it does, due to retransmissions on another path,
then that path can also be used to transmit the new DATA_ACK).</t> then that path can also be used to transmit the new DATA_ACK).</t>
<t pn="section-appendix.a-9">The ADD_ADDR option can be between 16 and 30
<t>The ADD_ADDR option can be between 16 and 30 bytes, depending on whethe bytes, depending on
r IPv4 or IPv6 is used, and whether or not the port number is present. It is unl (1) whether IPv4 or IPv6 is used and (2) whether or not the port number is
ikely that such signaling would fit in a data packet (although if there is space present. It is unlikely that such signaling would fit in a data packet
, it is fine to include it). It is recommended to use duplicate ACKs with no oth (although if there is space, it is fine to include it). It is
er payload or options in order to transmit these rare signals. Note this is the recommended that duplicate ACKs not be used with any other payload or opti
reason for mandating that duplicate ACKs with MPTCP options are not taken as a s ons, in
ignal of congestion.</t> order to transmit these rare signals. Note that this is the reason for
mandating that duplicate ACKs with MPTCP options not be taken as a signal
of congestion.</t>
</section> </section>
<section anchor="app_tfo" numbered="true" toc="include" removeInRFC="false"
<section title="TCP Fast Open and MPTCP" anchor="app_tfo"> pn="section-appendix.b">
<t>TCP Fast Open (TFO) is an experimental TCP extension, described in <name slugifiedName="name-tcp-fast-open-and-mptcp">TCP Fast Open and MPTCP
<xref target="RFC7413"/>, which has been introduced to allow sending data </name>
<t pn="section-appendix.b-1">TCP Fast Open (TFO) is an experimental TCP ex
tension, described in
<xref target="RFC7413" format="default" sectionFormat="of" derivedContent=
"RFC7413"/>, which has been introduced to
allow the sending of data
one RTT earlier than with regular TCP. This is one RTT earlier than with regular TCP. This is
considered a valuable gain as very short connections are very common, considered a valuable gain, as very short connections are very common,
especially for HTTP request/response schemes. It achieves this by sending especially for HTTP request/response schemes. It achieves this by sending
the SYN-segment together with the application's data and allowing the list the SYN segment together with the application's data and allowing the list
ener to reply ener to reply
immediately with data after the SYN/ACK. <xref target="RFC7413"/> secures immediately with data after the SYN/ACK. <xref target="RFC7413" format="de
this mechanism, by using a new TCP option that includes a cookie which fault" sectionFormat="of" derivedContent="RFC7413"/> secures
this mechanism by using a new TCP option that includes a cookie that
is negotiated in a preceding connection.</t> is negotiated in a preceding connection.</t>
<t pn="section-appendix.b-2">When using TFO in conjunction with MPTCP, the
re are two key
points to take into account, as detailed below.</t>
<section anchor="tfocookie" numbered="true" toc="include" removeInRFC="fal
se" pn="section-b.1">
<name slugifiedName="name-tfo-cookie-request-with-mpt">TFO Cookie Reques
t with MPTCP</name>
<t pn="section-b.1-1">When a TFO initiator first connects to a listener,
it cannot immediately
include data in the SYN for security reasons <xref target="RFC7413" fo
rmat="default" sectionFormat="of" derivedContent="RFC7413"/>.
Instead, it requests a cookie that will be used in subsequent
connections. This is done with the TCP cookie request/response options
,
of 2 bytes and 6-18 bytes, respectively (depending on the chosen cooki
e length).</t>
<t pn="section-b.1-2">TFO and MPTCP can be combined, provided that the t
otal length of all the
options does not exceed the maximum 40 bytes possible in TCP:
<t>When using TCP Fast Open in conjunction with MPTCP, there are two key </t>
points to take into account, detailed hereafter.</t> <ul spacing="normal" bare="false" empty="false" pn="section-b.1-3">
<li pn="section-b.1-3.1">In the SYN: MPTCP uses a 4-byte MP_CAPABLE op
<section title="TFO cookie request with MPTCP" anchor="tfocookie"> tion. The sum
<t>When a TFO initiator first connects to a listener, it cannot immedia of the MPTCP and TFO options is 6 bytes. With typical TCP options usin
tely g up
include data in the SYN for security reasons <xref target="RFC7413"/>. to 19 bytes in the SYN (24 bytes if options are padded at a word bound
Instead, it requests a cookie that will be used in subsequent ary),
connections. This is done with the TCP cookie request/response options, there is enough space to combine the MP_CAPABLE with the TFO cookie re
of respectively 2 bytes and 6-18 bytes (depending on the chosen cookie quest.</li>
length).</t> <li pn="section-b.1-3.2">In the SYN + ACK: MPTCP uses a 12-byte MP_CAP
ABLE option, but
<t>TFO and MPTCP can be combined provided that the total length of all now the TFO option can be as long as 18 bytes. Since the maximum optio
the n length
options does not exceed the maximum 40 bytes possible in TCP: may be exceeded, it is up to the listener to avoid this problem by usi
ng a
<list style="symbols"> shorter cookie.
<t>In the SYN: MPTCP uses a 4-bytes long MP_CAPABLE option. The MPTCP As an example, if we consider that 19 bytes are used for classical
and TFO options sum up to 6 bytes. With typical TCP-options using up TCP options, the maximum possible cookie length would be
to 19 bytes in the SYN (24 bytes if options are padded at a word bounda 7 bytes. Note that, for the SYN packet, the same limitation applies to
ry), subsequent
there is enough space to combine the MP_CAPABLE with the TFO Cookie Req connections (because the initiator then echoes
uest.</t> the cookie back to the listener). Finally, if the security impact of r
educing
<t>In the SYN+ACK: MPTCP uses a 12-bytes long MP_CAPABLE option, but the cookie size is not deemed acceptable, the listener can reduce the
now TFO can be as long as 18 bytes. Since the maximum option length amount of space used by other TCP options by omitting the TCP timestam
may be exceeded, it is up to the listener to solve this by using a ps (as
shorter cookie. outlined in <xref target="app_options" format="default" sectionFormat=
As an example, if we consider that 19 bytes are used for classical "of" derivedContent="Appendix A"/>).</li>
TCP options, the maximum possible cookie length would be </ul>
of 7 bytes. Note that the same limitation applies to subsequent </section>
connections, for the SYN packet (because the initiator then echoes back <section anchor="tfodata" numbered="true" toc="include" removeInRFC="false
the cookie to the listener). Finally, if the security impact of reducin " pn="section-b.2">
g <name slugifiedName="name-data-sequence-mapping-under">Data Sequence Map
the cookie size is not deemed acceptable, the listener can reduce the ping under TFO</name>
amount of other TCP-options by omitting the TCP timestamps (as <t pn="section-b.2-1">In the TCP establishment phase, MPTCP uses a key e
outlined in <xref target="app_options"/>).</t> xchange that is
</list></t> used to generate the Initial Data Sequence Numbers (IDSNs). In particu
</section> lar,
the SYN with MP_CAPABLE occupies the first octet of data sequence
<section title="Data sequence mapping under TFO" anchor="tfodata"> space. With TFO, one way to handle the data sent together with the SYN
<t>MPTCP uses, in the TCP establishment phase, a key exchange that is would be to consider an implicit DSS mapping that covers that SYN segm
used to generate the Initial Data Sequence Numbers (IDSNs). In particul ent
ar, (since there is not enough space in the SYN to include a DSS option).
the SYN with MP_CAPABLE occupies the first octet of the data sequence The problem with that approach is that if a middlebox modifies the TFO
space. With TFO, one way to handle the data sent together with the SYN data, this will not be noticed by MPTCP because of the absence of a
would be to consider an implicit DSS mapping that covers that SYN segme DSS checksum. For example, a TCP‑aware (but not MPTCP-aware) middlebox
nt could
(since there is not enough space in the SYN to include a DSS option). insert bytes at the beginning of the stream and adapt the TCP checksum
The problem with that approach is that if a middlebox modifies the TFO and sequence numbers accordingly. With an implicit mapping, this infor
data, this will not be noticed by MPTCP because of the absence of a mation would
DSS-checksum. For example, a TCP (but not MPTCP)-aware middlebox could give to the initiator and listener a different view of the DSS
insert bytes at the beginning of the stream and adapt the TCP checksum mapping; there would be no
and sequence numbers accordingly. With an implicit mapping, this would way to detect this inconsistency, because the DSS checksum is not pres
give to initiator and listener a different view on the DSS-mapping, wit ent.</t>
h no <t pn="section-b.2-2">To solve this issue, the TFO data must not be cons
way to detect this inconsistency as the DSS checksum is not present.</t idered part of the
> data sequence number space: the SYN with MP_CAPABLE still occupies
the first octet of data sequence space, but then the first non-TFO
<t>To solve this, the TFO data must not be considered part of the data byte occupies the second octet. This guarantees that, if the
Data Sequence Number space: the SYN with MP_CAPABLE still occupies use of the DSS checksum is negotiated, all data in the data sequence
the first octet of data sequence space, but then the first non-TFO number space is checksummed. We also note that this does not entail
data byte occupies the second octet. This guarantees that, if the a loss of functionality, because TFO data is always only sent on the
use of DSS-checksum is negotiated, all data in the data sequence initial subflow, before any attempt to create additional subflows.</t>
number space is checksummed. We also note that this does not entail </section>
a loss of functionality, because TFO-data is always only sent on the <section anchor="tfoexamples" numbered="true" toc="include" removeInRFC="f
initial subflow before any attempt to create additional subflows.</t> alse" pn="section-b.3">
</section> <name slugifiedName="name-connection-establishment-ex">Connection Establ
ishment Examples</name>
<section title="Connection establishment examples" anchor="tfoexamples"> <t pn="section-b.3-1">A few examples of possible "TFO + MPTCP"
<t>The following shows a few examples of possible TFO+MPTCP establishment scenarios are shown below.</t>
establishment scenarios.</t> <t pn="section-b.3-2">Before an initiator can send data together with th
e SYN, it must request
<t>Before an initiator can send data together with the SYN, it must re a cookie from the listener, as shown in <xref target="fig_tfocookie" f
quest ormat="default" sectionFormat="of" derivedContent="Figure 18"/>. (Note: The se
a cookie to the listener, as shown in <xref target="fig_tfocookie"/>. quence number
This is done by simply combining the TFO and MPTCP options.</t> and length are annotated in <xref target="fig_tfocookie" format="default" sectio
nFormat="of" derivedContent="Figure 18"/> as
<figure align="center" anchor="fig_tfocookie" title="Cookie request - Seq(Length) (e.g., "S. 0(0)") and used as such in the subsequent figures
sequence number and length are annotated as Seq(Length) and used hereafter in th (e.g., "S  0(20)" in <xref target="fig_tfodata" format="default" section
e figures."> Format="of" derivedContent="Figure 19"/>).) The cookie request is done by simply combining the
<artwork align="left"><![CDATA[ TFO and MPTCP options.</t>
initiator listener <figure anchor="fig_tfocookie" align="left" suppress-title="false" pn="f
| | igure-18">
| S Seq=0(Length=0) <MP_CAPABLE>, <TFO cookie request> | <name slugifiedName="name-cookie-request">Cookie Request</name>
| -----------------------------------------------------------> | <artwork align="left" name="" type="" alt="" pn="section-b.3-3.1">
| | initiator listener
| S. 0(0) ack 1 <MP_CAPABLE>, <TFO cookie> | | |
| <----------------------------------------------------------- | | S Seq=0(Length=0) &lt;MP_CAPABLE&gt;, &lt;TFO cookie request&gt; |
| | | --------------------------------------------------------&gt; |
| . 0(0) ack 1 <MP_CAPABLE> | | |
| -----------------------------------------------------------> | | S. 0(0) ack 1 &lt;MP_CAPABLE&gt;, &lt;TFO cookie&gt; |
| | | &lt;-------------------------------------------------------- |
]]></artwork> | |
</figure> | . 0(0) ack 1 &lt;MP_CAPABLE&gt; |
| --------------------------------------------------------&gt; |
<t>Once this is done, the received cookie can be used for TFO, as show | | </artwork>
n </figure>
in <xref target="fig_tfodata"/>. In this example, the initiator first <t pn="section-b.3-4">Once this is done, the received cookie can be used
sends 20 bytes in the SYN. The listener immediately replies with 100 by for TFO, as shown
tes in <xref target="fig_tfodata" format="default" sectionFormat="of" deri
following the SYN-ACK upon which the initiator replies with 20 more byt vedContent="Figure 19"/>. In this example, the initiator first
es. sends 20 bytes in the SYN. The listener immediately replies with 100 b
Note that the last segment in the figure ytes
following the SYN-ACK, to which the initiator replies with 20 more byt
es.
Note that the last segment in the figure
has a TCP sequence number of 21, while the DSS subflow sequence has a TCP sequence number of 21, while the DSS subflow sequence
number is 1 (because the TFO data is not part of the data sequence number is 1 (because the TFO data is not part of the data sequence
number space, as explained in Section <xref target="tfodata"/>).</t> number space, as explained in <xref target="tfodata" format="default"
sectionFormat="of" derivedContent="Appendix B.2"/>).</t>
<figure align="center" anchor="fig_tfodata" title="The listener support <figure anchor="fig_tfodata" align="left" suppress-title="false" pn="fig
s TFO"> ure-19">
<artwork align="left"><![CDATA[ <name slugifiedName="name-the-listener-supports-tfo">The Listener Supp
initiator listener orts TFO</name>
| | <artwork align="left" name="" type="" alt="" pn="section-b.3-5.1">
| S 0(20) <MP_CAPABLE>, <TFO cookie> | initiator listener
| -----------------------------------------------------------> | | |
| | | S 0(20) &lt;MP_CAPABLE&gt;, &lt;TFO cookie&gt; |
| S. 0(0) ack 21 <MP_CAPABLE> | | --------------------------------------------------------&gt; |
| <----------------------------------------------------------- | | |
| | | S. 0(0) ack 21 &lt;MP_CAPABLE&gt; |
| . 1(100) ack 21 <DSS ack=1 seq=1 ssn=1 dlen=100> | | &lt;-------------------------------------------------------- |
| <----------------------------------------------------------- | | |
| | | . 1(100) ack 21 &lt;DSS ack=1 seq=1 ssn=1 dlen=100&gt; |
| . 21(0) ack 1 <MP_CAPABLE> | | &lt;-------------------------------------------------------- |
| -----------------------------------------------------------> | | |
| | | . 21(0) ack 1 &lt;MP_CAPABLE&gt; |
| . 21(20) ack 101 <DSS ack=101 seq=1 ssn=1 dlen=20> | | --------------------------------------------------------&gt; |
| -----------------------------------------------------------> | | |
| | | . 21(20) ack 101 &lt;DSS ack=101 seq=1 ssn=1 dlen=20&gt; |
]]></artwork> | --------------------------------------------------------&gt; |
</figure> | | </artwork>
</figure>
<t>In <xref target="fig_tfofallback"/>, the listener does not support <t pn="section-b.3-6">In <xref target="fig_tfofallback" format="default"
TFO. The initiator detects sectionFormat="of" derivedContent="Figure 20"/>, the listener does not support
that no state is created in the listener (as no data is acked), and no TFO. The initiator detects
w that no state is created in the listener (as no data is ACKed) and now
sends the MP_CAPABLE in the third ack, in order for the listener to sends the MP_CAPABLE in the third packet, in order for the listener to
build its MPTCP context at then end of the establishment. Now, the build its MPTCP context at the end of the establishment. Now, the
tfo data, retransmitted, becomes part of the data sequence mapping TFO data, when retransmitted, becomes part of the Data Sequence Mappin
because it is effectively sent (in fact re-sent) after the g
because it is effectively sent (in fact re‑sent) after the
establishment.</t> establishment.</t>
<figure anchor="fig_tfofallback" align="left" suppress-title="false" pn=
<figure align="center" anchor="fig_tfofallback" title="The listener doe "figure-20">
s not support TFO"> <name slugifiedName="name-the-listener-does-not-suppo">The Listener Do
<artwork align="left"><![CDATA[ es Not Support TFO</name>
initiator listener <artwork align="left" name="" type="" alt="" pn="section-b.3-7.1">
| | initiator listener
| S 0(20) <MP_CAPABLE>, <TFO cookie> | | |
| -----------------------------------------------------------> | | S 0(20) &lt;MP_CAPABLE&gt;, &lt;TFO cookie&gt; |
| | | --------------------------------------------------------&gt; |
| S. 0(0) ack 1 <MP_CAPABLE> | | |
| <----------------------------------------------------------- | | S. 0(0) ack 1 &lt;MP_CAPABLE&gt; |
| | | &lt;-------------------------------------------------------- |
| . 1(0) ack 1 <MP_CAPABLE> | | |
| -----------------------------------------------------------> | | . 1(0) ack 1 &lt;MP_CAPABLE&gt; |
| | | --------------------------------------------------------&gt; |
| . 1(20) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=20> | | |
| -----------------------------------------------------------> | | . 1(20) ack 1 &lt;DSS ack=1 seq=1 ssn=1 dlen=20&gt; |
| | | --------------------------------------------------------&gt; |
| . 0(0) ack 21 <DSS ack=21 seq=1 ssn=1 dlen=0> | | |
| <----------------------------------------------------------- | | . 0(0) ack 21 &lt;DSS ack=21 seq=1 ssn=1 dlen=0&gt; |
| | | &lt;-------------------------------------------------------- |
]]></artwork> | | </artwork>
</figure> </figure>
<t pn="section-b.3-8">It is also possible that the listener acknowledges
<t>It is also possible that the listener acknowledges only part of the only part of the TFO
TFO data, as illustrated in <xref target="fig_tfopartial" format="default"
data, as illustrated in <xref target="fig_tfopartial"/>. The sectionFormat="of" derivedContent="Figure 21"/>. The
initiator will simply retransmit the missing data together with a DSS-m initiator will simply retransmit the missing data together with a
apping.</t> DSS mapping.</t>
<figure anchor="fig_tfopartial" align="left" suppress-title="false" pn="
<figure align="center" anchor="fig_tfopartial" title="Partial data ackn figure-21">
owledgement"> <name slugifiedName="name-partial-data-acknowledgment">Partial Data Ac
<artwork align="left"><![CDATA[ knowledgment</name>
initiator listener <artwork align="left" name="" type="" alt="" pn="section-b.3-9.1">
| | initiator listener
| S 0(1000) <MP_CAPABLE>, <TFO cookie> | | |
| -----------------------------------------------------------> | | S 0(1000) &lt;MP_CAPABLE&gt;, &lt;TFO cookie&gt; |
| | | --------------------------------------------------------&gt; |
| S. 0(0) ack 501 <MP_CAPABLE> | | |
| <----------------------------------------------------------- | | S. 0(0) ack 501 &lt;MP_CAPABLE&gt; |
| | | &lt;-------------------------------------------------------- |
| . 501(0) ack 1 <MP_CAPABLE> | | |
| -----------------------------------------------------------> | | . 501(0) ack 1 &lt;MP_CAPABLE&gt; |
| | | --------------------------------------------------------&gt; |
| . 501(500) ack 1 <DSS ack=1 seq=1 ssn=1 dlen=500> | | |
| -----------------------------------------------------------> | | . 501(500) ack 1 &lt;DSS ack=1 seq=1 ssn=1 dlen=500&gt; |
| | | --------------------------------------------------------&gt; |
]]></artwork> | | </artwork>
</figure> </figure>
</section> </section>
</section> </section>
<section anchor="app_tcb" numbered="true" toc="include" removeInRFC="false"
<section title="Control Blocks" anchor="app_tcb"> pn="section-appendix.c">
<t>Conceptually, an MPTCP connection can be represented as an MPTCP protocol con <name slugifiedName="name-control-blocks">Control Blocks</name>
trol <t pn="section-appendix.c-1">Conceptually, an MPTCP connection can be repr
esented as an MPTCP protocol control
block (PCB) that contains several variables that track the progress and the block (PCB) that contains several variables that track the progress and the
state of the MPTCP connection and a set of linked TCP control blocks state of the MPTCP connection and a set of linked TCP control blocks
that correspond to the subflows that have been established.</t> that correspond to the subflows that have been established.</t>
<t pn="section-appendix.c-2">RFC 793 <xref target="RFC0793" format="defaul
<t>RFC 793 <xref target="RFC0793"/> specifies several state variables. Whenever t" sectionFormat="of" derivedContent="RFC0793"/> specifies several state variabl
possible, we reuse es. Whenever possible, we reuse
the same terminology as RFC 793 to describe the state variables that are the same terminology as RFC 793 to describe the state variables that are
maintained by MPTCP.</t> maintained by MPTCP.</t>
<section numbered="true" toc="include" removeInRFC="false" pn="section-c.1
<section title="MPTCP Control Block"> ">
<t>The MPTCP control block contains the following variable per connection.</t> <name slugifiedName="name-mptcp-control-block">MPTCP Control Block</name
>
<section title="Authentication and Metadata"> <t pn="section-c.1-1">The MPTCP control block contains the following var
<t><list style="hanging"> iables per connection.</t>
<t hangText="Local.Token (32 bits):"> This is the token chosen by the local host <section numbered="true" toc="include" removeInRFC="false" pn="section-c
on .1.1">
<name slugifiedName="name-authentication-and-metadata">Authentication
and Metadata</name>
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.1-1">
<dt pn="section-c.1.1-1.1">Local.Token (32 bits):</dt>
<dd pn="section-c.1.1-1.2"> This is the token chosen by the local ho
st on
this MPTCP connection. The token must be unique among all established this MPTCP connection. The token must be unique among all established
MPTCP connections, and is generated from the local key.</t> MPTCP connections and is generated from the local key.</dd>
<t hangText="Local.Key (64 bits):"> This is the key sent by the local host on th <dt pn="section-c.1.1-1.3">Local.Key (64 bits):</dt>
is <dd pn="section-c.1.1-1.4"> This is the key sent by the local host o
MPTCP connection.</t> n this
<t hangText="Remote.Token (32 bits):"> This is the token chosen by the remote ho MPTCP connection.</dd>
st on <dt pn="section-c.1.1-1.5">Remote.Token (32 bits):</dt>
this MPTCP connection, generated from the remote key.</t> <dd pn="section-c.1.1-1.6"> This is the token chosen by the remote h
<t hangText="Remote.Key (64 bits):"> This is the key chosen by the remote host o ost on
n this MPTCP connection, generated from the remote key.</dd>
this MPTCP connection</t> <dt pn="section-c.1.1-1.7">Remote.Key (64 bits):</dt>
<t hangText="MPTCP.Checksum (flag):"> This flag is set to true if at least one o <dd pn="section-c.1.1-1.8"> This is the key chosen by the remote hos
f the t on
hosts has set the A bit in the MP_CAPABLE options exchanged during connection es this MPTCP connection.</dd>
tablishment, <dt pn="section-c.1.1-1.9">MPTCP.Checksum (flag):</dt>
and is set to false otherwise. If this flag is set, the checksum must be comput <dd pn="section-c.1.1-1.10"> This flag is set to true if at least on
ed in e of the
all DSS options.</t> hosts has set the "A" bit in the MP_CAPABLE options exchanged during
</list></t> connection establishment; otherwise,
</section> it is set to false. If this flag is set, the checksum must be computed in
all DSS options.</dd>
<section title="Sending Side"> </dl>
<t><list style="hanging"> </section>
<t hangText="SND.UNA (64 bits):"> This is the data sequence number of the next b <section numbered="true" toc="include" removeInRFC="false" pn="section-c
yte to be .1.2">
<name slugifiedName="name-sending-side">Sending Side</name>
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.2-1">
<dt pn="section-c.1.2-1.1">SND.UNA (64 bits):</dt>
<dd pn="section-c.1.2-1.2"> This is the data sequence number of the
next byte to be
acknowledged, at the MPTCP connection level. This variable is updated acknowledged, at the MPTCP connection level. This variable is updated
upon reception of a DSS option containing a DATA_ACK.</t> upon reception of a DSS option containing a DATA_ACK.</dd>
<t hangText="SND.NXT (64 bits):"> This is the data sequence number of the next b <dt pn="section-c.1.2-1.3">SND.NXT (64 bits):</dt>
yte to be <dd pn="section-c.1.2-1.4"> This is the data sequence number of the
sent. SND.NXT is used to determine the value of the DSN in the DSS option.</t> next byte to be
<t hangText="SND.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the s sent. SND.NXT is used to determine the value of the DSN in the DSS option.</dd>
ending window. MPTCP <dt pn="section-c.1.2-1.5">SND.WND (32 bits):</dt>
maintains the sending window at the MPTCP connection level and the same <dd pn="section-c.1.2-1.6"> This is the send window.  32 bits if the
window is shared by all subflows. All subflows use the MPTCP connection features in RFC
level SND.WND to compute the SEQ.WND value that is sent in each 7323 are used; 16 bits otherwise. MPTCP maintains the send window at
transmitted segment.</t> the MPTCP connection level, and the same
</list></t> window is shared by all subflows. All subflows use the MPTCP connection-level
</section> SND.WND to compute the SEQ.WND value that is sent in each
transmitted segment.</dd>
<section title="Receiving Side"> </dl>
<t><list style="hanging"> </section>
<t hangText="RCV.NXT (64 bits):"> This is the data sequence number of the next b <section numbered="true" toc="include" removeInRFC="false" pn="section-c
yte that .1.3">
<name slugifiedName="name-receiving-side">Receiving Side</name>
<dl newline="false" spacing="normal" indent="3" pn="section-c.1.3-1">
<dt pn="section-c.1.3-1.1">RCV.NXT (64 bits):</dt>
<dd pn="section-c.1.3-1.2"> This is the data sequence number of the
next byte that
is expected on the MPTCP connection. This state variable is modified is expected on the MPTCP connection. This state variable is modified
upon reception of in-order data. The value of RCV.NXT is used to specify upon reception of in-order data. The value of RCV.NXT is used to specify
the DATA_ACK that is sent in the DSS option on all subflows.</t> the DATA_ACK that is sent in the DSS option on all subflows.</dd>
<t hangText="RCV.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the c <dt pn="section-c.1.3-1.3">RCV.WND (32 bits):</dt>
onnection-level <dd pn="section-c.1.3-1.4"> This is the connection-level receive win
receive window, which is the maximum of the RCV.WND on all the subflows.</t> dow, which is the
</list></t> maximum of the RCV.WND on all the subflows.  32 bits if the features
</section> in RFC 7323 are used; 16 bits otherwise.</dd>
</section> </dl>
</section>
<section title="TCP Control Blocks"> </section>
<t>The MPTCP control block also contains a list of the TCP control blocks <section numbered="true" toc="include" removeInRFC="false" pn="section-c.2
">
<name slugifiedName="name-tcp-control-blocks">TCP Control Blocks</name>
<t pn="section-c.2-1">The MPTCP control block also contains a list of th
e TCP control blocks
that are associated with the MPTCP connection.</t> that are associated with the MPTCP connection.</t>
<t pn="section-c.2-2">Note that the TCP control block on the TCP subflow
<t>Note that the TCP control block on the TCP subflows does not contain the s does not contain the
RCV.WND and SND.WND state variables as these are maintained at the MPTCP RCV.WND and SND.WND state variables, as these are maintained at the MPTCP
connection level and not at the subflow level.</t> connection level and not at the subflow level.</t>
<t pn="section-c.2-3">Inside each TCP control block, the following state
<t>Inside each TCP control block, the following state variables are defined.</t> variables are defined.</t>
<section numbered="true" toc="include" removeInRFC="false" pn="section-c
<section title="Sending Side"> .2.1">
<t><list style="hanging"> <name slugifiedName="name-sending-side-2">Sending Side</name>
<t hangText="SND.UNA (32 bits):"> This is the sequence number of the next byte t <dl newline="false" spacing="normal" indent="3" pn="section-c.2.1-1">
o be <dt pn="section-c.2.1-1.1">SND.UNA (32 bits):</dt>
<dd pn="section-c.2.1-1.2"> This is the sequence number of the next
byte to be
acknowledged on the subflow. This variable is updated upon reception of acknowledged on the subflow. This variable is updated upon reception of
each TCP acknowledgment on the subflow.</t> each TCP acknowledgment on the subflow.</dd>
<t hangText="SND.NXT (32 bits):"> This is the sequence number of the next byte t <dt pn="section-c.2.1-1.3">SND.NXT (32 bits):</dt>
o be <dd pn="section-c.2.1-1.4"> This is the sequence number of the next
byte to be
sent on the subflow. SND.NXT is used to set the value of SEG.SEQ upon sent on the subflow. SND.NXT is used to set the value of SEG.SEQ upon
transmission of the next segment.</t> transmission of the next segment.</dd>
</list></t> </dl>
</section> </section>
<section numbered="true" toc="include" removeInRFC="false" pn="section-c
<section title="Receiving Side"> .2.2">
<t><list style="hanging"> <name slugifiedName="name-receiving-side-2">Receiving Side</name>
<t hangText="RCV.NXT (32 bits):"> This is the sequence number of the next byte t <dl newline="false" spacing="normal" indent="3" pn="section-c.2.2-1">
hat <dt pn="section-c.2.2-1.1">RCV.NXT (32 bits):</dt>
<dd pn="section-c.2.2-1.2"> This is the sequence number of the next
byte that
is expected on the subflow. This state variable is modified upon is expected on the subflow. This state variable is modified upon
reception of in-order segments. The value of RCV.NXT is copied to the reception of in-order segments. The value of RCV.NXT is copied to the
SEG.ACK field of the next segments transmitted on the subflow.</t> SEG.ACK field of the next segments transmitted on the subflow.</dd>
<t hangText="RCV.WND (32 bits with RFC 7323, 16 bits otherwise):"> This is the <dt pn="section-c.2.2-1.3">RCV.WND (32 bits):</dt>
subflow-level receive window that is updated with the window field from the <dd pn="section-c.2.2-1.4">This is the subflow-level receive window
segments received on this subflow.</t> that is updated with
</list></t> the window field from the segments received on this subflow.  32
</section> bits if the features in RFC 7323 are used; 16 bits otherwise.</dd>
</section> </dl>
</section>
</section> </section>
</section>
<section title="Finite State Machine" anchor="app_fsm"> <section anchor="app_fsm" numbered="true" toc="include" removeInRFC="false"
<t>The diagram in <xref target="fig_fsm"/> shows the Finite State Machine pn="section-appendix.d">
for connection-level closure. This illustrates how the DATA_FIN connection-leve <name slugifiedName="name-finite-state-machine">Finite State Machine</name
l signal (indicated in the diagram as the DFIN flag on a DATA_ACK) interacts wit >
h subflow-level FINs, and permits "break-before-make" handover between subflows. <t pn="section-appendix.d-1">The diagram in <xref target="fig_fsm" format=
</t> "default" sectionFormat="of" derivedContent="Figure 22"/> shows the
Finite State Machine for connection-level closure. This illustrates how
<figure align="center" anchor="fig_fsm" title="Finite State Machine for Co the DATA_FIN connection-level signal (indicated in the diagram as the
nnection Closure"> DFIN flag on a DATA_ACK) (1) interacts with subflow-level FINs and (2) per
<artwork align="left"><![CDATA[ mits break-before-make handover between subflows.</t>
+---------+ <figure anchor="fig_fsm" align="left" suppress-title="false" pn="figure-22
| M_ESTAB | ">
+---------+ <name slugifiedName="name-finite-state-machine-for-co">Finite State Mach
M_CLOSE | | rcv DATA_FIN ine for Connection Closure</name>
------- | | ------- <artwork align="left" name="" type="" alt="" pn="section-appendix.d-2.1"
+---------+ snd DATA_FIN / \ snd DATA_ACK[DFIN] +---------+ >
| M_FIN |<----------------- ------------------->| M_CLOSE | +---------+
| WAIT-1 |--------------------------- | WAIT | | M_ESTAB |
+---------+ rcv DATA_FIN \ +---------+ +---------+
| rcv DATA_ACK[DFIN] ------- | M_CLOSE | M_CLOSE | | rcv DATA_FIN
| -------------- snd DATA_ACK | ------- | ------- | | -------
| CLOSE all subflows | snd DATA_FIN | +---------+ snd DATA_FIN / \ snd DATA_ACK[DFIN] +-------+
V V V | M_FIN |&lt;----------------- -------------------&gt;|M_CLOSE|
+-----------+ +-----------+ +-----------+ | WAIT-1 |--------------------------- | WAIT |
|M_FINWAIT-2| | M_CLOSING | | M_LAST-ACK| +---------+ rcv DATA_FIN \ +-------+
+-----------+ +-----------+ +-----------+ | rcv DATA_ACK[DFIN] ------- | M_CLOSE |
| rcv DATA_ACK[DFIN] | rcv DATA_ACK[DFIN] | | -------------- snd DATA_ACK | ------- |
| rcv DATA_FIN -------------- | -------------- | | CLOSE all subflows | snd DATA_FIN |
| ------- CLOSE all subflows | CLOSE all subflows | V V V
| snd DATA_ACK[DFIN] V delete MPTCP PCB V +-----------+ +-----------+ +----------+
\ +-----------+ +---------+ |M_FINWAIT-2| | M_CLOSING | |M_LAST-ACK|
------------------------>|M_TIME WAIT|----------------->| M_CLOSED| +-----------+ +-----------+ +----------+
+-----------+ +---------+ | rcv DATA_ACK[DFIN] | rcv DATA_ACK[DFIN] |
All subflows in CLOSED | rcv DATA_FIN -------------- | -------------- |
------------ | ------- CLOSE all subflows | CLOSE all subflows |
delete MPTCP PCB | snd DATA_ACK[DFIN] V delete MPTCP PCB V
]]></artwork> \ +-----------+ +--------+
------------------------&gt;|M_TIME WAIT|----------------&gt;|M_CLOSED|
+-----------+ +--------+
All subflows in CLOSED
------------
delete MPTCP PCB </artwork>
</figure> </figure>
</section> </section>
<section anchor="app_changelog" numbered="true" toc="include" removeInRFC="f
alse" pn="section-appendix.e">
<name slugifiedName="name-changes-from-rfc-6824">Changes from RFC 6824</na
me>
<t pn="section-appendix.e-1">This appendix lists the key technical changes
between <xref target="RFC6824" format="default" sectionFormat="of" derivedConte
nt="RFC6824"/>,
which specifies MPTCP v0; and this document, which obsoletes <xref target=
"RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> and spe
cifies MPTCP v1. Note that this specification is not backward compatible with <x
ref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824
"/>.
<section title="Changes from RFC6824" anchor="app_changelog"> </t>
<t>This section lists the key technical changes between RFC6824, specifyin <ul spacing="normal" bare="false" empty="false" pn="section-appendix.e-2">
g MPTCP v0, and this document, which obsoletes RFC6824 and specifies MPTCP v1. N <li pn="section-appendix.e-2.1">This document incorporates lessons learn
ote that this specification is not backwards compatible with RFC6824. ed from the various implementations, deployments, and experiments gathered in th
e documents "Use Cases and Operational Experience with Multipath TCP" <xref targ
<list style="symbols"> et="RFC8041" format="default" sectionFormat="of" derivedContent="RFC8041"/> and
<t>The document incorporates lessons learnt from the various implementat the IETF Journal article "Multipath TCP Deployments" <xref target="deployments"
ions, deployments and experiments gathered in the documents "Use Cases and Opera format="default" sectionFormat="of" derivedContent="deployments"/>.</li>
tional Experience with Multipath TCP" <xref target="RFC8041"/> and the IETF Jour <li pn="section-appendix.e-2.2">Connection initiation, through the excha
nal article "Multipath TCP Deployments" <xref target="deployments"/>.</t> nge of the MP_CAPABLE
<t>Connection initiation, through the exchange of the MP_CAPABLE MPTCP o MPTCP option, is different from <xref target="RFC6824" format="default"
ption, is different from RFC6824. The SYN no longer includes the initiator's key sectionFormat="of" derivedContent="RFC6824"/>. The SYN no longer
, allowing the MP_CAPABLE option on the SYN to be shorter in length, and to avoi includes the initiator's key, to allow the MP_CAPABLE option on the SYN
d duplicating the sending of keying material.</t> to be shorter in length and to avoid duplicating the sending of keying material.
<t>This also ensures reliable delivery of the key on the MP_CAPABLE opti </li>
on by allowing its transmission to be combined with data and thus using TCP's in <li pn="section-appendix.e-2.3">This also ensures reliable delivery of t
-built reliability mechanism. If the initiator does not immediately have data to he key on the MP_CAPABLE
send, the MP_CAPABLE option with the keys will be repeated on the first data pa option by allowing its transmission to be combined with data and thus
cket. If the other end is first to send, then the presence of the DSS option imp using TCP's built-in reliability mechanism. If the initiator does not
licitly confirms the receipt of the MP_CAPABLE.</t> immediately have data to send, the MP_CAPABLE option with the keys
<t>In the Flags field of MP_CAPABLE, C is now assigned to mean that the will be repeated on the first data packet. If the other end is the first
sender of this option will not accept additional MPTCP subflows to the source ad to send, then the presence of the DSS option implicitly confirms the receipt of
dress and port. This is an efficiency improvement, for example where the sender the MP_CAPABLE.</li>
is behind a strict NAT.</t> <li pn="section-appendix.e-2.4">In the Flags field of MP_CAPABLE, "C" is
<t>In the Flags field of MP_CAPABLE, H now indicates the use of HMAC-SHA now assigned to mean that
256 (rather than HMAC-SHA1).</t> the sender of this option will not accept additional MPTCP subflows to
<t>Connection initiation also defines the procedure for version negotiat the source address and port. This improves efficiency -- for example,
ion, for implementations that support both v0 (RFC6824) and v1 (this document).< in cases where the sender is behind a strict NAT.</li>
/t> <li pn="section-appendix.e-2.5">In the Flags field of MP_CAPABLE, "H" no
<t>The HMAC-SHA256 (rather than HMAC-SHA1) algorithm is used, as the alg w indicates the use of HMAC-SHA256 (rather than HMAC-SHA1).</li>
orithm provides better security. It is used to generate the token in the MP_JOIN <li pn="section-appendix.e-2.6">Connection initiation also defines the p
and ADD_ADDR messages, and to set the initial data sequence number.</t> rocedure for version negotiation, for implementations that support both v0 <xref
<t>A new subflow-level option exists to signal reasons for sending a RST target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>
on a subflow (MP_TCPRST <xref target="sec_reset"/>), which can help an implemen and v1 (this document).</li>
tation decide whether to attempt later re-connection.</t> <li pn="section-appendix.e-2.7">The HMAC-SHA256 (rather than HMAC-SHA1)
<t>The MP_PRIO option (<xref target="sec_policy"/>), which is used to si algorithm is used, as it provides better security. It is used to generate the to
gnal a change of priority for a subflow, no longer includes the AddrID field. It ken in the MP_JOIN and ADD_ADDR messages and to set the IDSN.</li>
s purpose was to allow the changed priority to be applied on a subflow other tha <li pn="section-appendix.e-2.8">A new subflow-level option exists to sig
n the one it was sent on. However, it has been realised that this could be used nal reasons for sending a
by a man-in-the-middle to divert all traffic on to its own path, and MP_PRIO doe RST on a subflow (MP_TCPRST (<xref target="sec_reset" format="default" s
s not include a token or other security mechanism.</t> ectionFormat="of" derivedContent="Section 3.6"/>)); this can help an implementat
<t>The ADD_ADDR option (<xref target="sec_add_address"/>), which is used ion decide whether to attempt later reconnection.</li>
to inform the other host about another potential address, is different in sever <li pn="section-appendix.e-2.9">The MP_PRIO option (<xref target="sec_po
al ways. It now includes an HMAC of the added address, for enhanced security. In licy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>),
addition, reliability for the ADD_ADDR option has been added: the IPVer field i which is used to signal a change of priority for a subflow, no longer
s replaced with a flag field, and one flag is assigned (E) which is used as an ' includes the AddrID field. Its purpose was to allow the changed
Echo' so a host can indicate that it has received the option.</t> priority to be applied on a subflow other than the one it was sent
<t>An additional way of performing a Fast Close is described, by sending on. However, it was determined that this could be used by a
a MP_FASTCLOSE option on a RST on all subflows. This allows the host to tear do man-in-the-middle to divert all traffic onto its own path, and MP_PRIO
wn the subflows and the connection immediately.</t> does not include a token or other type of security mechanism.</li>
<t>In the IANA registry a new MPTCP subtype option, MP_EXPERIMENTAL, is <li pn="section-appendix.e-2.10">The ADD_ADDR option (<xref target="sec_
reserved for private experiments. However, the document doesn't define how to us add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/
e the subtype option.</t> >), which is used to inform the other host about another potential address, is d
<t>A new Appendix discusses the usage of both the MPTCP and TCP Fast Ope ifferent in several ways. It now includes an HMAC of the added address, for enha
n on the same packet (<xref target="app_tfo"/>).</t> nced security. In addition, reliability for the ADD_ADDR option has been added:
</list></t> the IPVer field is replaced with a flag field, and one flag is assigned ("E") th
at is used as an "echo" so a host can indicate that it has received the option.<
/li>
<li pn="section-appendix.e-2.11">This document describes an additional w
ay of performing a Fast
Close -- by sending an MP_FASTCLOSE option on a RST on all subflows. Thi
s allows the host to tear down the subflows and the connection immediately.</li>
<li pn="section-appendix.e-2.12">IANA has reserved the MPTCP option subt
ype of value 0xf for
Private Use (<xref target="IANA_subtypes" format="default" sectionFormat
="of" derivedContent="Section 7.2"/>). This document doesn't define how to use t
hat value.</li>
<li pn="section-appendix.e-2.13">This document adds a new appendix (<xre
f target="app_tfo" format="default" sectionFormat="of" derivedContent="Appendix
B"/>), which discusses the usage of both MPTCP options
and TFO options on the same packet.</li>
</ul>
</section>
<section anchor="Acknowledgments" numbered="false" toc="include" removeInRFC
="false" pn="section-appendix.f">
<name slugifiedName="name-acknowledgments">Acknowledgments</name>
<t pn="section-appendix.f-1">The authors gratefully acknowledge significan
t input into this
document from <contact fullname="Sebastien Barre"/> and <contact fullname=
"Andrew McDonald"/>.</t>
<t pn="section-appendix.f-2">The authors also wish to acknowledge reviews
and contributions from
<contact fullname="Iljitsch van Beijnum"/>, <contact fullname="Lars
Eggert"/>, <contact fullname="Marcelo Bagnulo"/>, <contact fullname="Robert Hanc
ock"/>, <contact fullname="Pasi Sarolahti"/>,
<contact fullname="Toby Moncaster"/>, <contact fullname="Philip Eard
ley"/>, <contact fullname="Sergio Lembo"/>, <contact fullname="Lawrence Conroy"/
>, <contact fullname="Yoshifumi Nishida"/>,
<contact fullname="Bob Briscoe"/>, <contact fullname="Stein Gjessing"/>,
<contact fullname="Andrew McGregor"/>, <contact fullname="Georg Hamp
el"/>, <contact fullname="Anumita Biswas"/>, <contact fullname="Wes Eddy"/
>, <contact fullname="Alexey Melnikov"/>, <contact fullname="Francis Dupont"/>,
<contact fullname="Adrian Farrel"/>,
<contact fullname="Barry Leiba"/>, <contact fullname="Robert Sparks"/>,
<contact fullname="Sean Turner"/>, <contact fullname="Stephen Farrel
l"/>, <contact fullname="Martin Stiemerling"/>, <contact fullname="Gregory Detal
"/>, <contact fullname="Fabien Duchene"/>,
<contact fullname="Xavier de Foy"/>, <contact fullname="Rahul Jadhav"/>,
<contact fullname="Klemens Schragel"/>, <contact fullname="Mirja Küh
lewind"/>, <contact fullname="Sheng Jiang"/>, <contact fullname="Alissa Cooper"/
>, <contact fullname="Ines Robles"/>, <contact fullname="Roman Danyliw"/>, <cont
act fullname="Adam Roach"/>,
<contact fullname="Eric Vyncke"/>, and <contact fullname="Ben Kaduk"/>.</t
>
</section>
<section anchor="authors-addresses" numbered="false" removeInRFC="false" toc
="include" pn="section-appendix.g">
<name slugifiedName="name-authors-addresses">Authors' Addresses</name>
<author fullname="Alan Ford" initials="A." surname="Ford">
<organization showOnFrontPage="true">Pexip</organization>
<address>
<email>alan.ford@gmail.com</email>
</address>
</author>
<author fullname="Costin Raiciu" initials="C." surname="Raiciu">
<organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true
">University Politehnica of Bucharest</organization>
<address>
<postal>
<street>Splaiul Independentei 313</street>
<city>Bucharest</city>
<country>Romania</country>
</postal>
<email>costin.raiciu@cs.pub.ro</email>
</address>
</author>
<author fullname="Mark Handley" initials="M." surname="Handley">
<organization abbrev="U. College London" showOnFrontPage="true">Universi
ty College London</organization>
<address>
<postal>
<street>Gower Street</street>
<city>London</city>
<code>WC1E 6BT</code>
<country>United Kingdom</country>
</postal>
<email>m.handley@cs.ucl.ac.uk</email>
</address>
</author>
<author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"
>
<organization abbrev="U. catholique de Louvain" ascii="Universite cathol
ique de Louvain" showOnFrontPage="true">Université catholique de Louvain</orga
nization>
<address>
<postal>
<street>Pl. Ste Barbe, 2</street>
<code>1348</code>
<city>Louvain-la-Neuve</city>
<country>Belgium</country>
</postal>
<email>olivier.bonaventure@uclouvain.be</email>
</address>
</author>
<author fullname="Christoph Paasch" initials="C." surname="Paasch">
<organization abbrev="Apple, Inc." showOnFrontPage="true">Apple, Inc.</o
rganization>
<address>
<postal>
<street/>
<city>Cupertino</city>
<region>CA</region>
<country>United States of America</country>
</postal>
<email>cpaasch@apple.com</email>
</address>
</author>
</section> </section>
</back> </back>
</rfc> </rfc>
 End of changes. 319 change blocks. 
3231 lines changed or deleted 5040 lines changed or added

This html diff was produced by rfcdiff 1.45. The latest version is available from http://tools.ietf.org/tools/rfcdiff/