<?xml version="1.0" encoding="utf-8"?> <!-- Convert to HTML and Text with xml2rfc: http://xml2rfc.ietf.org. --> <!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ <!ENTITY RFC5533 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5533.xml"> <!ENTITY RFC5062 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5062.xml"> <!ENTITY RFC5061 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5061.xml"> <!ENTITY RFC4960 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4960.xml"> <!ENTITY RFC4987 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4987.xml"> <!ENTITY RFC6234 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6234.xml"> <!ENTITY RFC4086 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4086.xml"> <!ENTITY RFC5681 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5681.xml"> <!ENTITY RFC2119 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"> <!ENTITY RFC2992 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2992.xml"> <!ENTITY RFC2979 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2979.xml"> <!ENTITY RFC2104 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2104.xml"> <!ENTITY RFC2018 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2018.xml"> <!ENTITY RFC1918 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1918.xml"> <!ENTITY RFC0793 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0793.xml"> <!ENTITY RFC7323 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7323.xml"> <!ENTITY RFC1122 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1122.xml"> <!ENTITY RFC3135 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3135.xml"> <!ENTITY RFC3022 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3022.xml"> <!ENTITY RFC6181 SYSTEM
"https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6181.xml"> <!ENTITY RFC6182 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6182.xml"> <!ENTITY RFC6356 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6356.xml"> <!ENTITY RFC6555 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6555.xml"> <!ENTITY RFC8126 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8126.xml"> <!ENTITY RFC6897 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6897.xml"> <!ENTITY RFC6528 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6528.xml"> <!ENTITY RFC5961 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5961.xml"> <!ENTITY RFC7413 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7413.xml"> <!ENTITY RFC7430 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7430.xml"> <!ENTITY RFC8174 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"> <!ENTITY RFC8041 SYSTEM "https://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.8041.xml"> ]> <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> <?rfc strict="no" ?> <?rfc toc="yes"?> <?rfc tocdepth="4"?> <?rfc symrefs="yes"?> <?rfc sortrefs="yes" ?> <?rfc compact="yes" ?> <?rfc subcompact="no" ?> <?rfc rfcedstyle="yes"?> <rfc xmlns:xi="http://www.w3.org/2001/XInclude" version="3" category="std" consensus="true" docName="draft-ietf-mptcp-rfc6824bis-18" indexInclude="true" ipr="trust200902" number="8684" obsoletes="6824" prepTime="2020-03-30T17:51:35" scripts="Common,Latin" sortRefs="true" submissionType="IETF" symRefs="true" tocDepth="3" tocInclude="true" xml:lang="en"> <link href="https://datatracker.ietf.org/doc/draft-ietf-mptcp-rfc6824bis-18" rel="prev"/> <link href="https://dx.doi.org/10.17487/rfc8684" rel="alternate"/> <link href="urn:issn:2070-1721" rel="alternate"/> <front> <title abbrev="Multipath TCP">TCP Extensions for
Multipath Operation with Multiple Addresses</title> <seriesInfo name="RFC" value="8684" stream="IETF"/> <author fullname="Alan Ford" initials="A." surname="Ford"> <organization showOnFrontPage="true">Pexip</organization> <address><!-- <postal> <street>Beech Court</street> <city>Hurst</city> <region>Berkshire</region> <code>RG10 0RQ</code> <country>UK</country> </postal> --><email>alan.ford@gmail.com</email> </address> </author> <author fullname="Costin Raiciu" initials="C." surname="Raiciu"> <organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true">University Politehnica of Bucharest</organization> <address> <postal> <street>Splaiul Independentei 313</street> <city>Bucharest</city> <country>Romania</country> </postal> <email>costin.raiciu@cs.pub.ro</email> </address> </author> <author fullname="Mark Handley" initials="M." surname="Handley"> <organization abbrev="U. College London" showOnFrontPage="true">University College London</organization> <address> <postal> <street>Gower Street</street> <city>London</city> <code>WC1E 6BT</code> <country>United Kingdom</country> </postal> <email>m.handley@cs.ucl.ac.uk</email> </address> </author> <author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"> <organization abbrev="U. catholique de Louvain" ascii="Universite catholique de Louvain" showOnFrontPage="true">Université catholique de Louvain</organization> <address> <postal> <street>Pl. Ste Barbe, 2</street> <code>1348</code> <city>Louvain-la-Neuve</city> <country>Belgium</country> </postal> <email>olivier.bonaventure@uclouvain.be</email> </address> </author> <author fullname="Christoph Paasch" initials="C." surname="Paasch"> <organization abbrev="Apple, Inc."
showOnFrontPage="true">Apple, Inc.</organization> <address> <postal> <street/> <city>Cupertino</city> <region>CA</region> <country>United States of America</country> </postal> <email>cpaasch@apple.com</email> </address> </author> <date month="03" year="2020"/> <area>General</area> <workgroup>Internet Engineering Task Force</workgroup> <keyword>tcp</keyword> <keyword>extensions</keyword> <keyword>multipath</keyword> <keyword>multihomed</keyword> <keyword>subflow</keyword> <abstract pn="section-abstract"> <t pn="section-abstract-1">TCP/IP communication is currently restricted to a single path per connection, yet multiple paths often exist between peers. The simultaneous use of these multiple paths for a TCP/IP session would improve resource usage within the network and thus improve user experience through higher throughput and improved resilience to network failure.</t> <t pn="section-abstract-2">Multipath TCP provides the ability to simultaneously use multiple paths between peers. This document presents a set of extensions to traditional TCP to support multipath operation.
The protocol offers the same type of service to applications as TCP (i.e., a reliable bytestream), and it provides the components necessary to establish and use multiple TCP flows across potentially disjoint paths.</t><t>This<t pn="section-abstract-3">This document specifies v1 of Multipath TCP, obsoleting v0 as specified inRFC6824,RFC 6824, through clarifications and modifications primarily driven by deployment experience.</t> </abstract></front> <middle><boilerplate> <sectiontitle="Introduction" anchor="sec_intro"> <t>Multipath TCP (MPTCP) is a setanchor="status-of-memo" numbered="false" removeInRFC="false" toc="exclude" pn="section-boilerplate.1"> <name slugifiedName="name-status-of-this-memo">Status ofextensions to regular TCP <xref target="RFC0793"/> to provide a Multipath TCP <xref target="RFC6182"/> service, which enables a transport connection to operate across multiple paths simultaneously.Thisdocument presents the protocol changes required to add multipath capability to TCP; specifically, those for signaling and setting up multiple paths ("subflows"), managing these subflows, reassembly of data, and termination of sessions.Memo</name> <t pn="section-boilerplate.1-1"> This isnot the only information required to create a Multipath TCP implementation, however.an Internet Standards Track document. </t> <t pn="section-boilerplate.1-2"> This document iscomplemented by three others: <list style="symbols"> <t>Architecture <xref target="RFC6182"/>, which explains the motivations behind Multipath TCP, containsadiscussionproduct ofhigh-level design decisions on which this design is based, and an explanationthe Internet Engineering Task Force (IETF). It represents the consensus ofa functional separation through which an extensible MPTCP implementation can be developed.</t> <t>Congestion control <xref target="RFC6356"/> presents a safe congestion control algorithmthe IETF community. 
It has received public review and has been approved forcouplingpublication by thebehaviorInternet Engineering Steering Group (IESG). Further information on Internet Standards is available in Section 2 of RFC 7841. </t> <t pn="section-boilerplate.1-3"> Information about themultiple paths in order to "do no harm"current status of this document, any errata, and how toother network users.</t> <t>Application considerations <xref target="RFC6897"/> discusses what impact MPTCP will haveprovide feedback onapplications, what applications will want to do with MPTCP,it may be obtained at <eref target="https://www.rfc-editor.org/info/rfc8684" brackets="none"/>. </t> </section> <section anchor="copyright" numbered="false" removeInRFC="false" toc="exclude" pn="section-boilerplate.2"> <name slugifiedName="name-copyright-notice">Copyright Notice</name> <t pn="section-boilerplate.2-1"> Copyright (c) 2020 IETF Trust and the persons identified asa consequence of these factors, what API extensions an MPTCP implementation should present.</t> </list>the document authors. All rights reserved. </t> <t pn="section-boilerplate.2-2"> This document isan update to,subject to BCP 78 andobsoletes,thev0 specification of Multipath TCP (RFC6824). This document specifies MPTCP v1, which is not backward compatible with MPTCP v0. This document additionally defines version negotiation procedures for implementations that support both versions. 
</t> <section title="Design Assumptions" anchor="sec_assum"> <t>In orderIETF Trust's Legal Provisions Relating tolimit the potentially huge design space, the mptcp working group imposed two key constraints on the Multipath TCP design presentedIETF Documents (<eref target="https://trustee.ietf.org/license-info" brackets="none"/>) inthis document: <list style="symbols"> <t>It must be backwards-compatible with current, regular TCP, to increase its chances of deployment.</t> <t>It can be assumed that one or both hosts are multihomed and multiaddressed.</t> </list> </t> <t>To simplify the design, we assume thateffect on thepresencedate ofmultiple addresses at a host is sufficient to indicate the existencepublication ofmultiple paths. These paths need not be entirely disjoint:this document. Please review these documents carefully, as theymay share one or many routers between them. Even in such a situation, making use of multiple paths is beneficial, improving resource utilizationdescribe your rights andresiliencerestrictions with respect toa subset of node failures. The congestion control algorithms defined in <xref target="RFC6356"/> ensurethisdoes not act detrimentally. Furthermore, there may be some scenarios where different TCP ports on a single host can provide disjoint paths (suchdocument. 
Code Components extracted from this document must include Simplified BSD License text asthrough certain Equal-Cost Multipath (ECMP) implementations <xref target="RFC2992"/>), and so the MPTCP design also supports the use of ports in path identifiers.</t> <t>There are three aspects to the backwards-compatibility listed above (discussed in more detaildescribed in<xref target="RFC6182"/>): <list style="hanging"> <t hangText="External Constraints:"> The protocol must function through the vast majoritySection 4.e ofexisting middleboxes such as NATs, firewalls, and proxies,the Trust Legal Provisions and are provided without warranty assuch must resemble existingdescribed in the Simplified BSD License. </t> </section> </boilerplate> <toc> <section anchor="toc" numbered="false" removeInRFC="false" toc="exclude" pn="section-toc.1"> <name slugifiedName="name-table-of-contents">Table of Contents</name> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1"> <li pn="section-toc.1-1.1"> <t keepWithNext="true" pn="section-toc.1-1.1.1"><xref derivedContent="1" format="counter" sectionFormat="of" target="section-1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-introduction">Introduction</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.1.2"> <li pn="section-toc.1-1.1.2.1"> <t keepWithNext="true" pn="section-toc.1-1.1.2.1.1"><xref derivedContent="1.1" format="counter" sectionFormat="of" target="section-1.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-design-assumptions">Design Assumptions</xref></t> </li> <li pn="section-toc.1-1.1.2.2"> <t keepWithNext="true" pn="section-toc.1-1.1.2.2.1"><xref derivedContent="1.2" format="counter" sectionFormat="of" target="section-1.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-multipath-tcp-in-the-networ">Multipath TCPas far as possible onin thewire. 
Furthermore, the protocol must not assumeNetworking Stack</xref></t> </li> <li pn="section-toc.1-1.1.2.3"> <t keepWithNext="true" pn="section-toc.1-1.1.2.3.1"><xref derivedContent="1.3" format="counter" sectionFormat="of" target="section-1.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-terminology">Terminology</xref></t> </li> <li pn="section-toc.1-1.1.2.4"> <t keepWithNext="true" pn="section-toc.1-1.1.2.4.1"><xref derivedContent="1.4" format="counter" sectionFormat="of" target="section-1.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-concept">MPTCP Concept</xref></t> </li> <li pn="section-toc.1-1.1.2.5"> <t keepWithNext="true" pn="section-toc.1-1.1.2.5.1"><xref derivedContent="1.5" format="counter" sectionFormat="of" target="section-1.5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-requirements-language">Requirements Language</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.2"> <t keepWithNext="true" pn="section-toc.1-1.2.1"><xref derivedContent="2" format="counter" sectionFormat="of" target="section-2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-operation-overview">Operation Overview</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.2.2"> <li pn="section-toc.1-1.2.2.1"> <t keepWithNext="true" pn="section-toc.1-1.2.2.1.1"><xref derivedContent="2.1" format="counter" sectionFormat="of" target="section-2.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-initiating-an-mptcp-connect">Initiating an MPTCP Connection</xref></t> </li> <li pn="section-toc.1-1.2.2.2"> <t keepWithNext="true" pn="section-toc.1-1.2.2.2.1"><xref derivedContent="2.2" format="counter" sectionFormat="of" target="section-2.2"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-associating-a-new-subflow-w">Associating a New Subflow with an Existing MPTCP Connection</xref></t> </li> <li pn="section-toc.1-1.2.2.3"> <t keepWithNext="true" pn="section-toc.1-1.2.2.3.1"><xref derivedContent="2.3" format="counter" sectionFormat="of" target="section-2.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-informing-the-other-host-ab">Informing the Other Host about Another Potential Address</xref></t> </li> <li pn="section-toc.1-1.2.2.4"> <t keepWithNext="true" pn="section-toc.1-1.2.2.4.1"><xref derivedContent="2.4" format="counter" sectionFormat="of" target="section-2.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-data-transfer-using-mptcp">Data Transfer Using MPTCP</xref></t> </li> <li pn="section-toc.1-1.2.2.5"> <t keepWithNext="true" pn="section-toc.1-1.2.2.5.1"><xref derivedContent="2.5" format="counter" sectionFormat="of" target="section-2.5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-requesting-a-change-in-a-pa">Requesting a Change in a Path's Priority</xref></t> </li> <li pn="section-toc.1-1.2.2.6"> <t keepWithNext="true" pn="section-toc.1-1.2.2.6.1"><xref derivedContent="2.6" format="counter" sectionFormat="of" target="section-2.6"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-closing-an-mptcp-connection">Closing an MPTCP Connection</xref></t> </li> <li pn="section-toc.1-1.2.2.7"> <t keepWithNext="true" pn="section-toc.1-1.2.2.7.1"><xref derivedContent="2.7" format="counter" sectionFormat="of" target="section-2.7"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-notable-features">Notable Features</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.3"> <t keepWithNext="true" pn="section-toc.1-1.3.1"><xref derivedContent="3" format="counter" sectionFormat="of" target="section-3"/>.
<xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-operations-an-overvie">MPTCP Operations: An Overview</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.3.2"> <li pn="section-toc.1-1.3.2.1"> <t keepWithNext="true" pn="section-toc.1-1.3.2.1.1"><xref derivedContent="3.1" format="counter" sectionFormat="of" target="section-3.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-connection-initiation">Connection Initiation</xref></t> </li> <li pn="section-toc.1-1.3.2.2"> <t keepWithNext="true" pn="section-toc.1-1.3.2.2.1"><xref derivedContent="3.2" format="counter" sectionFormat="of" target="section-3.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-starting-a-new-subflow">Starting a New Subflow</xref></t> </li> <li pn="section-toc.1-1.3.2.3"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.1"><xref derivedContent="3.3" format="counter" sectionFormat="of" target="section-3.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-operation-and-data-tr">MPTCP Operation and Data Transfer</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.3.2.3.2"> <li pn="section-toc.1-1.3.2.3.2.1"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.1.1"><xref derivedContent="3.3.1" format="counter" sectionFormat="of" target="section-3.3.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-data-sequence-mapping">Data Sequence Mapping</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.2"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.2.1"><xref derivedContent="3.3.2" format="counter" sectionFormat="of" target="section-3.3.2"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-data-acknowledgments">Data Acknowledgments</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.3"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.3.1"><xref derivedContent="3.3.3" format="counter" sectionFormat="of" target="section-3.3.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-closing-a-connection">Closing a Connection</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.4"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.4.1"><xref derivedContent="3.3.4" format="counter" sectionFormat="of" target="section-3.3.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-receiver-considerations">Receiver Considerations</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.5"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.5.1"><xref derivedContent="3.3.5" format="counter" sectionFormat="of" target="section-3.3.5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-sender-considerations">Sender Considerations</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.6"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.6.1"><xref derivedContent="3.3.6" format="counter" sectionFormat="of" target="section-3.3.6"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-reliability-and-retransmiss">Reliability and Retransmissions</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.7"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.7.1"><xref derivedContent="3.3.7" format="counter" sectionFormat="of" target="section-3.3.7"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-congestion-control-consider">Congestion Control Considerations</xref></t> </li> <li pn="section-toc.1-1.3.2.3.2.8"> <t keepWithNext="true" pn="section-toc.1-1.3.2.3.2.8.1"><xref derivedContent="3.3.8" format="counter" sectionFormat="of" target="section-3.3.8"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-subflow-policy">Subflow Policy</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.3.2.4"> <t keepWithNext="true" pn="section-toc.1-1.3.2.4.1"><xref derivedContent="3.4" format="counter" sectionFormat="of" target="section-3.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-address-knowledge-exchange-">Address Knowledge Exchange (Path Management)</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.3.2.4.2"> <li pn="section-toc.1-1.3.2.4.2.1"> <t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.1.1"><xref derivedContent="3.4.1" format="counter" sectionFormat="of" target="section-3.4.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-address-advertisement">Address Advertisement</xref></t> </li> <li pn="section-toc.1-1.3.2.4.2.2"> <t keepWithNext="true" pn="section-toc.1-1.3.2.4.2.2.1"><xref derivedContent="3.4.2" format="counter" sectionFormat="of" target="section-3.4.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-remove-address">Remove Address</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.3.2.5"> <t keepWithNext="true" pn="section-toc.1-1.3.2.5.1"><xref derivedContent="3.5" format="counter" sectionFormat="of" target="section-3.5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-fast-close">Fast Close</xref></t> </li> <li pn="section-toc.1-1.3.2.6"> <t keepWithNext="true" pn="section-toc.1-1.3.2.6.1"><xref derivedContent="3.6" format="counter" sectionFormat="of" target="section-3.6"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-subflow-reset">Subflow Reset</xref></t> </li> <li pn="section-toc.1-1.3.2.7"> <t keepWithNext="true" pn="section-toc.1-1.3.2.7.1"><xref derivedContent="3.7" format="counter" sectionFormat="of" target="section-3.7"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-fallback">Fallback</xref></t> </li> <li pn="section-toc.1-1.3.2.8"> <t keepWithNext="true" pn="section-toc.1-1.3.2.8.1"><xref derivedContent="3.8" format="counter" sectionFormat="of" target="section-3.8"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-error-handling">Error Handling</xref></t> </li> <li pn="section-toc.1-1.3.2.9"> <t keepWithNext="true" pn="section-toc.1-1.3.2.9.1"><xref derivedContent="3.9" format="counter" sectionFormat="of" target="section-3.9"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-heuristics">Heuristics</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.3.2.9.2"> <li pn="section-toc.1-1.3.2.9.2.1"> <t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.1.1"><xref derivedContent="3.9.1" format="counter" sectionFormat="of" target="section-3.9.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-port-usage">Port Usage</xref></t> </li> <li pn="section-toc.1-1.3.2.9.2.2"> <t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.2.1"><xref derivedContent="3.9.2" format="counter" sectionFormat="of" target="section-3.9.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-delayed-subflow-start-and-s">Delayed Subflow Start and Subflow Symmetry</xref></t> </li> <li pn="section-toc.1-1.3.2.9.2.3"> <t keepWithNext="true" pn="section-toc.1-1.3.2.9.2.3.1"><xref derivedContent="3.9.3" format="counter" sectionFormat="of" target="section-3.9.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-failure-handling">Failure Handling</xref></t> </li> </ul> </li> </ul> </li> <li pn="section-toc.1-1.4"> <t keepWithNext="true" pn="section-toc.1-1.4.1"><xref derivedContent="4" format="counter" sectionFormat="of" target="section-4"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-semantic-issues">Semantic Issues</xref></t> </li> <li pn="section-toc.1-1.5"> <t keepWithNext="true" pn="section-toc.1-1.5.1"><xref derivedContent="5" format="counter" sectionFormat="of" target="section-5"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-security-considerations">Security Considerations</xref></t> </li> <li pn="section-toc.1-1.6"> <t keepWithNext="true" pn="section-toc.1-1.6.1"><xref derivedContent="6" format="counter" sectionFormat="of" target="section-6"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-interactions-with-middlebox">Interactions with Middleboxes</xref></t> </li> <li pn="section-toc.1-1.7"> <t keepWithNext="true" pn="section-toc.1-1.7.1"><xref derivedContent="7" format="counter" sectionFormat="of" target="section-7"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-iana-considerations">IANA Considerations</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.7.2"> <li pn="section-toc.1-1.7.2.1"> <t keepWithNext="true" pn="section-toc.1-1.7.2.1.1"><xref derivedContent="7.1" format="counter" sectionFormat="of" target="section-7.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-tcp-option-kind-numbers">TCP Option Kind Numbers</xref></t> </li> <li pn="section-toc.1-1.7.2.2"> <t keepWithNext="true" pn="section-toc.1-1.7.2.2.1"><xref derivedContent="7.2" format="counter" sectionFormat="of" target="section-7.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-option-subtypes">MPTCP Option Subtypes</xref></t> </li> <li pn="section-toc.1-1.7.2.3"> <t keepWithNext="true" pn="section-toc.1-1.7.2.3.1"><xref derivedContent="7.3" format="counter" sectionFormat="of" target="section-7.3"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-handshake-algorithms">MPTCP Handshake Algorithms</xref></t> </li> <li pn="section-toc.1-1.7.2.4"> <t keepWithNext="true" pn="section-toc.1-1.7.2.4.1"><xref derivedContent="7.4" format="counter" sectionFormat="of" target="section-7.4"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-mp_tcprst-reason-codes">MP_TCPRST Reason Codes</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.8"> <t keepWithNext="true" pn="section-toc.1-1.8.1"><xref derivedContent="8" format="counter" sectionFormat="of" target="section-8"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-references">References</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.8.2"> <li pn="section-toc.1-1.8.2.1"> <t keepWithNext="true" pn="section-toc.1-1.8.2.1.1"><xref derivedContent="8.1" format="counter" sectionFormat="of" target="section-8.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-normative-references">Normative References</xref></t> </li> <li pn="section-toc.1-1.8.2.2"> <t keepWithNext="true" pn="section-toc.1-1.8.2.2.1"><xref derivedContent="8.2" format="counter" sectionFormat="of" target="section-8.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-informative-references">Informative References</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.9"> <t keepWithNext="true" pn="section-toc.1-1.9.1"><xref derivedContent="Appendix A" format="default" sectionFormat="of" target="section-appendix.a"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-notes-on-use-of-tcp-options">Notes on Use of TCP
Options</xref></t> </li> <li pn="section-toc.1-1.10"> <t keepWithNext="true" pn="section-toc.1-1.10.1"><xref derivedContent="Appendix B" format="default" sectionFormat="of" target="section-appendix.b"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-tcp-fast-open-and-mptcp">TCP Fast Open and MPTCP</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.10.2"> <li pn="section-toc.1-1.10.2.1"> <t keepWithNext="true" pn="section-toc.1-1.10.2.1.1"><xref derivedContent="B.1" format="counter" sectionFormat="of" target="section-b.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-tfo-cookie-request-with-mpt">TFO Cookie Request with MPTCP</xref></t> </li> <li pn="section-toc.1-1.10.2.2"> <t keepWithNext="true" pn="section-toc.1-1.10.2.2.1"><xref derivedContent="B.2" format="counter" sectionFormat="of" target="section-b.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-data-sequence-mapping-under">Data Sequence Mapping under TFO</xref></t> </li> <li pn="section-toc.1-1.10.2.3"> <t keepWithNext="true" pn="section-toc.1-1.10.2.3.1"><xref derivedContent="B.3" format="counter" sectionFormat="of" target="section-b.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-connection-establishment-ex">Connection Establishment Examples</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.11"> <t keepWithNext="true" pn="section-toc.1-1.11.1"><xref derivedContent="Appendix C" format="default" sectionFormat="of" target="section-appendix.c"/>.
<xref derivedContent="" format="title" sectionFormat="of" target="name-control-blocks">Control Blocks</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.11.2"> <li pn="section-toc.1-1.11.2.1"> <t keepWithNext="true" pn="section-toc.1-1.11.2.1.1"><xref derivedContent="C.1" format="counter" sectionFormat="of" target="section-c.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-mptcp-control-block">MPTCP Control Block</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.11.2.1.2"> <li pn="section-toc.1-1.11.2.1.2.1"> <t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.1.1"><xref derivedContent="C.1.1" format="counter" sectionFormat="of" target="section-c.1.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-authentication-and-metadata">Authentication and Metadata</xref></t> </li> <li pn="section-toc.1-1.11.2.1.2.2"> <t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.2.1"><xref derivedContent="C.1.2" format="counter" sectionFormat="of" target="section-c.1.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-sending-side">Sending Side</xref></t> </li> <li pn="section-toc.1-1.11.2.1.2.3"> <t keepWithNext="true" pn="section-toc.1-1.11.2.1.2.3.1"><xref derivedContent="C.1.3" format="counter" sectionFormat="of" target="section-c.1.3"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-receiving-side">Receiving Side</xref></t> </li> </ul> </li> <li pn="section-toc.1-1.11.2.2"> <t keepWithNext="true" pn="section-toc.1-1.11.2.2.1"><xref derivedContent="C.2" format="counter" sectionFormat="of" target="section-c.2"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-tcp-control-blocks">TCP Control Blocks</xref></t> <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-toc.1-1.11.2.2.2"> <li pn="section-toc.1-1.11.2.2.2.1"> <t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.1.1"><xref derivedContent="C.2.1" format="counter" sectionFormat="of" target="section-c.2.1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-sending-side-2">Sending Side</xref></t> </li> <li pn="section-toc.1-1.11.2.2.2.2"> <t keepWithNext="true" pn="section-toc.1-1.11.2.2.2.2.1"><xref derivedContent="C.2.2" format="counter" sectionFormat="of" target="section-c.2.2"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-receiving-side-2">Receiving Side</xref></t> </li> </ul> </li> </ul> </li> <li pn="section-toc.1-1.12"> <t keepWithNext="true" pn="section-toc.1-1.12.1"><xref derivedContent="Appendix D" format="default" sectionFormat="of" target="section-appendix.d"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-finite-state-machine">Finite State Machine</xref></t> </li> <li pn="section-toc.1-1.13"> <t keepWithNext="true" pn="section-toc.1-1.13.1"><xref derivedContent="Appendix E" format="default" sectionFormat="of" target="section-appendix.e"/>. 
<xref derivedContent="" format="title" sectionFormat="of" target="name-changes-from-rfc-6824">Changes from RFC 6824</xref></t> </li> <li pn="section-toc.1-1.14"> <t keepWithNext="true" pn="section-toc.1-1.14.1"><xref derivedContent="" format="none" sectionFormat="of" target="section-appendix.f"/><xref derivedContent="" format="title" sectionFormat="of" target="name-acknowledgments">Acknowledgments</xref></t> </li> <li pn="section-toc.1-1.15"> <t keepWithNext="true" pn="section-toc.1-1.15.1"><xref derivedContent="" format="none" sectionFormat="of" target="section-appendix.g"/><xref derivedContent="" format="title" sectionFormat="of" target="name-authors-addresses">Authors' Addresses</xref></t> </li> </ul> </section> </toc> </front> <middle> <section anchor="sec_intro" numbered="true" toc="include" removeInRFC="false" pn="section-1"> <name slugifiedName="name-introduction">Introduction</name> <t pn="section-1-1">Multipath TCP (MPTCP) is a set of extensions to regular TCP <xref target="RFC0793" format="default" sectionFormat="of" derivedContent="RFC0793"/> to provide a Multipath TCP service <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/>, which enables a transport connection to operate across multiple paths simultaneously. This document presents the protocol changes required to add multipath capability to TCP -- specifically, those for signaling and setting up multiple paths ("subflows"), managing these subflows, reassembly of data, and termination of sessions. This is not the only information required to create a Multipath TCP implementation, however. 
This document is complemented by three others: </t> <ul spacing="normal" bare="false" empty="false" pn="section-1-2"> <li pn="section-1-2.1"> <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> (MPTCP architecture), which explains the motivations behind Multipath TCP, contains a discussion of high-level design decisions on which this design is based, and provides an explanation of a functional separation through which an extensible MPTCP implementation can be developed.</li> <li pn="section-1-2.2"> <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/> (congestion control), which presents a safe congestion control algorithm for coupling the behavior of the multiple paths in order to "do no harm" to other network users.</li> <li pn="section-1-2.3"> <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/> (application considerations), which discusses what impact MPTCP will have on applications, what applications will want to do with MPTCP, and as a consequence of these factors, what API extensions an MPTCP implementation should present.</li> </ul> <t pn="section-1-3"> This document obsoletes the v0 specification of Multipath TCP <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. 
This document specifies MPTCP v1, which is not backward compatible with MPTCP v0. This document additionally defines version negotiation procedures for implementations that support both versions. </t> <section anchor="sec_assum" numbered="true" toc="include" removeInRFC="false" pn="section-1.1"> <name slugifiedName="name-design-assumptions">Design Assumptions</name> <t pn="section-1.1-1">In order to limit the potentially huge design space, the MPTCP Working Group imposed two key constraints on the Multipath TCP design presented in this document: </t> <ul spacing="normal" bare="false" empty="false" pn="section-1.1-2"> <li pn="section-1.1-2.1">It must be backward compatible with current, regular TCP, to increase its chances of deployment.</li> <li pn="section-1.1-2.2">It can be assumed that one or both hosts are multihomed and multiaddressed.</li> </ul> <t pn="section-1.1-3">To simplify the design, we 
assume that the presence of multiple addresses at a host is sufficient to indicate the existence of multiple paths. These paths need not be entirely disjoint: they may share one or many routers between them. Even in such a situation, making use of multiple paths is beneficial, improving resource utilization and resilience to a subset of node failures. The congestion control algorithm defined in <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/> ensures that the use of multiple paths does not act detrimentally. Furthermore, there may be some scenarios where different TCP ports on a single host can provide disjoint paths (such as through certain Equal-Cost Multipath (ECMP) implementations <xref target="RFC2992" format="default" sectionFormat="of" derivedContent="RFC2992"/>), and so the 
MPTCP design also supports the use of ports in path identifiers.</t> <t pn="section-1.1-4">There are three aspects to the backward compatibility listed above (discussed in more detail in <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/>): </t> <dl newline="false" spacing="normal" indent="3" pn="section-1.1-5"> <dt pn="section-1.1-5.1">External Constraints:</dt> <dd pn="section-1.1-5.2"> The protocol must function through the vast majority of existing middleboxes such as NATs, firewalls, and proxies, and as such must resemble existing TCP as far as possible on the wire. Furthermore, the protocol must not assume that the segments it sends on the wire arrive unmodified at the destination: they may be 
split or coalesced; TCP options may be removed or duplicated. </dd> <dt pn="section-1.1-5.3">Application Constraints:</dt> <dd pn="section-1.1-5.4"> The protocol must be usable with no change to existing applications that use the common TCP API (although it is reasonable that not all features would be available to such legacy applications). Furthermore, the protocol must provide the same service model as regular TCP to the application.</dd> <dt pn="section-1.1-5.5">Fallback:</dt> <dd pn="section-1.1-5.6"> The protocol should be able to fall back to standard TCP with no interference from the user, to be able to communicate with legacy hosts.</dd> </dl> <t pn="section-1.1-6">The complementary application considerations document <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/> discusses the necessary features of an API to provide backward compatibility, as well as API extensions to convey the behavior of MPTCP at a level of control and information equivalent to that available with regular, single-path TCP.</t> <t pn="section-1.1-7">Further discussion of the design constraints and associated design decisions is given in the MPTCP architecture document <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> and in <xref target="howhard" 
format="default" sectionFormat="of" derivedContent="howhard"/>.</t> </section> <section anchor="sec_layers" numbered="true" toc="include" removeInRFC="false" pn="section-1.2"> <name slugifiedName="name-multipath-tcp-in-the-networ">Multipath TCP in the Networking Stack</name> <t pn="section-1.2-1">MPTCP operates at the transport layer and aims to be transparent to both higher and lower layers. It is a set of additional features on top of standard TCP; <xref target="fig_arch" format="default" sectionFormat="of" derivedContent="Figure 1"/> illustrates this layering. MPTCP is designed to be usable by legacy applications with no changes; detailed discussion of its interactions with applications is given in <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/>.</t> <figure anchor="fig_arch" align="left" suppress-title="false" pn="figure-1"> <name slugifiedName="name-comparison-of-standard-tcp-">Comparison of Standard TCP and MPTCP Protocol Stacks</name> <artwork align="left" name="" type="" alt="" pn="section-1.2-2.1"> +-------------------------------+ | Application | +---------------+ +-------------------------------+ | Application | | MPTCP | +---------------+ + - - - - - - - + - - - - - - - + | TCP | | Subflow (TCP) | Subflow (TCP) | +---------------+ +-------------------------------+ | IP | | IP | IP | +---------------+ +-------------------------------+ </artwork> </figure> </section> <section 
numbered="true" toc="include" removeInRFC="false" pn="section-1.3"> <name slugifiedName="name-terminology">Terminology</name> <t pn="section-1.3-1">This document makes use of a number of terms that are either MPTCP specific or have defined meaning in the context of MPTCP, as follows: </t> <dl newline="false" spacing="normal" indent="3" pn="section-1.3-2"> <dt pn="section-1.3-2.1">Path:</dt> <dd pn="section-1.3-2.2"> A sequence of links between a sender and a receiver, defined in this context by a 4-tuple of source and destination address/port pairs.</dd> <dt pn="section-1.3-2.3">Subflow:</dt> <dd pn="section-1.3-2.4"> A flow of TCP segments operating over an individual path, which forms part of a larger MPTCP connection. A subflow is started and terminated similarly to a regular TCP connection.</dd> <dt pn="section-1.3-2.5">(MPTCP) Connection:</dt> <dd pn="section-1.3-2.6"> A set of one or more subflows, over which an application can communicate between two 
hosts. There is a one‑to‑one mapping between a connection and an application socket.</dd> <dt pn="section-1.3-2.7">Data-level:</dt> <dd pn="section-1.3-2.8"> The payload data is nominally transferred over a connection, which in turn is transported over subflows. Thus, the term "data-level" is synonymous with "connection-level", in contrast to "subflow-level", which refers to properties of an individual subflow.</dd> <dt pn="section-1.3-2.9">Token:</dt> <dd pn="section-1.3-2.10"> A locally unique identifier given to a multipath connection by a host. May also be referred to as a "Connection ID".</dd> <dt pn="section-1.3-2.11">Host:</dt> <dd pn="section-1.3-2.12"> An end host operating an MPTCP implementation, and either initiating or accepting an MPTCP connection.</dd> </dl> <t pn="section-1.3-3"> In addition to these terms, note that MPTCP's interpretation of, and effect on, regular single-path TCP semantics are 
discussed in <xref target="sec_semantics" format="default" sectionFormat="of" derivedContent="Section 4"/>.</t> </section> <section anchor="sec_operation" numbered="true" toc="include" removeInRFC="false" pn="section-1.4"> <name slugifiedName="name-mptcp-concept">MPTCP Concept</name> <t pn="section-1.4-1">This section provides a high-level summary of normal operation of MPTCP; this type of scenario is illustrated in <xref target="fig_scenario" format="default" sectionFormat="of" derivedContent="Figure 2"/>. A detailed description of how MPTCP operates is given in <xref target="sec_protocol" format="default" sectionFormat="of" derivedContent="Section 3"/>. </t> <figure anchor="fig_scenario" align="left" suppress-title="false" pn="figure-2"> <name slugifiedName="name-example-mptcp-usage-scenari">Example MPTCP Usage Scenario</name> <artwork align="left" name="" type="" alt="" pn="section-1.4-2.1"> Host A Host B ------------------------ ------------------------ Address A1 Address A2 Address B1 Address B2 ---------- ---------- ---------- ---------- | | | | | (initial connection setup) | | |----------------------------------->| | |&lt;-----------------------------------| | | | | | | (additional subflow setup) | | |--------------------->| | | |&lt;---------------------| | | | | | | | | | </artwork> </figure> <ul spacing="normal" bare="false" empty="false" pn="section-1.4-3"> <li pn="section-1.4-3.1">To a non-MPTCP-aware application, MPTCP will behave the same as normal TCP. Extended APIs could provide additional control to MPTCP-aware applications <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/>. An application begins by opening a TCP socket in the normal way. 
MPTCP signaling and operation are handled by the MPTCP implementation. </li> <li pn="section-1.4-3.2">An MPTCP connection begins similarly to a regular TCP connection. This is illustrated in <xref target="fig_scenario" format="default" sectionFormat="of" derivedContent="Figure 2"/>, where an MPTCP connection is established between addresses A1 and B1 on Hosts A and B, respectively.</li> <li pn="section-1.4-3.3">If extra paths are available, additional TCP sessions (termed MPTCP "subflows") are created on these paths and are combined with the existing session, which continues to appear as a single connection to the applications at both ends. 
The creation of the additional TCP session is illustrated between Address A2 on Host A and Address B1 on Host B.</li> <li pn="section-1.4-3.4">MPTCP identifies multiple paths by the presence of multiple addresses at hosts. Combinations of these multiple addresses equate to the additional paths. In the example, other potential paths that could be set up are A1&lt;->B2 and A2&lt;->B2. Although this additional session is shown as being initiated from A2, it could equally have been initiated from B1 or B2.</li> <li pn="section-1.4-3.5">The discovery and setup of additional subflows will be achieved through a path management method; this document describes a mechanism by which a host can initiate new subflows by using its own additional addresses or by signaling its available addresses to the other host.</li> <li pn="section-1.4-3.6">MPTCP adds connection-level sequence numbers to allow the reassembly of segments arriving on multiple subflows with differing network delays. </li> <li pn="section-1.4-3.7">Subflows are terminated as regular TCP connections, with a four‑way FIN handshake. 
The MPTCP connection is terminated by a connection-level FIN.</li> </ul> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-1.5"> <name slugifiedName="name-requirements-language">Requirements Language</name> <t pn="section-1.5-1"> The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to be interpreted as described in BCP 14 <xref target="RFC2119" format="default" sectionFormat="of" derivedContent="RFC2119"/> <xref target="RFC8174" format="default" sectionFormat="of" derivedContent="RFC8174"/> when, and only when, they appear in all capitals, as shown here. </t> </section> </section> <section anchor="sec_overview" numbered="true" toc="include" removeInRFC="false" pn="section-2"> <name slugifiedName="name-operation-overview">Operation Overview</name> <t pn="section-2-1">This section presents a single description of common MPTCP operation, with reference to the protocol operation. This is a 
high-level overview of the key functions; the full specification follows in <xref target="sec_protocol" format="default" sectionFormat="of" derivedContent="Section 3"/>. Extensibility and negotiated features are not discussed here. Considerable reference is made to symbolic names of MPTCP options throughout this section -- these are subtypes of the IANA‑assigned MPTCP option (see <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>), and their formats are defined in the detailed protocol specification provided in <xref target="sec_protocol" format="default" sectionFormat="of" derivedContent="Section 3"/>.</t> <t pn="section-2-2">A Multipath TCP connection provides a bidirectional bytestream between two hosts communicating like normal TCP and thus does not require any change to the applications. However, Multipath TCP enables the hosts to use different paths with different IP addresses to exchange packets belonging to the MPTCP connection. A Multipath TCP connection appears like a normal TCP connection to an application. 
However, to the network layer, each MPTCP subflow looks like a regular TCP flow whose segments carry a new TCP option type. Multipath TCP manages the creation, removal, and utilization of these subflows to send data. The number of subflows that are managed within a Multipath TCP connection is not fixed, and it can fluctuate during the lifetime of the Multipath TCP connection.</t> <t pn="section-2-3">All MPTCP operations are signaled with a TCP option -- a single numerical type for MPTCP, with "subtypes" for each MPTCP message. What follows is a summary of the purpose and rationale of these messages.</t> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.1"> <name slugifiedName="name-initiating-an-mptcp-connect">Initiating an MPTCP Connection</name> <t pn="section-2.1-1">This is the same signaling as for initiating a normal TCP connection, but the SYN, SYN/ACK, and initial ACK (and data) packets also carry the MP_CAPABLE option. This option has a variable length and serves multiple purposes. Firstly, it verifies whether the remote host supports Multipath TCP; secondly, this option allows the hosts to exchange some information to authenticate the establishment of additional subflows. 
Further details are given in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.1-2"> Host A Host B ------ ------ MP_CAPABLE -> [flags] &lt;- MP_CAPABLE [B's key, flags] ACK + MP_CAPABLE (+ data) -> [A's key, B's key, flags, (data-level details)] </artwork> <t pn="section-2.1-3">Retransmission of the ACK + MP_CAPABLE can occur if it is not known if it has been received. The following diagrams show all possible exchanges for the initial subflow setup to ensure this reliability.</t> <artwork align="left" name="" type="" alt="" pn="section-2.1-4"> Host A (with data to send immediately) Host B ------ ------ MP_CAPABLE -> [flags] &lt;- MP_CAPABLE [B's key, flags] ACK + MP_CAPABLE + data -> [A's key, B's key, flags, data-level details] Host A (with data to send later) Host B ------ ------ MP_CAPABLE -> [flags] &lt;- MP_CAPABLE [B's key, flags] ACK + MP_CAPABLE -> [A's key, B's key, flags] ACK + MP_CAPABLE + data 
-> [A's key, B's key, flags, data-level details] Host A Host B (sending first) ------ ------ MP_CAPABLE -> [flags] &lt;- MP_CAPABLE [B's key, flags] ACK + MP_CAPABLE -> [A's key, B's key, flags] &lt;- ACK + DSS + data [data-level details] </artwork> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.2"> <name slugifiedName="name-associating-a-new-subflow-w">Associating a New Subflow with an Existing MPTCP Connection</name> <t pn="section-2.2-1">The exchange of keys in the MP_CAPABLE handshake provides material that can be used to authenticate the endpoints when new subflows will be set up. Additional subflows begin in the same way as initiating a normal TCP connection, but the SYN, SYN/ACK, and ACK packets also carry the MP_JOIN option. </t> <t pn="section-2.2-2">Host A initiates a new subflow between one of its addresses and one of Host B's addresses. The token -- generated from the key -- is used to identify which MPTCP connection it is joining, and the Hash‑based Message Authentication Code (HMAC) is used for authentication. The HMAC uses the 
These acknowledgments track the advancement ofkeys exchanged in thebytestreamMP_CAPABLE handshake andslidethereceiving window.</t> <t>Further details are in <xref target="sec_generalop"/>.</t> <figure><artwork align="left"><![CDATA[ Host A Host B ------ ------ DSS -> [Data Sequence Mapping] [Data ACK] [Checksum] ]]></artwork></figure> </section> <section title="Requesting a Changerandom numbers (nonces) exchanged ina Path's Priority"> <t>Hoststhese MP_JOIN options. MP_JOIN also contains flags and an Address ID that canindicate at initial subflow setup whether they wish the subflow tobe usedas a regular or backup path -- a backup path only being used if there are no regular paths available. During a connection, Host A can request a change into refer to thepriority of a subflow throughsource address without theMP_PRIO signalsender needing toHost B.know if it has been changed by a NAT. Further details are given in <xreftarget="sec_policy"/>.</t> <figure><artwork align="left"><![CDATA[target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.2-3"> Host A Host B ------ ------MP_PRIO -> ]]></artwork></figure>MP_JOIN -> [B's token, A's nonce, A's Address ID, flags] <- MP_JOIN [B's HMAC, B's nonce, B's Address ID, flags] ACK + MP_JOIN -> [A's HMAC] <- ACK </artwork> </section> <sectiontitle="Closing an MPTCP Connection"> <t>When a host wants to close an existing subflow, but notnumbered="true" toc="include" removeInRFC="false" pn="section-2.3"> <name slugifiedName="name-informing-the-other-host-ab">Informing thewhole connection, it can initiate a regular TCP FIN/ACK exchange.</t> <t>WhenOther HostA wantsabout Another Potential Address</name> <t pn="section-2.3-1">The set of IP addresses associated toinforma multihomed host may change during the lifetime of an MPTCP connection. MPTCP supports the addition and removal of addresses on a host both implicitly and explicitly. 
If Host A has established a subflow starting at address/port pair IP#-A1 and wants to open a second subflow starting at address/port pair IP#-A2, it simply initiates the establishment of the subflow as explained above. The remote host will then be implicitly informed about the new address.</t> <t pn="section-2.3-2">In some circumstances, a host may want to advertise to the remote host the availability of an address without establishing a new subflow -- for example, when a NAT prevents setup in one direction. In the example below, Host A informs Host B about its alternative IP address/port pair (IP#-A2). Host B may later send an MP_JOIN to this new address. The ADD_ADDR option contains an HMAC to authenticate the address as having been sent from the originator of the connection. The receiver of this option echoes it back to the client to indicate successful receipt. Further details are given in <xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.3-3"> Host A Host B ------ ------ ADD_ADDR -> [Echo-flag=0, IP#-A2, IP#-A2's Address ID, HMAC of IP#-A2] <- ADD_ADDR [Echo-flag=1, IP#-A2, IP#-A2's Address ID, HMAC of IP#-A2] </artwork> <t pn="section-2.3-4">There is a corresponding signal for address removal, making use of the Address ID that is signaled in the ADD_ADDR handshake. 
Further details are given in <xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.3-5"> Host A Host B ------ ------ REMOVE_ADDR -> [IP#-A2's Address ID] </artwork> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.4"> <name slugifiedName="name-data-transfer-using-mptcp">Data Transfer Using MPTCP</name> <t pn="section-2.4-1">To ensure reliable, in-order delivery of data over subflows that may appear and disappear at any time, MPTCP uses a 64-bit Data Sequence Number (DSN) to number all data sent over the MPTCP connection. Each subflow has its own 32-bit sequence number space, utilizing the regular TCP sequence number header, and an MPTCP option maps the subflow sequence space to the data sequence space. In this way, data can be retransmitted on different subflows (mapped to the same DSN) in the event of failure.</t> <t pn="section-2.4-2">The Data Sequence Signal (DSS) carries the Data Sequence Mapping. 
The Data Sequence Mapping consists of the subflow sequence number, data sequence number, and length for which this mapping is valid. This option can also carry a connection-level acknowledgment (the "Data ACK") for the received DSN.</t> <t pn="section-2.4-3">With MPTCP, all subflows share the same receive buffer and advertise the same receive window. There are two levels of acknowledgment in MPTCP. Regular TCP acknowledgments are used on each subflow to acknowledge the reception of the segments sent over the subflow independently of their DSN. In addition, there are connection-level acknowledgments for the data sequence space. 
These acknowledgments track the advancement of the bytestream and slide the receive window.</t> <t pn="section-2.4-4">Further details are given in <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.4-5"> Host A Host B ------ ------ DSS -> [Data Sequence Mapping] [Data ACK] [Checksum] </artwork> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.5"> <name slugifiedName="name-requesting-a-change-in-a-pa">Requesting a Change in a Path's Priority</name> <t pn="section-2.5-1">Hosts can indicate at initial subflow setup whether they wish the subflow to be used as a regular or backup path -- a backup path only being used if there are no regular paths available. During a connection, Host A can request a change in the priority of a subflow through the MP_PRIO signal to Host B. 
Further details are given in <xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.5-2"> Host A Host B ------ ------ MP_PRIO -> </artwork> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.6"> <name slugifiedName="name-closing-an-mptcp-connection">Closing an MPTCP Connection</name> <t pn="section-2.6-1">When a host wants to close an existing subflow but not the whole connection, it can initiate a regular TCP FIN/ACK exchange.</t> <t pn="section-2.6-2">When Host A wants to inform Host B that it has no more data to send, it signals this "Data FIN" as part of the DSS (see above). It has the same semantics and behavior as a regular TCP FIN, but at the connection level. Once all the data on the MPTCP connection has been successfully received, this message is acknowledged at the connection level with a Data ACK. 
Further details are given in <xref target="sec_close" format="default" sectionFormat="of" derivedContent="Section 3.3.3"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.6-3"> Host A Host B ------ ------ DSS -> [Data FIN] <- DSS [Data ACK] </artwork> <t pn="section-2.6-4">There is an additional method of connection closure, referred to as "Fast Close", which is analogous to closing a single-path TCP connection with a RST signal. The MP_FASTCLOSE signal is used to indicate to the peer that the connection will be abruptly closed and no data will be accepted anymore. This can be used on an ACK (which ensures reliability of the signal) or a RST (which does not). 
Both examples are shown in the following diagrams. Further details are given in <xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"/>.</t> <artwork align="left" name="" type="" alt="" pn="section-2.6-5"> Host A Host B ------ ------ ACK + MP_FASTCLOSE -> [B's key] [RST on all other subflows] -> <- [RST on all subflows] Host A Host B ------ ------ RST + MP_FASTCLOSE -> [B's key] [on all subflows] <- [RST on all subflows] </artwork> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-2.7"> <name slugifiedName="name-notable-features">Notable Features</name> <t pn="section-2.7-1">It is worth highlighting that MPTCP's signaling has been designed with several key requirements in mind: </t> <ul spacing="normal" bare="false" empty="false" pn="section-2.7-2"> <li pn="section-2.7-2.1">To cope with NATs on the path, addresses are referred to by Address IDs, in case the IP packet's source address gets changed by a NAT. Setting up a new TCP flow is not possible if the receiver of the SYN is behind a NAT; to allow subflows to be created when either end is behind a NAT, MPTCP uses the ADD_ADDR message. </li> <li pn="section-2.7-2.2">MPTCP falls back to ordinary TCP if MPTCP operation is not possible -- for example, if one host is not MPTCP capable or if a middlebox alters the payload. 
This is discussed in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>.</li> <li pn="section-2.7-2.3">To address the threats identified in <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>, the following steps are taken: keys are sent in the clear in the MP_CAPABLE messages; MP_JOIN messages are secured with HMAC-SHA256 (<xref target="RFC2104" format="default" sectionFormat="of" derivedContent="RFC2104"/> using the algorithm in <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>) using those keys; and standard TCP validity checks are made on the other messages (ensuring that sequence numbers are in‑window <xref target="RFC5961" format="default" sectionFormat="of" derivedContent="RFC5961"/>). Residual threats to MPTCP v0 were identified in <xref target="RFC7430" format="default" sectionFormat="of" derivedContent="RFC7430"/>, and those affecting the protocol (i.e., modifications to ADD_ADDR) have been incorporated in this document. 
Further discussion of security can be found in <xref target="sec_security" format="default" sectionFormat="of" derivedContent="Section 5"/>.</li> </ul> </section> </section> <section anchor="sec_protocol" numbered="true" toc="include" removeInRFC="false" pn="section-3"> <name slugifiedName="name-mptcp-operations-an-overvie">MPTCP Operations: An Overview</name> <t pn="section-3-1">This section describes the operation of MPTCP. The subsections below discuss each key part of the protocol operation.</t> <t pn="section-3-2">All MPTCP operations are signaled using optional TCP header fields. A single TCP option number ("Kind") has been assigned by IANA for MPTCP (see <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>), and then individual messages will be determined by a "subtype", the values of which are also stored in an IANA registry (and are also listed in <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>). As with all TCP options, the Length field is specified in bytes and includes the 2 bytes of Kind and Length.</t> 
<t pn="section-3-3">Throughout this document, when reference is made to an MPTCP option by symbolic name, such as "MP_CAPABLE", this refers to a TCP option with the single MPTCP option type, and with the subtype value of the symbolic name as defined in <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>. This subtype is a 4-bit field -- the first 4 bits of the option payload, as shown in <xref target="fig_option" format="default" sectionFormat="of" derivedContent="Figure 3"/>. 
The MPTCP messages are defined in the following sections.</t> <figure anchor="fig_option" align="left" suppress-title="false" pn="figure-3"> <name slugifiedName="name-mptcp-option-format">MPTCP Option Format</name> <artwork align="left" name="" type="" alt="" pn="section-3-4.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+-----------------------+ | Kind | Length |Subtype| | +---------------+---------------+-------+ | | Subtype-specific data | | (variable length) | +---------------------------------------------------------------+ </artwork> </figure> <t pn="section-3-5">Those MPTCP options associated with subflow initiation are used on packets with the SYN flag set. Additionally, there is one MPTCP option for signaling metadata to ensure that segmented data can be recombined for delivery to the application.</t> 
<t pn="section-3-6">The remaining options, however, are signals that do not need to be on a specific packet, such as those for signaling additional addresses. While an implementation may desire to send MPTCP options as soon as possible, it may not be possible to combine all desired options (both those for MPTCP and for regular TCP, such as SACK (selective acknowledgment) <xref target="RFC2018" format="default" sectionFormat="of" derivedContent="RFC2018"/>) on a single packet. Therefore, an implementation may choose to send duplicate ACKs containing the additional signaling information. 
This changes the semantics of a duplicate ACK; these are usually only sent as a signal of a lost segment <xref target="RFC5681" format="default" sectionFormat="of" derivedContent="RFC5681"/> in regular TCP. Therefore, an MPTCP implementation receiving a duplicate ACK that contains an MPTCP option <bcp14>MUST NOT</bcp14> treat it as a signal of congestion. Additionally, an MPTCP implementation <bcp14>SHOULD NOT</bcp14> send more than two duplicate ACKs in a row for the purposes of sending MPTCP options alone, in order to ensure that no middleboxes misinterpret this as a sign of congestion.</t> <t pn="section-3-7">Furthermore, standard TCP validity checks (such as ensuring that the sequence number and acknowledgment number are within the window) <bcp14>MUST</bcp14> be undertaken before processing any MPTCP signals, as described in <xref target="RFC5961" format="default" sectionFormat="of" derivedContent="RFC5961"/>, and initial subflow sequence numbers <bcp14>SHOULD</bcp14> be generated according to the recommendations in <xref target="RFC6528" format="default" sectionFormat="of" derivedContent="RFC6528"/>.</t> 
<section anchor="sec_init" numbered="true" toc="include" removeInRFC="false" pn="section-3.1"> <name slugifiedName="name-connection-initiation">Connection Initiation</name> <t pn="section-3.1-1">Connection initiation begins with a SYN, SYN/ACK, ACK exchange on a single path. Each packet contains the Multipath Capable (MP_CAPABLE) MPTCP option (<xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>). This option declares its sender capable of performing Multipath TCP and wishes to do so on this particular connection.</t> <figure anchor="tcpm_capable" align="left" suppress-title="false" pn="figure-4"> <name slugifiedName="name-multipath-capable-mp_capabl">Multipath Capable (MP_CAPABLE) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.1-2.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+-------+---------------+ | Kind | Length |Subtype|Version|A|B|C|D|E|F|G|H| +---------------+---------------+-------+-------+---------------+ | Option Sender's Key (64 bits) | | (if option Length > 4) | | | +---------------------------------------------------------------+ | Option Receiver's Key (64 bits) | | (if option Length > 12) | | | +-------------------------------+-------------------------------+ | Data-Level Length (16 bits) | Checksum (16 bits, optional) | +-------------------------------+-------------------------------+ </artwork> </figure> 
<t pn="section-3.1-3">The MP_CAPABLE exchange in this specification (v1) is different than that specified in v0. If a host supports multiple versions of MPTCP, the sender of the MP_CAPABLE option <bcp14>SHOULD</bcp14> signal the highest version number it supports. In return, in its MP_CAPABLE option, the receiver will signal the version number it wishes to use, which <bcp14>MUST</bcp14> be equal to or lower than the version number indicated in the initial MP_CAPABLE. There is a caveat, though, with respect to this version negotiation with old listeners that only support v0. A listener that supports v0 expects that the MP_CAPABLE option in the SYN segment will include the initiator's key. If, however, the initiator already upgraded to v1, it won't include the key in the SYN segment. Thus, the listener will ignore the MP_CAPABLE of this SYN segment and reply with a SYN/ACK that does not include an MP_CAPABLE. 
The initiator <bcp14>MAY</bcp14> choose to immediately fall back to TCP or <bcp14>MAY</bcp14> choose to attempt a connection using MPTCP v0 (if the initiator supports v0), in order to discover whether the listener supports the earlier version of MPTCP. In general, an MPTCP v0 connection will likely be preferred over a TCP connection; however, in a particular deployment scenario, it may be known that the listener is unlikely to support MPTCP v0 and so the initiator may prefer not to attempt a v0 connection. An initiator <bcp14>MAY</bcp14> cache information for a peer about what version of MPTCP it supports, if any, and use this information for future connection attempts.</t> 
<t pn="section-3.1-4">The MP_CAPABLE option is of variable length, with different fields included, depending on which packet the option is used on. The full MP_CAPABLE option is shown in <xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>.</t> <t pn="section-3.1-5">The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK packets that start the first subflow of an MPTCP connection, as well as the first packet that carries data, if the initiator wishes to send first. The data carried by each option is as follows, where A = initiator and B = listener. </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.1-6"> <li pn="section-3.1-6.1">SYN (A->B): only the first 4 octets (Length = 4).</li> <li pn="section-3.1-6.2">SYN/ACK (B->A): B's key for this connection (Length = 12).</li> <li pn="section-3.1-6.3">ACK (no data) (A->B): A's key followed by B's key (Length = 20).</li> <li pn="section-3.1-6.4">ACK (with first data) (A->B): A's key followed by B's key followed by Data-Level Length, and optional Checksum (Length = 22 or 24).</li> </ul> <t pn="section-3.1-7"> The contents of the option are determined by the SYN and ACK flags of the packet, along with the option's Length field. 
In <xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>, "Sender" andgiven it is"Receiver" refer to thefirst data,sender or receiver of theInitial Data Sequence Number (IDSN) is also known (as itTCP packet (which can be either host).</t> <t pn="section-3.1-8">The initial SYN, containing just the MP_CAPABLE header, isgenerated fromused to define thekey,version of MPTCP being requested and also to exchange flags to negotiate connection features, as describedbelow). Conveyinglater.</t> <t pn="section-3.1-9">This option is used to declare the 64-bit keyson the first data packet allowsthat theTCP reliability mechanismsend hosts have generated for this MPTCP connection. These keys are used toensureauthenticate thepacket is successfully delivered. The receiver will acknowledgeaddition of future subflows to thisdata atconnection. This is the only time the key will be sent in the clear on the wire (unless "Fast Close" (<xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"/>) is used); all future subflows will identify the connectionlevel withusing aData ACK, as if32-bit "token". This token is aDSS option has been received.</t> <t>There could be situations where both A and B attempt to transmit initial data atcryptographic hash of this key. The algorithm for this process is dependent on thesame time. For example, if A did not initially have data to send, but then needed to transmit data before it had received anything from B, it would use a MP_CAPABLE option with data parameters (since it would not know ifauthentication algorithm selected; theMP_CAPABLE onmethod of selection is defined later in this section.</t> <t pn="section-3.1-10">Upon reception of theACK was received). In such a situation, B may also have transmitted data withinitial SYN segment, aDSS option, but it had not yet been received at A. 
stateful server generates a random key and replies with a SYN/ACK. The key's method of generation is implementation specific. The key <bcp14>MUST</bcp14> be hard to guess, and it <bcp14>MUST</bcp14> be unique for the sending host across all its current MPTCP connections. Recommendations for generating random numbers for use in keys are given in <xref target="RFC4086" format="default" sectionFormat="of" derivedContent="RFC4086"/>. Connections will be indexed at each host by the token (a one-way hash of the key). Therefore, an implementation will require a mapping from each token to the corresponding connection, and in turn to the keys for the connection.</t> <t pn="section-3.1-11">There is a risk that two different keys will hash to the same token. The risk of hash collisions is usually small, unless the host is handling many tens of thousands of connections. Therefore, an implementation <bcp14>SHOULD</bcp14> check its list of connection tokens to ensure that there is no collision before sending its key, and if there is, then it should generate a new key. This would, however, be costly for a server with thousands of connections.
The subflow handshake mechanism (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) will ensure that new subflows only join the correct connection, however, through the cryptographic handshake, as well as checking the connection tokens in both directions, and ensuring that sequence numbers are in-window. So, in the worst case, if there was a token collision, the new subflow would not succeed, but the MPTCP connection would continue to provide a regular TCP service.</t> <t pn="section-3.1-12">Since key generation is implementation specific, there is no requirement that they simply be random numbers. An implementation is free to exchange cryptographic material out of band and generate these keys from this material, in order to provide additional mechanisms by which to verify the identity of the communicating entities. For example, an implementation could choose to link its MPTCP keys to those used in higher-layer TLS or SSH connections.</t> <t pn="section-3.1-13">If the server behaves in a stateless manner, it has to generate its own key in a verifiable fashion. This verifiable way of generating the key can be done by using a hash of the 4-tuple, sequence number, and a local secret (similar to what is done for the
TCP sequence number <xref target="RFC4987" format="default" sectionFormat="of" derivedContent="RFC4987"/>). It will thus be able to verify whether it is indeed the originator of the key echoed back in the subsequent MP_CAPABLE option. As for a stateful server, the tokens <bcp14>SHOULD</bcp14> be checked for uniqueness; however, if uniqueness is not met and there is no way to generate an alternative verifiable key, then the connection <bcp14>MUST</bcp14> fall back to using regular TCP by not sending an MP_CAPABLE in the SYN/ACK.</t> <t pn="section-3.1-14">The ACK carries both A's key and B's key. This is the first time that A's key is seen on the wire, although it is expected that A will have generated a key locally before the initial SYN. The echoing of B's key allows B to operate statelessly, as described above. Therefore, A's key must be
delivered reliably to B, and in order to do this, the transmission of this packet must be made reliable.</t> <t pn="section-3.1-15">If B has data to send first, then the reliable delivery of the ACK + MP_CAPABLE is ensured by the receipt of this data with an MPTCP Data Sequence Signal (DSS) option (<xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>) containing a DATA_ACK for the MP_CAPABLE (which is the first octet of the data sequence space). If, however, A wishes to send data first, it has two options to ensure the reliable delivery of the ACK + MP_CAPABLE. If it immediately has data to send, then the first ACK (with data) would also contain an MP_CAPABLE option with additional data parameters (the Data-Level Length and optional Checksum as shown in <xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>). If A does not immediately have data to send, it <bcp14>MUST</bcp14> include the MP_CAPABLE on the first ACK, but without the additional data parameters. When A does have data to send, it must repeat the sending of the MP_CAPABLE option from the first ACK, with additional data parameters.
This MP_CAPABLE option is used in place of the DSS and simply specifies (1) the Data-Level Length of the payload and (2) the checksum (if the use of checksums is negotiated). This is the minimal data required to establish an MPTCP connection -- it allows validation of the payload, and given that it is the first data, the Initial Data Sequence Number (IDSN) is also known (as it is generated from the key, as described below). Conveying the
keys on the first data packet allows the TCP reliability mechanisms to ensure that the packet is successfully delivered. The receiver will acknowledge this data at the connection level with a Data ACK, as if a DSS option has been received.</t> <t pn="section-3.1-16">There could be situations where both A and B attempt to transmit initial data at the same time. For example, if A did not initially have data to send but then needed to transmit data before it had received anything from B, it would use an MP_CAPABLE option with data parameters (since it would not know if the MP_CAPABLE on the ACK was received). In such a situation, B may also have transmitted data with a DSS option, but it had not yet been received at A. Therefore, B has received data with an MP_CAPABLE mapping after it has sent data with a DSS option.
To ensure that these situations can be handled, it follows that the data parameters in an MP_CAPABLE are semantically equivalent to those in a DSS option and can be used interchangeably. Similar situations could occur when the MP_CAPABLE with data is lost and retransmitted. Furthermore, in the case of TCP segmentation offloading, the MP_CAPABLE with data parameters may be duplicated across multiple packets, and implementations must also be able to cope with duplicate MP_CAPABLE mappings as well as duplicate DSS mappings.</t> <t pn="section-3.1-17">Additionally, the MP_CAPABLE exchange allows the safe passage of MPTCP options on SYN packets to be determined. If any of these options are dropped, MPTCP will gracefully fall back to regular single-path TCP, as documented in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>. If at any point in the handshake either party thinks the MPTCP negotiation is compromised -- for example, by a middlebox corrupting the TCP options or by unexpected ACK numbers being present -- the host <bcp14>MUST</bcp14> stop using MPTCP and no longer include MPTCP options in future TCP packets. The other host will then also fall back to regular TCP using the fallback mechanism.
Note that new subflows <bcp14>MUST NOT</bcp14> be established (using the process documented in <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) until a DSS option has been successfully received across the path (as documented in <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>).</t> <t pn="section-3.1-18">Like all MPTCP options, the MP_CAPABLE option starts with the Kind and Length to specify the TCP option's kind and length. This information is followed by the MP_CAPABLE option. The first 4 bits of the first octet in the MP_CAPABLE option (<xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>) define the MPTCP Option Subtype (see <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>; for MP_CAPABLE, this value is 0x0), and the remaining 4 bits of this octet specify the MPTCP version in use (for this specification, this value is 1).</t>
<t pn="section-3.1-19">The second octet is reserved for flags, allocated as follows: </t> <dl newline="false" spacing="normal" indent="14" pn="section-3.1-20"> <dt pn="section-3.1-20.1">A:</dt> <dd pn="section-3.1-20.2"> The leftmost bit, labeled "A", <bcp14>SHOULD</bcp14> be set to 1 to indicate "Checksum required", unless the system administrator has decided that checksums are not required (for example, if the environment is controlled and no middleboxes exist that might adjust the payload).</dd> <dt pn="section-3.1-20.3">B:</dt> <dd pn="section-3.1-20.4"> The second bit, labeled "B", is an extensibility flag. It <bcp14>MUST</bcp14> be set to 0 for current implementations. This flag will be used for an extensibility mechanism in a future specification, and the impact of this flag will be defined at a later date. It is expected, but not mandated, that this flag would be used as part of an alternative security mechanism that does not require a full version upgrade of the protocol but does require redefining some elements of the
handshake. If receiving a message with the "B" flag set to 1 and this is not understood, then the MP_CAPABLE in this SYN <bcp14>MUST</bcp14> be silently ignored, which triggers a fallback to regular TCP; the sender is expected to retry with a format compatible with this legacy specification. Note that the length of the MP_CAPABLE option, and the meanings of bits "D" through "H", may be altered by setting B=1.</dd> <dt pn="section-3.1-20.5">C:</dt> <dd pn="section-3.1-20.6"> The third bit, labeled "C", is set to 1 to indicate that the sender of this option will not accept additional MPTCP subflows to the source address and port, and therefore the receiver <bcp14>MUST NOT</bcp14> try to open any additional subflows toward this address and port.
This improves efficiency in situations where the sender knows a restriction is in place -- for example, if the sender is behind a strict NAT or operating behind a legacy Layer 4 load balancer.</dd> <dt pn="section-3.1-20.7">D through H:</dt> <dd pn="section-3.1-20.8"> The remaining bits, labeled "D" through "H", are used for crypto algorithm negotiation. In this specification, only the rightmost bit, labeled "H", is assigned. Bit "H" indicates the use of HMAC-SHA256 (as defined in <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>). An implementation that only supports this method <bcp14>MUST</bcp14> set bit "H" to 1 and bits "D" through "G" to 0.</dd> </dl> <t pn="section-3.1-21">A crypto algorithm <bcp14>MUST</bcp14> be specified. If flag bits "D" through "H" are all 0, the MP_CAPABLE option <bcp14>MUST</bcp14> be treated as invalid and ignored (that is, it must be treated as a regular TCP handshake).</t> <t pn="section-3.1-22">The selection of the authentication algorithm also impacts the algorithm used to generate the token and the IDSN.
In this specification, with only the SHA-256 algorithm (bit "H") specified and selected, the token <bcp14>MUST</bcp14> be a truncated (most significant 32 bits) SHA-256 hash <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/> of the key. A different, 64-bit truncation (the least significant 64 bits) of the SHA-256 hash of the key <bcp14>MUST</bcp14> be used as the IDSN. Note that the key <bcp14>MUST</bcp14> be hashed in network byte order. Also note that the "least significant" bits <bcp14>MUST</bcp14> be the rightmost bits of the SHA-256 digest, as per <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>. Future specifications of the use of the crypto bits may choose to specify different algorithms for token and IDSN generation.</t> <t pn="section-3.1-23">Both the crypto and checksum bits negotiate capabilities in similar ways. For the "Checksum required" bit (labeled "A"), if either host requires the use of checksums, checksums <bcp14>MUST</bcp14> be used. In other words, the only way for checksums not to be used is if both hosts in their SYNs set A=0.
This decision is confirmed by the setting of the "A" bit in the third packet (the ACK) of the handshake. For example, if the initiator sets A=0 in the SYN but the responder sets A=1 in the SYN/ACK, checksums <bcp14>MUST</bcp14> be used in both directions, and the initiator will set A=1 in the ACK. The decision regarding whether to use checksums will be stored by an implementation in a per-connection binary state variable. If A=1 is received by a host that does not want to use checksums, it <bcp14>MUST</bcp14> fall back to regular TCP by ignoring the MP_CAPABLE option as if it was invalid.</t> <t pn="section-3.1-24">For crypto negotiation, the responder has the choice. The initiator creates a proposal setting a bit for each algorithm it supports to 1 (in this version of the specification, there is only one proposal, so bit "H" will always be set to 1).
The responder responds with only 1 bit set -- this is the chosen algorithm. The rationale for this behavior is that the responder will typically be a server with potentially many thousands of connections, so it may wish to choose an algorithm with minimal computational complexity, depending on the load. If a responder does not support (or does not want to support) any of the initiator's proposals, it <bcp14>MUST</bcp14> respond without an MP_CAPABLE option, thus forcing a fallback to regular TCP.</t> <t pn="section-3.1-25">The MP_CAPABLE option is only used in the first subflow of a connection, in order to identify the connection; all subsequent subflows will use the MP_JOIN option (see <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) to join the existing connection.</t> <t pn="section-3.1-26">If a SYN contains an MP_CAPABLE option but the SYN/ACK does not, it is assumed that the sender of the SYN/ACK is not multipath capable; thus, the MPTCP session <bcp14>MUST</bcp14> operate as a regular, single-path TCP session.
If a SYN does not contain an MP_CAPABLE option, the SYN/ACK <bcp14>MUST NOT</bcp14> contain one in response. If the third packet (the ACK) does not contain the MP_CAPABLE option, then the session <bcp14>MUST</bcp14> fall back to operating as a regular, single-path TCP session. This is done to maintain compatibility with middleboxes on the path that drop some or all TCP options. Note that an implementation <bcp14>MAY</bcp14> choose to attempt sending MPTCP options more than one time before making this decision to operate as regular TCP (see <xref target="heuristics" format="default" sectionFormat="of" derivedContent="Section 3.9"/>).</t> <t pn="section-3.1-27">If the SYN packets are unacknowledged, it is up to local policy to decide how to respond.
It is expected that a sender will eventually fall back to single-path TCP (i.e., without the MP_CAPABLE option) in order to work around middleboxes that may drop packets with unknown options; however, the number of multipath-capable attempts that are made first will be up to local policy. It is possible that MPTCP and non-MPTCP SYNs could get reordered in the network. Therefore, the final state is inferred from the presence or absence of the MP_CAPABLE option in the third packet of the TCP handshake. If this option is not present, the connection <bcp14>SHOULD</bcp14> fall back to regular TCP, as documented in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>.</t> <t pn="section-3.1-28">The IDSN on an MPTCP connection is generated from the key. The algorithm for IDSN generation is also determined from the negotiated authentication algorithm. In this specification, with only the SHA-256 algorithm specified and selected, the IDSN of a host <bcp14>MUST</bcp14> be the
least significant 64 bits of the SHA-256 hash of its key, i.e., IDSN-A = Hash(Key-A) and IDSN-B = Hash(Key-B). This deterministic generation of the IDSN allows a receiver to ensure that there are no gaps in sequence space at the start of the connection. The SYN with MP_CAPABLE occupies the first octet of data sequence space, although this does not need to be acknowledged at the connection level until the first data is sent (see <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>).</t> </section> <section anchor="sec_join" numbered="true" toc="include" removeInRFC="false" pn="section-3.2"> <name slugifiedName="name-starting-a-new-subflow">Starting a New Subflow</name> <t pn="section-3.2-1">Once an MPTCP connection has begun with the MP_CAPABLE exchange, further subflows can be added to the connection. Hosts have knowledge of their own address(es) and can become aware of the other host's addresses through signaling exchanges as described in <xref target="sec_pm" format="default" sectionFormat="of" derivedContent="Section 3.4"/>. Using this knowledge, a host can initiate a new subflow over a currently unused pair of addresses.
It is permissible for either host in a connection to initiate the creation of a new subflow, but it is expected that this will normally be the original connection initiator (see <xref target="heuristics" format="default" sectionFormat="of" derivedContent="Section 3.9"/> for heuristics).</t> <t pn="section-3.2-2">A new subflow is started as a normal TCP SYN/ACK exchange. The Join Connection (MP_JOIN) MPTCP option is used to identify the connection to be joined by the new subflow. It uses keying material that was exchanged in the initial MP_CAPABLE handshake (<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>), and that handshake also negotiates the crypto algorithm in use for the MP_JOIN handshake.</t> <t pn="section-3.2-3">This section specifies the behavior of MP_JOIN using the HMAC-SHA256 algorithm. An MP_JOIN option is present in the SYN, SYN/ACK, and ACK of the three-way handshake, although in each case with a different format.</t> <t pn="section-3.2-4">In the
first MP_JOIN on the SYN packet, illustrated in <xref target="tcpm_join" format="default" sectionFormat="of" derivedContent="Figure 5"/>, the initiator sends a token, random number, and Address ID.</t> <figure anchor="tcpm_join" align="left" suppress-title="false" pn="figure-5"> <name slugifiedName="name-join-connection-mp_join-opt">Join Connection (MP_JOIN) Option (for Initial SYN)</name> <artwork align="left" name="" type="" alt="" pn="section-3.2-5.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+-----+-+---------------+ | Kind | Length = 12 |Subtype|(rsv)|B| Address ID | +---------------+---------------+-------+-----+-+---------------+ | Receiver's Token (32 bits) | +---------------------------------------------------------------+ | Sender's Random Number (32 bits) | +---------------------------------------------------------------+ </artwork> </figure> <t pn="section-3.2-6">The token is used to 
identify the MPTCP connection and is a cryptographic hash of the receiver's key, as exchanged in the initial MP_CAPABLE handshake (<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>). In this specification, the tokens presented in this option are generated by the SHA-256 algorithm <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>, truncated to the most significant 32 bits. The token included in the MP_JOIN option is the token that the receiver of the packet uses to identify this connection; i.e., Host A will send Token-B (which is generated from Key-B). Note that the hash generation algorithm can be overridden by the 
choice of cryptographic handshake algorithm, as defined in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>.</t> <t pn="section-3.2-7">The MP_JOIN SYN sends not only the token (which is static for a connection) but also random numbers (nonces) that are used to prevent replay attacks on the authentication method. Recommendations for the generation of random numbers for this purpose are given in <xref target="RFC4086" format="default" sectionFormat="of" derivedContent="RFC4086"/>.</t> <t pn="section-3.2-8">The MP_JOIN option includes an "Address ID". This is an identifier generated by the sender of the option, used to identify the source address of this packet, even if the IP header has been changed in transit by a middlebox. The numeric value of this field is generated by the sender and must map uniquely to a source IP address for the sending host. The Address ID allows address removal (<xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>) without needing to know what the source address at the receiver is, thus allowing address removal through NATs. 
The Address ID also allows correlation between new subflow setup attempts and address signaling (<xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>), to prevent setting up duplicate subflows on the same path, if an MP_JOIN and ADD_ADDR are sent at the same time.</t> <t pn="section-3.2-9">The Address IDs of the subflow used in the initial SYN exchange of the first subflow in the connection are implicit and have the value zero. A host <bcp14>MUST</bcp14> store the mappings between Address IDs and addresses both for itself and the remote host. An implementation will also need to know which local and remote Address IDs are associated with which established subflows, for when addresses are removed from a local or remote host.</t> <t pn="section-3.2-10">The MP_JOIN option on packets with the SYN flag set also includes 4 bits of flags, 3 of which are currently reserved and <bcp14>MUST</bcp14> be set to 0 by the sender. The final bit, labeled "B", indicates whether the sender of this option (1) wishes this subflow to be used 
as a backup path (B=1) in the event of failure of other paths or (2) wants the subflow to be used as part of the connection immediately. By setting B=1, the sender of the option is requesting that the other host only send data on this subflow if there are no available subflows where B=0. Subflow policy is discussed in more detail in <xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>.</t> <t pn="section-3.2-11">When receiving a SYN with an MP_JOIN option that contains a valid token for an existing MPTCP connection, the recipient <bcp14>SHOULD</bcp14> respond with a SYN/ACK also containing an MP_JOIN option containing a random number and a truncated (leftmost 64 bits) HMAC. This version of the option is shown in 
<xref target="tcpm_join2" format="default" sectionFormat="of" derivedContent="Figure 6"/>. If the token is unknown or the host wants to refuse subflow establishment (for example, due to a limit on the number of subflows it will permit), the receiver will send back a reset (RST) signal, analogous to an unknown port in TCP, containing an MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) with an "MPTCP specific error" reason code. Although calculating an HMAC requires cryptographic operations, it is believed that the 32-bit token in the MP_JOIN SYN gives sufficient protection against blind state exhaustion attacks; therefore, there is no need to provide mechanisms to allow a responder to operate statelessly at the MP_JOIN stage.</t> 
<figure anchor="tcpm_join2" align="left" suppress-title="false" pn="figure-6"> <name slugifiedName="name-join-connection-mp_join-opti">Join Connection (MP_JOIN) Option (for Responding SYN/ACK)</name> <artwork align="left" name="" type="" alt="" pn="section-3.2-12.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+-----+-+---------------+ | Kind | Length = 16 |Subtype|(rsv)|B| Address ID | +---------------+---------------+-------+-----+-+---------------+ | | | Sender's Truncated HMAC (64 bits) | | | +---------------------------------------------------------------+ | Sender's Random Number (32 bits) | +---------------------------------------------------------------+ </artwork> </figure> <t pn="section-3.2-13">An HMAC is sent by both hosts -- by the initiator (Host A) in the third packet (the ACK) and by the responder (Host B) in the second packet (the SYN/ACK). Doing the HMAC exchange at this stage allows both hosts to have first exchanged random data (in the first two SYN packets) that is used as the "message". This specification defines that HMAC as defined in <xref target="RFC2104" format="default" sectionFormat="of" derivedContent="RFC2104"/> is used, along with the SHA-256 hash algorithm <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>, and that the output is truncated to the leftmost 160 bits (20 octets). Due to option space limitations, the HMAC included in 
the SYN/ACK is truncated to the leftmost 64 bits, but this is acceptable, since random numbers are used; thus, an attacker only has one chance to correctly guess the HMAC that matches the random number previously sent by the peer (if the HMAC is incorrect, the TCP connection is closed, so a new MP_JOIN negotiation with a new random number is required).</t> <t pn="section-3.2-14">The initiator's authentication information is sent in its first ACK (the third packet of the handshake), as shown in <xref target="tcpm_join3" format="default" sectionFormat="of" derivedContent="Figure 7"/>. This data needs to be sent reliably, since it is 
the only time this HMAC is sent; therefore, receipt of this packet <bcp14>MUST</bcp14> trigger a regular TCP ACK in response, and the packet <bcp14>MUST</bcp14> be retransmitted if this ACK is not received. In other words, sending the ACK/MP_JOIN packet places the subflow in the PRE_ESTABLISHED state, and it moves to the ESTABLISHED state only on receipt of an ACK from the receiver. It is not permissible to send data while in the PRE_ESTABLISHED state. The reserved bits in this 
option <bcp14>MUST</bcp14> be set to 0 by the sender.</t> <figure anchor="tcpm_join3" align="left" suppress-title="false" pn="figure-7"> <name slugifiedName="name-join-connection-mp_join-optio">Join Connection (MP_JOIN) Option (for Initiator's First ACK)</name> <artwork align="left" name="" type="" alt="" pn="section-3.2-15.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+-----------------------+ | Kind | Length = 24 |Subtype| (reserved) | +---------------+---------------+-------+-----------------------+ | | | | | Sender's Truncated HMAC (160 bits) | | | | | +---------------------------------------------------------------+ </artwork> </figure> <t pn="section-3.2-16">The key for the HMAC algorithm, in the case of the message transmitted by Host A, will be Key-A followed by Key-B; and in the case of Host B, Key-B followed by Key-A. 
These are the keys that were exchanged in the original MP_CAPABLE handshake. The "message" for the HMAC algorithm in each case is the concatenations of random numbers for each host (denoted by R): for Host A, R-A followed by R-B; and for Host B, R-B followed by R-A.</t> <t pn="section-3.2-17">These various MPTCP options fit together to enable authenticated subflow setup as illustrated in <xref target="fig_tokens" format="default" sectionFormat="of" derivedContent="Figure 8"/>.</t> <figure anchor="fig_tokens" align="left" suppress-title="false" pn="figure-8"> <name slugifiedName="name-example-use-of-mptcp-authen">Example Use of MPTCP Authentication</name> <artwork align="left" name="" type="" alt="" pn="section-3.2-18.1"> Host A Host B ------------------------ ---------- Address A1 Address A2 Address B1 ---------- ---------- ---------- | | | | | SYN + MP_CAPABLE | |---------------------------------------------&gt;| |&lt;---------------------------------------------| | SYN/ACK + MP_CAPABLE(Key-B) | | | | | ACK + MP_CAPABLE(Key-A, Key-B) | |---------------------------------------------&gt;| | | | | | SYN + MP_JOIN(Token-B, R-A) | | |-------------------------------&gt;| | |&lt;-------------------------------| | | SYN/ACK + MP_JOIN(HMAC-B, R-B) | | | | | | ACK + MP_JOIN(HMAC-A) | | |-------------------------------&gt;| | |&lt;-------------------------------| | | ACK | HMAC-A = HMAC(Key=(Key-A + Key-B), Msg=(R-A + R-B)) HMAC-B = HMAC(Key=(Key-B + Key-A), Msg=(R-B + R-A)) </artwork> </figure> <t pn="section-3.2-19">If the 
token received at Host B is unknown or local policy prohibits the acceptance of the new subflow, the recipient <bcp14>MUST</bcp14> respond with a TCP RST for the subflow. If appropriate, an MP_TCPRST option with an "Administratively prohibited" reason code (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) should be included.</t> <t pn="section-3.2-20">If the token is accepted at Host B but the HMAC returned to Host A does not match the one expected, Host A <bcp14>MUST</bcp14> close the subflow with a TCP RST. In this and all subsequent cases of sending a RST as described in this section, the sender <bcp14>SHOULD</bcp14> send an MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) on this RST packet with the reason code for an "MPTCP-specific error".</t> <t pn="section-3.2-21">If Host B does not receive the expected HMAC or the MP_JOIN option is missing from the ACK, it <bcp14>MUST</bcp14> close the subflow with a TCP RST.</t> <t pn="section-3.2-22">If the HMACs are verified as correct, then both hosts have verified each other as being the same peers as those that existed at the 
start of the connection, and they have agreed of which connection this subflow will become a part.</t> <t pn="section-3.2-23">If the SYN/ACK as received at Host A does not have an MP_JOIN option, Host A <bcp14>MUST</bcp14> close the subflow with a TCP RST.</t> <t pn="section-3.2-24">This covers all cases of the loss of an MP_JOIN. In more detail, if an MP_JOIN is stripped from the SYN on the path from A to B and Host B does not have a listener on the relevant port, it will respond with a RST in the normal way. If in response to a SYN with an MP_JOIN option a SYN/ACK is received without the MP_JOIN option 
(because it was either stripped on the return path, or stripped on the outgoing path leading to Host B responding as if it was a new regular TCP session), then the subflow is unusable and Host A <bcp14>MUST</bcp14> close it with a RST.</t> <t pn="section-3.2-25">Note that additional subflows can be created between any pair of ports (but see <xref target="heuristics" format="default" sectionFormat="of" derivedContent="Section 3.9"/> for heuristics); no explicit application-level accept calls or bind calls are required to open additional subflows. To associate a new subflow with an existing connection, the token supplied in the subflow's SYN exchange is used for demultiplexing. This then binds the 5-tuple of the TCP subflow to the local token of the connection. One consequence is that it is possible to allow any port pairs to be used for a connection. </t> <t pn="section-3.2-26">Demultiplexing subflow SYNs <bcp14>MUST</bcp14> be done using the token; this is 
unlike traditional TCP, where the destination port is used for demultiplexing SYN packets. Once a subflow is set up, demultiplexing packets is done using the 5-tuple, as in traditional TCP. The 5-tuples will be mapped to the local connection identifier (token). Note that Host A will know its local token for the subflow even though it is not sent on the wire -- only the responder's token is sent.</t> </section> <section anchor="sec_generalop" numbered="true" toc="include" removeInRFC="false" pn="section-3.3"> <name slugifiedName="name-mptcp-operation-and-data-tr">MPTCP Operation and Data Transfer</name> <t pn="section-3.3-1">This section discusses the operation of MPTCP for data transfer. At a high level, an MPTCP implementation will take one input data stream from an application and split it into one or more subflows, with sufficient control information to allow it to be reassembled and delivered reliably and in order to the recipient application. 
The following subsections define this behavior in detail.</t> <t pn="section-3.3-2">The Data Sequence Mapping and the Data ACK are signaled in the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9"/>). Either or both can be signaled in one DSS, depending on the flags set. The Data Sequence Mapping defines how the sequence space on the subflow maps to the connection level, and the Data ACK acknowledges receipt of data at the connection level. These functions are described in more detail in the 
following two subsections.</t> <figure anchor="tcpm_dsn" align="left" suppress-title="false" pn="figure-9"> <name slugifiedName="name-data-sequence-signal-dss-op">Data Sequence Signal (DSS) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.3-3.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +---------------+---------------+-------+----------------------+ | Kind | Length |Subtype| (reserved) |F|m|M|a|A| +---------------+---------------+-------+----------------------+ | Data ACK (4 or 8 octets, depending on flags) | +--------------------------------------------------------------+ | Data Sequence Number (4 or 8 octets, depending on flags) | +--------------------------------------------------------------+ | Subflow Sequence Number (4 octets) | +-------------------------------+------------------------------+ | Data-Level Length (2 octets) | Checksum (2 octets) | +-------------------------------+------------------------------+ </artwork> </figure> <t pn="section-3.3-4">The flags, when set, define the contents of this option, as follows: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.3-5"> <li pn="section-3.3-5.1">A = Data ACK present</li> <li pn="section-3.3-5.2">a = Data ACK is 8 octets (if not set, Data ACK is 4 octets)</li> <li pn="section-3.3-5.3">M = Data Sequence Number (DSN), Subflow Sequence Number (SSN), Data-Level Length, and Checksum (if negotiated) present</li> <li pn="section-3.3-5.4">m = Data Sequence Number is 8 octets (if not set, DSN is 4 octets)</li> </ul> <t pn="section-3.3-6"> The flags "a" and "m" only have meaning if the corresponding "A" or "M" flags are set; otherwise, they will be ignored. 
The maximum length of this option, with all flags set, is 28 octets.</t> <t pn="section-3.3-7">The "F" flag indicates "Data FIN". If present, this means that this mapping covers the final data from the sender. This is the connection-level equivalent of the FIN flag in single-path TCP. A connection is not closed unless there has been a Data FIN exchange, an MP_FASTCLOSE (<xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"/>) message, or an implementation-specific connection-level send timeout. The purpose of the Data FIN and the interactions between this flag, the subflow-level FIN flag, and the Data Sequence Mapping are described in <xref target="sec_close" format="default" sectionFormat="of" derivedContent="Section 3.3.3"/>. The remaining reserved bits <bcp14>MUST</bcp14> be set to 0 by an implementation of this specification.</t> <t pn="section-3.3-8">Note that the checksum is only present in this option if the use of MPTCP checksumming has been negotiated at the MP_CAPABLE handshake (see <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>). 
The presence of the checksum can be inferred from the length of the option. If a checksum is present but its use had not been negotiated in the MP_CAPABLE handshake, the receiver <bcp14>MUST</bcp14> close the subflow with a RST, as it is not behaving as negotiated. If a checksum is not present when its use has been negotiated, the receiver <bcp14>MUST</bcp14> close the subflow with a RST, as it is considered broken. In both cases, this RST <bcp14>SHOULD</bcp14> be accompanied by an MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) with the reason code for an "MPTCP-specific error".</t> <section anchor="sec_dsn" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.1"> <name slugifiedName="name-data-sequence-mapping">Data Sequence Mapping</name> <t pn="section-3.3.1-1">The data stream as a whole can be reassembled through the use of the Data Sequence Mapping components of the DSS option (<xref
target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9"/>), which define the mapping from the subflow sequence number to the data sequence number. This is used by the receiver to ensure in-order delivery to the application layer. Meanwhile, the subflow-level sequence numbers (i.e., the regular sequence numbers in the TCP header) are only relevant to the subflow. It is expected (but not mandated) that SACK <xref target="RFC2018" format="default" sectionFormat="of" derivedContent="RFC2018"/> will be used at the subflow level to improve efficiency.</t> <t pn="section-3.3.1-2">The Data Sequence Mapping specifies a mapping from the subflow sequence space to the data sequence space. This is expressed in terms of starting sequence numbers for the subflow and the data level, and a length of bytes for which this mapping is valid. This explicit mapping for a range of data, rather than per‑packet signaling, was chosen to assist with compatibility with situations where TCP/IP segmentation or coalescing is undertaken separately from the stack that is generating the data flow (e.g., through the
use of TCP segmentation offloading on network interface cards, or by middleboxes such as Performance Enhancing Proxies (PEPs) <xref target="RFC3135" format="default" sectionFormat="of" derivedContent="RFC3135"/>). It also allows a single mapping to cover many packets; this may be useful in bulk‑transfer situations.</t> <t pn="section-3.3.1-3">A mapping is fixed, in that the subflow sequence number is bound to the data sequence number after the mapping has been processed. A sender <bcp14>MUST NOT</bcp14> change this mapping after it has been declared; however, the same data sequence number can be mapped to by different subflows for retransmission purposes (see <xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>). This would also permit the same data to be sent simultaneously on multiple subflows for resilience or efficiency purposes, especially in the case of lossy links. Although the detailed specification of such operation is outside the scope of this document, an implementation <bcp14>SHOULD</bcp14> treat the first data that is received at a
subflow for the data sequence space as the data that should be delivered to the application, and any subsequent data for that sequence space <bcp14>SHOULD</bcp14> be ignored.</t> <t pn="section-3.3.1-4">The data sequence number is specified as an absolute value, whereas the subflow sequence numbering is relative (the SYN at the start of the subflow has a relative subflow sequence number of 0). This is done to allow middleboxes to change the Initial Sequence Number (ISN) of a subflow, such as firewalls that undertake ISN randomization.</t> <t pn="section-3.3.1-5">The Data Sequence Mapping also contains a checksum of the data that this mapping covers, if the use of checksums has been negotiated at the MP_CAPABLE exchange. Checksums are used to detect if the payload has been adjusted in any way by a non-MPTCP-aware middlebox. If this
checksum fails, it will trigger a failure of the subflow, or a fallback to regular TCP, as documented in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>, since MPTCP can no longer reliably know the subflow sequence space at the receiver to build Data Sequence Mappings. Without checksumming enabled, corrupt data may be delivered to the application if a middlebox alters segment boundaries, alters content, or does not deliver all segments covered by a Data Sequence Mapping. It is therefore <bcp14>RECOMMENDED</bcp14> that checksumming be used, unless it is known that the network path contains no such devices.</t> <t pn="section-3.3.1-6">The checksum algorithm used is the standard TCP checksum <xref target="RFC0793" format="default" sectionFormat="of" derivedContent="RFC0793"/>, operating over the data covered by this mapping, along with a
pseudo‑header as shown in <xref target="fig_pseudo" format="default" sectionFormat="of" derivedContent="Figure 10"/>.</t> <figure anchor="fig_pseudo" align="left" suppress-title="false" pn="figure-10"> <name slugifiedName="name-pseudo-header-for-dss-check">Pseudo-Header for DSS Checksum</name> <artwork align="left" name="" type="" alt="" pn="section-3.3.1-7.1"> 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +--------------------------------------------------------------+ | | | Data Sequence Number (8 octets) | | | +--------------------------------------------------------------+ | Subflow Sequence Number (4 octets) | +-------------------------------+------------------------------+ | Data-Level Length (2 octets) | Zeros (2 octets) | +-------------------------------+------------------------------+ </artwork> </figure> <t pn="section-3.3.1-8">Note that the data sequence number used in the pseudo-header is always the 64-bit value, irrespective of what length is used in the DSS option itself. The standard TCP checksum algorithm has been chosen, since it will be calculated anyway for the TCP subflow, and if calculated first over the data before adding the pseudo-headers, it only needs to be calculated once. Furthermore, since the TCP checksum is additive, the checksum for a DSN_MAP can be constructed by simply adding together the checksums for the data of each constituent TCP segment and adding the
checksum for the DSS pseudo‑header.</t> <t pn="section-3.3.1-9">Note that checksumming relies on the TCP subflow containing contiguous data; therefore, a TCP subflow <bcp14>MUST NOT</bcp14> use the Urgent Pointer to interrupt an existing mapping. Further note, however, that if Urgent data is received on a subflow, it <bcp14>SHOULD</bcp14> be mapped to the data sequence space and delivered to the application, analogous to Urgent data in regular TCP.</t> <t pn="section-3.3.1-10">To avoid possible deadlock scenarios, subflow-level processing should be undertaken separately from processing at the connection level. Therefore, even if a mapping does not exist from the subflow space to the data‑level space, the data <bcp14>SHOULD</bcp14> still be ACKed at the subflow (if it is in-window). 
This data cannot, however, be acknowledged at the data level (<xref target="sec_dataack" format="default" sectionFormat="of" derivedContent="Section 3.3.2"/>) because its data sequence numbers are unknown. Implementations <bcp14>MAY</bcp14> hold onto such unmapped data for a short while, in the expectation that a mapping will arrive shortly. Such unmapped data cannot be counted as being within the connection-level receive window because this is relative to the data sequence numbers, so if the receiver runs out of memory to hold this data, it will have to be discarded. If a mapping for that subflow-level sequence space does not arrive within a receive window of data, that subflow <bcp14>SHOULD</bcp14> be treated as broken, closed with a RST, and any unmapped data silently discarded.</t> <t pn="section-3.3.1-11">Data sequence numbers are always 64-bit quantities and <bcp14>MUST</bcp14> be maintained as such in implementations. 
If a connection is progressing at a slow rate, so protection against wrapped sequence numbers is not required, then an implementation <bcp14>MAY</bcp14> include just the lower 32 bits of the data sequence number in the Data Sequence Mapping and/or Data ACK as an optimization, and an implementation can make this choice independently for each packet. An implementation <bcp14>MUST</bcp14> be able to receive and process both 64-bit and 32-bit sequence number values, but it is not required that an implementation be able to send both.</t> <t pn="section-3.3.1-12">An implementation <bcp14>MUST</bcp14> send the full 64-bit data sequence number if it is transmitting at a sufficiently high rate that the 32-bit value could wrap within the Maximum Segment Lifetime (MSL) <xref target="RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>. The lengths of the DSNs used in these values (which may be different) are declared with flags in the DSS option. 
Implementations <bcp14>MUST</bcp14> accept a 32-bit DSN and implicitly promote it to a 64-bit quantity by incrementing the upper 32 bits of the sequence number each time the lower 32 bits wrap. A sanity check <bcp14>MUST</bcp14> be implemented to ensure that a wrap occurs at an expected time (e.g., the sequence number jumps from a very high number to a very low number) and is not triggered by out‑of-order packets.</t> <t pn="section-3.3.1-13">As with the standard TCP sequence number, the data sequence number should not start at zero, but at a random value to make blind session hijacking harder. This specification requires setting the IDSN of each host to the least significant 64 bits of the SHA-256 hash of the host's key, as described in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>. This is also required in order for the receiver to know what the expected IDSN is and thus determine if any initial connection-level packets are missing; this is particularly relevant if two subflows start transmitting simultaneously.</t> <t pn="section-3.3.1-14">The mapping provided by a Data Sequence Mapping <bcp14>MUST</bcp14> apply to some or all of the subflow sequence space in the TCP segment that carries the option. 
It does not need to be included in every MPTCP packet, as long as the subflow sequence space in that packet is covered by a mapping known at the receiver. This can be used to reduce overhead in cases where the mapping is known in advance. One such case is when there is a single subflow between the hosts, and another is when segments of data are scheduled in larger-than-packet-sized chunks.</t> <t pn="section-3.3.1-15">An "infinite" mapping can be used to fall back to regular TCP by mapping the subflow-level data to the connection-level data for the remainder of the connection (see <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>). This is achieved by setting the Data-Level Length field of the DSS option to the reserved value of 0. 
The checksum, in such a case, will also be set to 0.</t> </section> <section anchor="sec_dataack" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.2"> <name slugifiedName="name-data-acknowledgments">Data Acknowledgments</name> <t pn="section-3.3.2-1">To provide full end-to-end resilience, MPTCP provides a connection-level acknowledgment, to act as a cumulative ACK for the connection as a whole. This is done via the "Data ACK" field of the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9"/>). The Data ACK is analogous to the behavior of the standard TCP cumulative ACK -- indicating how much data has been successfully received (with no holes). This can be compared to the subflow-level ACK, which acts in a fashion analogous to TCP SACK, given that there may still be holes in the data stream at the
connection level. The Data ACK specifies the next data sequence number it expects to receive.</t> <t pn="section-3.3.2-2">The Data ACK, as for the DSN, can be sent as the full 64-bit value or as the lower 32 bits. If data is received with a 64-bit DSN, it <bcp14>MUST</bcp14> be acknowledged with a 64-bit Data ACK. If the DSN received is 32 bits, an implementation can choose whether to send a 32-bit or 64-bit Data ACK, and an implementation <bcp14>MUST</bcp14> accept either in this situation.</t> <t pn="section-3.3.2-3">The Data ACK proves that the data, and all required MPTCP signaling, have been received and accepted by the remote end. One key use of the Data ACK signal is that it is used to indicate the left edge of the advertised receive window. As explained in <xref target="sec_rwin" format="default" sectionFormat="of" derivedContent="Section 3.3.4"/>, the receive window is shared by all subflows and is relative to the Data ACK. 
Because of this, an implementation <bcp14>MUST NOT</bcp14> use the RCV.WND field of a TCP segment at the connection level if it does not also carry a DSS option with a Data ACK field. Furthermore, separating the connection-level acknowledgments from the subflow level allows processing to be done separately, and a receiver has the freedom to drop segments after acknowledgment at the subflow level -- for example, due to memory constraints when many segments arrive out of order.</t> <t pn="section-3.3.2-4">An MPTCP sender <bcp14>MUST NOT</bcp14> free data from the send buffer until it has been acknowledged by both a Data ACK received on any subflow and at the subflow level by all subflows on which the data was sent. 
The former condition ensures liveness of the connection, and the latter condition ensures liveness and self-consistence of a subflow when data needs to be retransmitted. Note, however, that if some data needs to be retransmitted multiple times over a subflow, there is a risk of blocking the send window. In this case, the MPTCP sender can decide to terminate the subflow that is behaving badly by sending a RST, using an appropriate MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) error code.</t> <t pn="section-3.3.2-5">The Data ACK <bcp14>MAY</bcp14> be included in all segments; however, optimizations <bcp14>SHOULD</bcp14> be considered in more advanced implementations, where the Data ACK is present in segments only when the Data ACK value advances, and this behavior <bcp14>MUST</bcp14> be treated as valid. This behavior ensures that the send buffer is freed, while reducing overhead when the data
transfer is unidirectional.</t> </section> <section anchor="sec_close" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.3"> <name slugifiedName="name-closing-a-connection">Closing a Connection</name> <t pn="section-3.3.3-1">In regular TCP, a FIN announces to the receiver that the sender has no more data to send. In order to allow subflows to operate independently and to keep the appearance of TCP over the wire, a FIN in MPTCP only affects the subflow on which it is sent. This allows nodes to exercise considerable freedom over which paths are in use at any one time. The semantics of a FIN remain as for
regular TCP; i.e., it is not until both sides have ACKed each other's FINs that the subflow is fully closed.</t> <t pn="section-3.3.3-2">When an application calls close() on a socket, this indicates that it has no more data to send; for regular TCP, this would result in a FIN on the connection. For MPTCP, an equivalent mechanism is needed; this is referred to as the DATA_FIN.</t> <t pn="section-3.3.3-3">A DATA_FIN is an indication that the sender has no more data to send, and as such it can be used to verify that all data has been successfully received. A DATA_FIN, as with the FIN on a regular TCP connection, is a unidirectional signal.</t> <t pn="section-3.3.3-4">The DATA_FIN is signaled by setting the "F" flag in the DSS option (<xref target="tcpm_dsn" format="default" sectionFormat="of" derivedContent="Figure 9"/>) to 1. A DATA_FIN occupies 1 octet (the final octet) of the connection-level sequence space. 
Note that the DATA_FIN is included in the Data-Level Length but not at the subflow level: for example, a segment with a DSN value of 80 and a Data-Level Length of 11, with DATA_FIN set, would map 10 octets from the subflow into data sequence space 80-89, and the DATA_FIN would be DSN 90; therefore, this segment, including DATA_FIN, would be acknowledged with a DATA_ACK of 91.</t> <t pn="section-3.3.3-5">Note that when the DATA_FIN is not attached to a TCP segment containing data, the DSS <bcp14>MUST</bcp14> have a subflow sequence number of 0, a Data-Level Length of 1, and the data sequence number that corresponds with the DATA_FIN itself. The checksum in this case will only cover the pseudo-header.</t> <t pn="section-3.3.3-6">A DATA_FIN has the same semantics and behavior as a regular TCP FIN, but at the connection level. Notably, it is only DATA_ACKed once all data has been successfully received at the connection level. Note, therefore, that a DATA_FIN is decoupled from a subflow FIN. It is only permissible to combine these signals on one subflow if there is no data outstanding on other subflows. 
Otherwise, it may be necessary to retransmit data on different subflows. Essentially, a host <bcp14>MUST NOT</bcp14> close all functioning subflows unless it is safe to do so, i.e., until all outstanding data has been DATA_ACKed or until the segment with the DATA_FIN flag set is the only outstanding segment.</t> <t pn="section-3.3.3-7">Once a DATA_FIN has been acknowledged, all remaining subflows <bcp14>MUST</bcp14> be closed with standard FIN exchanges. Both hosts <bcp14>SHOULD</bcp14> send FINs on all subflows, as a courtesy, to allow middleboxes to clean up state even if an individual subflow has failed. Reducing the timeouts (MSL) on subflows at end hosts after receiving a DATA_FIN is also encouraged. 
In particular, any subflows where there is still outstanding data queued (which has been retransmitted on other subflows in order to get the DATA_FIN acknowledged) <bcp14>MAY</bcp14> be closed with a RST with an MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) error code for "too much outstanding data".</t> <t pn="section-3.3.3-8">A connection is considered closed once both hosts' DATA_FINs have been acknowledged by DATA_ACKs.</t> <t pn="section-3.3.3-9">As specified above, a standard TCP FIN on an individual subflow only shuts down the subflow on which it was sent. If all subflows have been closed with a FIN exchange but no DATA_FIN has been received and acknowledged, the MPTCP connection is treated as closed only after a timeout. This implies that an implementation will have TIME_WAIT states at both the subflow level and the connection level (see <xref target="app_fsm" format="default" sectionFormat="of" derivedContent="Appendix D"/>). 
This permits "break-before-make" scenarios where connectivity is lost on all subflows before a new one can be re‑established.</t> </section> <section anchor="sec_rwin" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.4"> <name slugifiedName="name-receiver-considerations">Receiver Considerations</name> <t pn="section-3.3.4-1">Regular TCP advertises a receive window in each packet, telling the sender how much data the receiver is willing to accept past the cumulative ACK. The receive window is used to implement flow control, throttling down fast senders when receivers cannot keep up. </t> <t pn="section-3.3.4-2">MPTCP also uses a unique receive window, shared between the subflows. The idea is to allow any subflow to send data as long as the receiver is willing to accept it. 
The alternative -- maintaining per-subflow receive windows -- could end up stalling some subflows while others would not use up their window.</t> <t pn="section-3.3.4-3">The receive window is relative to the DATA_ACK. As in TCP, a receiver <bcp14>MUST NOT</bcp14> shrink the right edge of the receive window (i.e., DATA_ACK + receive window). The receiver will use the data sequence number to tell if a packet should be accepted at the connection level.</t> <t pn="section-3.3.4-4">When deciding to accept packets at the subflow level, regular TCP checks the sequence number in the packet against the allowed receive window. With MPTCP, such a check is done using only the connection-level window. 
A sanity check <bcp14>SHOULD</bcp14> be performed at the subflow level to ensure that the subflow and mapped sequence numbers meet the following test: SSN - SUBFLOW_ACK &lt;= DSN - DATA_ACK, where SSN is the subflow sequence number of the received packet and SUBFLOW_ACK is the RCV.NXT (next expected sequence number) of the subflow (with the equivalent connection-level definitions for DSN and DATA_ACK).</t> <t pn="section-3.3.4-5">In regular TCP, once a segment is deemed in-window, it is put in either the in-order receive queue or the out-of-order queue. In Multipath TCP, the same thing happens, but at the connection level: a segment is placed in the connection-level in-order or out-of-order queue if it is in-window at both the connection level and the subflow level. The stack still has to remember, for each subflow, which segments were received successfully so that it can ACK them at the subflow level appropriately. Typically, this will be implemented by keeping per-subflow out-of-order queues (containing only message headers -- not the payloads) and remembering the value of the cumulative ACK. </t> <t pn="section-3.3.4-6">It is important for implementers to understand how large a receive buffer is appropriate. The lower bound for full network utilization is the maximum bandwidth-delay product of any one of the paths. However, this might be insufficient when a packet is lost on a slower subflow and needs to be retransmitted (see <xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>). A tight upper bound would be the maximum round-trip time (RTT) of any path multiplied by the total bandwidth available across all paths. This permits all subflows to continue at full speed while a packet is fast-retransmitted on the maximum RTT path. Even this might be insufficient to maintain full performance in the event of a retransmit timeout on the maximum RTT path. 
Determining the relationship between retransmission strategies and receive buffer sizing is left for future study.</t> </section> <section anchor="sec_sender" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.5"> <name slugifiedName="name-sender-considerations">Sender Considerations</name> <t pn="section-3.3.5-1">The sender remembers receive window advertisements from the receiver. It should only update its local receive window values when the largest sequence number allowed (i.e., DATA_ACK + receive window) increases on the receipt of a DATA_ACK. This is important for allowing the use of paths with different RTTs and thus different feedback loops. </t> <t pn="section-3.3.5-2">MPTCP uses a single receive window across all subflows, and if the receive window was guaranteed to be unchanged end to end, a host could always read the most recent receive window value. However, some classes of middleboxes may alter the TCP-level receive window. Typically, these will shrink the offered window, although for short periods of time it may be possible for the window to be larger (however, note that this would not continue for long periods, since ultimately the middlebox must keep up with delivering data to the receiver). 
Therefore, if receive window sizes differ on multiple subflows, when sending data MPTCP <bcp14>SHOULD</bcp14> take the largest of the most recent window sizes as the one to use in calculations. This rule is implicit in the requirement not to reduce the right edge of the window.</t> <t pn="section-3.3.5-3">The sender <bcp14>MUST</bcp14> also remember the receive windows advertised by each subflow. The allowed window for subflow i is (ack_i, ack_i + rcv_wnd_i), where ack_i is the subflow-level cumulative ACK of subflow i. This ensures that data will not be sent to a middlebox unless there is enough buffering for the data. </t> <t pn="section-3.3.5-4">Putting the 
two rules together, we get the following: a sender is allowed to send data segments with data-level sequence numbers between (DATA_ACK, DATA_ACK + receive_window). Each of these segments will be mapped onto subflows, as long as subflow sequence numbers are in the allowed windows for those subflows. Note that subflow sequence numbers do not generally affect flow control if the same receive window is advertised across all subflows. They will perform flow control for those subflows with a smaller advertised receive window. </t> <t pn="section-3.3.5-5">The send buffer <bcp14>MUST</bcp14>, at a minimum, be as big as the receive buffer, to enable the sender to reach maximum throughput.</t> </section> <section anchor="sec_retransmit" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.6"> <name slugifiedName="name-reliability-and-retransmiss">Reliability and Retransmissions</name> <t pn="section-3.3.6-1">The Data Sequence Mapping allows senders to resend data with the same data sequence number on a different subflow. When doing this, a host <bcp14>MUST</bcp14> still retransmit the original data on the original subflow, in order to preserve the subflow's integrity (middleboxes could replay old data and/or could reject holes in subflows), and a receiver will ignore these retransmissions. While this is clearly suboptimal, for 
compatibility reasons this is sensible behavior. Optimizations could be negotiated in future versions of this protocol. Note also that this property would also permit a sender to always send the same data, with the same data sequence number, on multiple subflows, if desired for reliability reasons.</t> <t pn="section-3.3.6-2">This protocol specification does not mandate any mechanisms for handling retransmissions, and much will be dependent upon local policy (as discussed in <xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>). One can imagine aggressive connection-level retransmission policies where every packet lost at the subflow level is retransmitted on a different subflow (hence wasting bandwidth but possibly reducing application-to-application delays) or conservative retransmission policies where connection-level retransmissions are only used after a few subflow-level retransmission timeouts occur.</t> <t pn="section-3.3.6-3">It is envisaged that a standard connection-level retransmission mechanism would be implemented around a connection-level data queue: all segments that haven't been DATA_ACKed are stored. A timer is set when the head of the connection level is ACKed at the subflow level but is not DATA_ACKed at the data level. This timer will 
guard against retransmission failures by middleboxes that proactively ACK data.</t> <t pn="section-3.3.6-4">The sender <bcp14>MUST</bcp14> keep data in its send buffer as long as the data has not been acknowledged both (1) at the connection level and (2) on all subflows on which it has been sent. In this way, the sender can always retransmit the data if needed, on the same subflow or on a different one. A special case is when a subflow fails: the sender will typically resend the data on other working subflows after a timeout and will keep trying to retransmit the data on the failed subflow too. The sender will declare the subflow failed after a predefined upper bound on retransmissions is reached (which <bcp14>MAY</bcp14> be lower than the usual TCP limits of the MSL) or on the receipt of an ICMP error, and only then delete the outstanding data segments. </t> <t pn="section-3.3.6-5">If multiple retransmissions that indicate that a subflow is performing badly are triggered, this <bcp14>MAY</bcp14> lead to a host resetting the subflow with a RST. However, additional research is required to understand the heuristics of how and when to 
reset underperforming subflows. For example, a highly asymmetric path may be misdiagnosed as underperforming. A RST for this purpose <bcp14>SHOULD</bcp14> be accompanied by an "Unacceptable performance" MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>).</t> </section> <section anchor="sec_cc" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.7"> <name slugifiedName="name-congestion-control-consider">Congestion Control Considerations</name> <t pn="section-3.3.7-1">Different subflows in an MPTCP connection have different congestion windows. To achieve fairness at bottlenecks and resource pooling, it is necessary to couple the congestion windows in use on each subflow, in order to 
push most traffic to uncongested links. One algorithm for achieving this is presented in <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/>; the algorithm does not achieve perfect resource pooling but is "safe" in that it is readily deployable in the current Internet. By this we mean that it does not take up more capacity on any one path than if it was a single path flow using only that route, so this ensures fair coexistence with single-path TCP at shared bottlenecks.</t> <t pn="section-3.3.7-2">It is foreseeable that different congestion controllers will be implemented for MPTCP, each aiming to achieve different properties in the resource pooling / fairness / stability design space, as well as those for achieving different properties in quality of service, reliability, and resilience.</t> <t pn="section-3.3.7-3">Regardless of the algorithm used, the design of MPTCP aims to provide the congestion control implementations with sufficient information to make the right decisions; this information includes, for each subflow, which packets were lost and when. 
</t> </section> <section anchor="sec_policy" numbered="true" toc="include" removeInRFC="false" pn="section-3.3.8"> <name slugifiedName="name-subflow-policy">Subflow Policy</name> <t pn="section-3.3.8-1">Within a local MPTCP implementation, a host may use any local policy it wishes to decide how to share the traffic to be sent over the available paths.</t> <t pn="section-3.3.8-2">In the typical use case, where the goal is to maximize throughput, all available paths will be used simultaneously for data transfer, using coupled congestion control as described in <xref target="RFC6356" format="default" sectionFormat="of" derivedContent="RFC6356"/>. It is expected, however, that other use cases will appear.</t> <t pn="section-3.3.8-3">For instance, one possibility is an "all-or-nothing" approach, i.e., have a second path ready for use in the event of failure of the first path, but alternatives could include entirely saturating one path before using an additional path (the "overflow" case). 
Such choices would be most likely based on the monetary cost of links but may also be based on properties such as the delay or jitter of links, where stability (of delay or bandwidth) is more important than throughput. Application requirements such as these are discussed in detail in <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/>.</t> <t pn="section-3.3.8-4">The ability to make effective choices at the sender requires full knowledge of the path "cost", which is unlikely to be the case. It would be desirable for a 
receiver to be able to signal their own preferences for paths, since they will often be the multihomed party and may have to pay for metered incoming bandwidth.</t> <t pn="section-3.3.8-5">To enable this behavior, the MP_JOIN option (see <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) contains the "B" bit, which allows a host to indicate to its peer that this path should be treated as a backup path to use only in the event of failure of other working subflows (i.e., a subflow where the receiver has indicated that B=1 <bcp14>SHOULD NOT</bcp14> be used to send data unless there are no usable subflows where B=0).</t> <t pn="section-3.3.8-6">In the event that the available set of paths changes, a host may wish to signal a change in priority of subflows to the peer (e.g., a subflow 
that was previously set as backup should now take priority over all remaining subflows). Therefore, the MP_PRIO option, shown in <xref target="tcpm_prio" format="default" sectionFormat="of" derivedContent="Figure 11"/>, can be used to change the "B" flag of the subflow on which it is sent.</t> <figure anchor="tcpm_prio" align="left" suppress-title="false" pn="figure-11"> <name slugifiedName="name-change-subflow-priority-mp_">Change Subflow Priority (MP_PRIO) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.3.8-7.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----+-+
|     Kind      |     Length    |Subtype|(rsv)|B|
+---------------+---------------+-------+-----+-+
</artwork> </figure> <t pn="section-3.3.8-8">Another use of the MP_PRIO option is to set the "B" flag on a subflow to cleanly "retire" its use before closing it and removing it with REMOVE_ADDR (<xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>) -- for example, to support make-before-break session continuity, where new subflows are added before the previously used subflows are closed.</t> <t pn="section-3.3.8-9">It should be noted that the backup flag is a request from a data receiver to a data sender only, and the data sender <bcp14>SHOULD</bcp14> adhere to these requests. A host cannot assume that the data sender will do so, however, since local policies -- or technical difficulties -- may override MP_PRIO requests. Note also that this signal applies to a single direction, and so the sender of this option could choose to continue using the subflow to send data even if it has signaled B=1 to the 
other host.</t> </section> </section> <section anchor="sec_pm" numbered="true" toc="include" removeInRFC="false" pn="section-3.4"> <name slugifiedName="name-address-knowledge-exchange-">Address Knowledge Exchange (Path Management)</name> <t pn="section-3.4-1">We use the term "path management" to refer to the exchange of information about additional paths between hosts, which in this design is managed by multiple addresses at hosts. For more details regarding the architectural thinking behind this design, see the MPTCP architecture document <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/>.</t> <t pn="section-3.4-2">This design makes use of two methods of sharing such information, and both can be used on a connection.
The first is the direct setup of new subflows (described in <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>), where the initiator has an additional address. The second method (described in the following subsections) signals addresses explicitly to the other host to allow it to initiate new subflows. The two mechanisms are complementary: the first is implicit and simple, while the second (explicit) is more complex but is more robust. Together, these mechanisms allow addresses to change in flight (and thus support operation through NATs, since the
source address need not be known); they also allow the signaling of previously unknown addresses and of addresses belonging to other address families (e.g., both IPv4 and IPv6).</t> <t pn="section-3.4-3">Here is an example of typical operation of the protocol: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.4-4"> <li pn="section-3.4-4.1">An MPTCP connection is initially set up between address/port A1 of Host A and address/port B1 of Host B. If Host A is multihomed and multiaddressed, it can start an additional subflow from its address A2 to B1, by sending a SYN with an MP_JOIN option from A2 to B1, using B's previously declared token for this connection. Alternatively, if B is multihomed, it can try to set up a new subflow from B2 to A1, using A's previously declared token.
In either case, the SYN will be sent to the port already in use for the original subflow on the receiving host.</li> <li pn="section-3.4-4.2">Simultaneously (or after a timeout), an ADD_ADDR option (<xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>) is sent on an existing subflow, informing the receiver of the sender's alternative address(es). The recipient can use this information to open a new subflow to the sender's additional address(es). In our example, A will send the ADD_ADDR option informing B of address/port A2. The mix of using the SYN‑based option and the ADD_ADDR option, including timeouts, is implementation specific and can be tailored to agree with local policy.</li> <li pn="section-3.4-4.3">If subflow A2-B1 is successfully set up, Host B can use the Address ID in the MP_JOIN option to correlate this source address with the ADD_ADDR option that will also arrive on an existing subflow; now B knows not to open A2-B1, ignoring the
ADD_ADDR. Otherwise, if B has not received the A2-B1 MP_JOIN SYN but received the ADD_ADDR, it can try to initiate a new subflow from one or more of its addresses to address A2. This permits new sessions to be opened if one host is behind a NAT.</li> </ul> <t pn="section-3.4-5"> Other ways of using the two signaling mechanisms are possible; for instance, signaling addresses in other address families can only be done explicitly using the Add Address (ADD_ADDR) option. </t> <section anchor="sec_add_address" numbered="true" toc="include" removeInRFC="false" pn="section-3.4.1"> <name slugifiedName="name-address-advertisement">Address Advertisement</name> <t pn="section-3.4.1-1">The ADD_ADDR MPTCP option announces additional addresses (and, optionally, ports) on which a host can be reached (<xref target="tcpm_address" format="default" sectionFormat="of" derivedContent="Figure 12"/>). This option can be used at any time during a connection, depending on when the sender wishes to enable multiple paths and/or when paths become available.
As with all MPTCP signals, the receiver <bcp14>MUST</bcp14> undertake standard TCP validity checks, e.g., per <xref target="RFC5961" format="default" sectionFormat="of" derivedContent="RFC5961"/>, before acting upon it.</t> <figure anchor="tcpm_address" align="left" suppress-title="false" pn="figure-12"> <name slugifiedName="name-add-address-add_addr-option">Add Address (ADD_ADDR) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.4.1-2.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-------+---------------+
|     Kind      |     Length    |Subtype|(rsv)|E|  Address ID   |
+---------------+---------------+-------+-------+---------------+
|           Address (IPv4: 4 octets / IPv6: 16 octets)          |
+-------------------------------+-------------------------------+
|   Port (2 octets, optional)   |                               |
+-------------------------------+                               |
|                Truncated HMAC (8 octets, if E=0)              |
|                               +-------------------------------+
|                               |
+-------------------------------+
</artwork> </figure> <t pn="section-3.4.1-3">Every address has an Address ID that can be used for uniquely identifying the address within a connection for address removal. The Address ID is also used to identify MP_JOIN options (see <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) relating to the same address, even when address translators are in use. The Address ID <bcp14>MUST</bcp14> uniquely identify the address for the sender of the
option (within the scope of the connection); the mechanism for allocating such IDs is implementation specific.</t> <t pn="section-3.4.1-4">All Address IDs learned via either MP_JOIN or ADD_ADDR <bcp14>SHOULD</bcp14> be stored by the receiver in a data structure that gathers all the Address-ID-to-address mappings for a connection (identified by a token pair). In this way, there is a stored mapping between the Address ID, observed source address, and token pair for future processing of control information for a connection. Note that an implementation <bcp14>MAY</bcp14> discard incoming address advertisements at will -- for example, to avoid updating mapping state or because advertised addresses are
of no use to it (for example, IPv6 addresses when it has IPv4 only). Therefore, a host <bcp14>MUST</bcp14> treat address advertisements as soft state, and it <bcp14>MAY</bcp14> choose to refresh advertisements periodically. Note also that an implementation <bcp14>MAY</bcp14> choose to cache these address advertisements even if they are not currently relevant but may be relevant in the future, such as IPv4 addresses when IPv6 connectivity is available but IPv4 is awaiting DHCP.</t> <t pn="section-3.4.1-5">This option is shown in <xref target="tcpm_address" format="default" sectionFormat="of" derivedContent="Figure 12"/>. The illustration is sized for IPv4 addresses. For IPv6, the length of the address will be 16 octets (instead of 4).</t> <t pn="section-3.4.1-6">The 2 octets that specify the TCP port number to use are optional, and their presence can be inferred from the length of the option. Although it is expected that the majority of use cases will use the same port pairs as those used for the initial subflow
(e.g., port 80 remains port 80 on all subflows, as does the ephemeral port at the client), there may be cases (such as port-based load balancing) where the explicit specification of a different port is required. If no port is specified, MPTCP <bcp14>SHOULD</bcp14> attempt to connect to the specified address on the same port as the port that is already in use by the subflow on which the ADD_ADDR signal was sent; this is discussed in more detail in <xref target="heuristics" format="default" sectionFormat="of" derivedContent="Section 3.9"/>.</t> <t pn="section-3.4.1-7">The Truncated HMAC parameter present in this option is the rightmost 64 bits of an HMAC, negotiated and calculated in the same way as for MP_JOIN as described in <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>. For this specification of MPTCP, as there is only one hash algorithm option specified, this will be HMAC as defined in <xref target="RFC2104" format="default" sectionFormat="of" derivedContent="RFC2104"/>, using the SHA-256 hash algorithm <xref target="RFC6234" format="default" sectionFormat="of" derivedContent="RFC6234"/>. In the same way as for MP_JOIN, the key for the HMAC algorithm, in the case of
the message transmitted by Host A, will be Key-A followed by Key-B, and in the case of Host B, Key-B followed by Key-A. These are the keys that were exchanged in the original MP_CAPABLE handshake. The message for the HMAC is the Address ID, IP address, and port that precede the HMAC in the ADD_ADDR option. If the port is not present in the ADD_ADDR option, the HMAC message will nevertheless include 2 octets of value zero. The rationale for the HMAC is to prevent unauthorized entities from injecting ADD_ADDR signals in an attempt to hijack a connection. Note that, additionally, the presence of this HMAC prevents the address from being changed in flight unless the key is known by an intermediary. If a host receives an ADD_ADDR option for which it cannot validate the HMAC, it <bcp14>SHOULD</bcp14> silently ignore the option.</t> <t pn="section-3.4.1-8">A set of four flags is present after the subtype and before the Address ID. Only the rightmost bit -- labeled "E" -- is assigned in this specification.
The other bits are currently unassigned; they <bcp14>MUST</bcp14> be set to 0 by a sender and <bcp14>MUST</bcp14> be ignored by the receiver.</t> <t pn="section-3.4.1-9">The "E" flag exists to provide reliability for this option. Because this option will often be sent on pure ACKs, there is no guarantee of reliability. Therefore, a receiver receiving a fresh ADD_ADDR option (where E=0) will send the same option back to the sender, but not including the HMAC and with E=1, to indicate receipt. According to local policy, the lack of this type of "echo" can indicate to the initial ADD_ADDR sender that the ADD_ADDR needs to be retransmitted.</t> <t pn="section-3.4.1-10">Due to the proliferation of NATs, it is reasonably likely that one host may attempt to advertise private addresses <xref target="RFC1918" format="default" sectionFormat="of" derivedContent="RFC1918"/>. It is not desirable to prohibit this behavior, since there may be cases where both hosts have additional interfaces on the same private network, and a host <bcp14>MAY</bcp14> advertise such addresses. The MP_JOIN handshake to create a new subflow (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) provides mechanisms to minimize security risks.
The MP_JOIN message contains a 32-bit token that uniquely identifies the connection to the receiving host. If the token is unknown, the host will respond with a RST. In the unlikely event that the token is valid at the receiving host, subflow setup will continue, but the HMAC exchange must occur for authentication. The HMAC exchange will fail and will provide sufficient protection against two unconnected hosts accidentally setting up a new subflow upon the signal of a private address. Further security considerations around the issue of ADD_ADDR messages that accidentally misdirect, or maliciously direct, new MP_JOIN attempts are discussed in <xref target="sec_security" format="default" sectionFormat="of" derivedContent="Section 5"/>.</t> <t pn="section-3.4.1-11">A host that receives an ADD_ADDR but finds that a connection set up to that IP address and port number is unsuccessful <bcp14>SHOULD NOT</bcp14> perform further connection attempts to this address/port combination for this connection.
A sender that wants to trigger a new incoming connection attempt on a previously advertised address/port combination can therefore refresh ADD_ADDR information by sending the option again.</t> <t pn="section-3.4.1-12">A host can therefore send an ADD_ADDR message with an already-assigned Address ID, but the address <bcp14>MUST</bcp14> be the same as the address previously assigned to this Address ID. A new ADD_ADDR may have the same port number or a different port number. If the port number is different, the receiving host <bcp14>SHOULD</bcp14> try to set up a new subflow to this new address/port combination.</t> <t pn="section-3.4.1-13">A host wishing to replace an existing Address ID <bcp14>MUST</bcp14> first remove the existing one (<xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/>).</t> <t pn="section-3.4.1-14">During normal MPTCP operation, it is unlikely that there will be sufficient TCP option space for ADD_ADDR to be included along with those for data sequence numbering (<xref target="sec_dsn" format="default" sectionFormat="of" derivedContent="Section 3.3.1"/>).
Therefore, it is expected that an MPTCP implementation will send the ADD_ADDR option on separate ACKs. As discussed earlier, however, an MPTCP implementation <bcp14>MUST NOT</bcp14> treat duplicate ACKs with any MPTCP option, with the exception of the DSS option, as indications of congestion <xref target="RFC5681" format="default" sectionFormat="of" derivedContent="RFC5681"/>, and an MPTCP implementation <bcp14>SHOULD NOT</bcp14> send more than two duplicate ACKs in a row for signaling purposes.</t> </section> <section anchor="sec_remove_addr" numbered="true" toc="include" removeInRFC="false" pn="section-3.4.2"> <name slugifiedName="name-remove-address">Remove Address</name> <t pn="section-3.4.2-1">If, during the lifetime of an MPTCP connection, a previously announced address becomes invalid (e.g., if the interface disappears or an IPv6 address is no longer preferred), the affected host <bcp14>SHOULD</bcp14> announce this situation so that the peer can remove subflows related to this address. Even if an address is not in use by an MPTCP connection, if it has been previously announced, an implementation <bcp14>SHOULD</bcp14> announce its removal.
A host <bcp14>MAY</bcp14> also choose to announce that a valid IP address should not be used any longer -- for example, for make‑before-break session continuity.</t> <t pn="section-3.4.2-2">This is achieved through the Remove Address (REMOVE_ADDR) option (<xref target="tcpm_remove" format="default" sectionFormat="of" derivedContent="Figure 13"/>), which will remove a previously added address (or list of
addresses) from a connection and terminate any subflows currently using that address.</t> <figure anchor="tcpm_remove" align="left" suppress-title="false" pn="figure-13"> <name slugifiedName="name-remove-address-remove_addr-">Remove Address (REMOVE_ADDR) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.4.2-3.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-------+---------------+
|     Kind      |Length = 3 + n |Subtype|(resvd)|   Address ID  | ...
+---------------+---------------+-------+-------+---------------+
                           (followed by n-1 Address IDs, if required)
</artwork> </figure> <t pn="section-3.4.2-4">For security purposes, if a host receives a REMOVE_ADDR option, it must ensure that the affected path or paths are no longer in use before it instigates closure.
The receipt of REMOVE_ADDR <bcp14>SHOULD</bcp14> first trigger the sending of a TCP keepalive <xref target="RFC1122" format="default" sectionFormat="of" derivedContent="RFC1122"/> on the path, and if a response is received, the path <bcp14>SHOULD NOT</bcp14> be removed. If the path is found to still be alive, the receiving host <bcp14>SHOULD</bcp14> no longer use the specified address for future connections, but it is the responsibility of the host that sent the REMOVE_ADDR to shut down the subflow. Before the address is removed, the requesting host <bcp14>MAY</bcp14> also use MP_PRIO (<xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>) to request that a path no longer be used. Typical TCP validity tests on the subflow (e.g., ensuring that sequence and ACK numbers are correct) <bcp14>MUST</bcp14> also be undertaken.
An implementation can use indications of these test failures as part of intrusion detection or error logging.</t> <t pn="section-3.4.2-5">The sending and receipt (if no keepalive response was received) of this message <bcp14>SHOULD</bcp14> trigger the sending of RSTs by both hosts on the affected subflow(s) (if possible), as a courtesy, to allow the cleanup of middlebox state before cleaning up any local state.</t> <t pn="section-3.4.2-6">Address removal is undertaken according to the Address ID, so as to permit the use of NATs and other middleboxes that rewrite source addresses. If an Address ID is not known, the receiver will silently ignore the request.</t> <t pn="section-3.4.2-7">A subflow that is still functioning <bcp14>MUST</bcp14> be closed with a FIN exchange as in regular TCP, rather than using this option. For more information, see <xref target="sec_close" format="default" sectionFormat="of" derivedContent="Section 3.3.3"/>.</t> </section> </section> <section anchor="sec_fastclose" numbered="true" toc="include" removeInRFC="false" pn="section-3.5"> <name slugifiedName="name-fast-close">Fast Close</name> <t pn="section-3.5-1">Regular TCP has the means of sending a RST signal to abruptly close a connection. With MPTCP, a regular RST only has the scope of the subflow; it will only close the applicable subflow
and will not affect the remaining subflows. MPTCP's connection will stay alive at the data level, in order to permit break-before-make handover between subflows. It is therefore necessary to provide an MPTCP-level "reset" to allow the abrupt closure of the whole MPTCP connection; this is done via the MP_FASTCLOSE option.</t> <t pn="section-3.5-2">MP_FASTCLOSE is used to indicate to the peer that the connection will be abruptly closed and no data will be accepted anymore. The reasons for triggering an MP_FASTCLOSE are implementation specific. Regular TCP does not allow the sending of a RST while the connection is in a synchronized state <xref
Where possible, we do not want to deviate from regular TCP behavior.</t> <t>The following list covers possible errors andtarget="RFC0793" format="default" sectionFormat="of" derivedContent="RFC0793"/>. Nevertheless, implementations allow theappropriate MPTCP behavior: <list style="symbols"> <t>Unknown token in MP_JOIN (or HMAC failure in MP_JOIN ACK, or missing MP_JOIN in SYN/ACK response): send RST (analogous to TCP's behavior on an unknown port)</t> <t>DSN outsending ofwindow (during normal operation): drop the data, do not send Data ACKs</t> <t>Remove request for unknown address ID: silently ignore</t> </list> </t> </section> <section title="Heuristics" anchor="heuristics"> <t>There areanumber of heuristics that are needed for performance or deployment but that are not required for protocol correctness. InRST in thissection, we detail such heuristics. Note that discussionstate if, for example, the operating system is running out ofbuffering and certain sender and receiver window behaviors are presented in Sections <xref target="sec_rwin" format="counter"/> and <xref target="sec_sender" format="counter"/>, as well as retransmissionresources. In these cases, MPTCP should send the MP_FASTCLOSE. 
This option is illustrated in <xref target="tcpm_fastclose" format="default" sectionFormat="of" derivedContent="Figure 14"/>.</t> <figure anchor="tcpm_fastclose" align="left" suppress-title="false" pn="figure-14"> <name slugifiedName="name-fast-close-mp_fastclose-opt">Fast Close (MP_FASTCLOSE) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.5-3.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----------------------+
|     Kind      |    Length     |Subtype|      (reserved)       |
+---------------+---------------+-------+-----------------------+
|                     Option Receiver's Key                     |
|                           (64 bits)                           |
|                                                               |
+---------------------------------------------------------------+
</artwork> </figure> <t pn="section-3.5-4">If Host A wants to force the closure of an MPTCP connection, it can do so via two options: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.5-5"> <li pn="section-3.5-5.1">Option A (ACK): Host A sends an ACK containing the MP_FASTCLOSE option on one subflow, containing the key of Host B as declared in the initial connection handshake. On all the other subflows, Host A sends a regular TCP RST to close these subflows and tears them down. Host A now enters FASTCLOSE_WAIT state.</li> <li pn="section-3.5-5.2">Option R (RST): Host A sends a RST containing the MP_FASTCLOSE option on all subflows, containing the key of Host B as declared in the initial connection handshake. Host A can tear down the subflows and the
connection immediately.</li> </ul> <t pn="section-3.5-6">If Host A decides to force the closure by using Option A and sending an ACK with the MP_FASTCLOSE option, the connection shall proceed as follows: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.5-7"> <li pn="section-3.5-7.1">Upon receipt of an ACK with MP_FASTCLOSE by Host B, containing the valid key, Host B answers on the same subflow with a TCP RST and tears down all subflows also through sending TCP RST signals. Host B can now close the whole MPTCP connection (it transitions directly to CLOSED state).</li> <li pn="section-3.5-7.2">As soon as Host A has received the TCP RST on the remaining subflow, it can close this subflow and tear down the whole connection (transition from FASTCLOSE_WAIT state to CLOSED state). If Host A receives an MP_FASTCLOSE instead of a TCP RST, both hosts attempted fast closure simultaneously. Host A should reply with a TCP RST and tear down the connection.</li> <li pn="section-3.5-7.3">If Host A
does not receive a TCP RST in reply to its MP_FASTCLOSE after one retransmission timeout (RTO) (the RTO of the subflow where the MP_FASTCLOSE has been sent), it <bcp14>SHOULD</bcp14> retransmit the MP_FASTCLOSE. To keep this connection from being retained for a long time, the number of retransmissions <bcp14>SHOULD</bcp14> be limited; this limit is implementation specific. A <bcp14>RECOMMENDED</bcp14> number is 3. If no TCP RST is received in response, Host A <bcp14>SHOULD</bcp14> send a TCP RST with the MP_FASTCLOSE option itself when it releases state in order to clear any remaining state at middleboxes.</li> </ul> <t pn="section-3.5-8">If, however, Host A decides to force the closure by using Option R and sending a RST with the MP_FASTCLOSE option, Host B will act as follows: upon receipt of a RST with MP_FASTCLOSE, containing the valid key, Host B tears down all subflows by sending a TCP RST. Host B can now close the whole MPTCP connection (it transitions directly to CLOSED state).</t> </section> <section anchor="sec_reset" numbered="true" toc="include" removeInRFC="false" pn="section-3.6"> <name slugifiedName="name-subflow-reset">Subflow Reset</name> <t pn="section-3.6-1">An implementation of MPTCP may also need to send a regular TCP RST to force the closure of a subflow.
A host sends a TCP RST in order to close a subflow or reject an attempt to open a subflow (MP_JOIN). In order to let the receiving host know why a subflow is being closed or rejected, the TCP RST packet <bcp14>MAY</bcp14> include the MP_TCPRST option (<xref target="tcpm_reset" format="default" sectionFormat="of" derivedContent="Figure 15"/>). The host <bcp14>MAY</bcp14> use this information to decide, for example, whether it tries to re-establish the subflow immediately, later, or never.</t> <figure anchor="tcpm_reset" align="left" suppress-title="false" pn="figure-15"> <name slugifiedName="name-tcp-rst-reason-mp_tcprst-op">TCP RST Reason (MP_TCPRST) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.6-2.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+-----------------------+
|     Kind      |    Length     |Subtype|U|V|W|T|    Reason     |
+---------------+---------------+-------+-----------------------+
</artwork> </figure> <t pn="section-3.6-3">The MP_TCPRST option contains a reason code that allows the sender of the option to provide more information about the reason for the termination of the subflow.
Using 12 bits ofresources.</t> <t>Ifoption space, thesame portsfirst 4 bits areused on all subflows, as recommended above, then standard TCP simultaneous open logic should take carereserved for flags (only one ofthis situationwhich is currently defined), andonly one subflow will be established betweentheaddress pairs. However, this relies on the same ports beingremaining octet is usedat both end hosts. Ifto express ahost does not support TCP simultaneous open, itreason code for this subflow termination, from which a receiver <bcp14>MAY</bcp14> infer information about the usability of this path.</t> <t pn="section-3.6-4">The "T" flag isRECOMMENDEDused by the sender to indicate whether the error condition thatsome element of randomizationisappliedreported is Transient ("T" bit set to 1) or Permanent ("T" bit set to 0). If thetimeerror condition is considered towait before opening new subflows, so that only onebe Transient by the sender of the RST segment, the recipient of this segment <bcp14>MAY</bcp14> try to re-establish a subflowis created betweenfor this connection over the failed path. The time at which agiven address pair. If, however, hosts signal additional portsreceiver may try touse (for example, for leveraging ECMP on-path),re‑establish thisheuristicsubflow isnot appropriate.</t> <t>This section has shown someimplementation specific but <bcp14>SHOULD</bcp14> take into account the properties of theconsiderations that an implementer should give when developing MPTCP heuristics, butfailure as defined by the provided reason code. If the error condition isnot intendedconsidered to beprescriptive.</t> </section> <section title="Failure Handling"> <t>Requirements for MPTCP's handlingPermanent, the receiver ofunexpected signals have been given in <xref target="sec_errors"/>. 
There are other failure cases, however, where a hosts can choose appropriate behavior.</t> <t>For example, <xref target="sec_init"/> suggests that a host SHOULD fall backthe RST segment <bcp14>SHOULD NOT</bcp14> try totrying regular TCP SYNs after one or more failures of MPTCP SYNs for a connection. A host may keepre‑establish asystem-wide cache of such information, so that it can back off from using MPTCP, firstlysubflow forthat particular destination host, and eventually on a whole interface, if MPTCP connections continue failing.this connection over this path. Theduration"U", "V", and "W" flags are not defined by this specification and are reserved for future use. An implementation ofsuch a cache would be implementation-specific.</t> <t>Another failure could occur when the MP_JOIN handshake fails. <xref target="sec_errors"/> specifies that an incorrect handshake MUST leadthis specification <bcp14>MUST</bcp14> set these flags tothe subflow being closed with0, and aRST. A host operatingreceiver <bcp14>MUST</bcp14> ignore them.</t> <t pn="section-3.6-5">"Reason" is anactive intrusion detection system may choose to start blocking MP_JOIN packets from8-bit field that indicates thesource host if multiple failed MP_JOIN attempts are seen. Fromreason code for theconnection initiator's pointtermination ofview, if an MP_JOIN fails, it SHOULD NOT attempt to connect tothesame IP address and port duringsubflow. The following codes are defined in this document: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.6-6"> <li pn="section-3.6-6.1">Unspecified error (code 0x00). This is thelifetimedefault error; it implies that the subflow is no longer available. The presence of this option shows that theconnection, unlessRST was generated by an MPTCP-aware device.</li> <li pn="section-3.6-6.2">MPTCP-specific error (code 0x01). An error has been detected in theother host refreshesprocessing of MPTCP options. This is theinformation with another ADD_ADDR option. 
Note thatusual reason code to return in theADD_ADDR optioncases where a RST isinformational only, and does not guarantee the other host will attemptbeing sent to close aconnection.</t> <t>In addition,subflow because of animplementation may learn, over a numberinvalid response.</li> <li pn="section-3.6-6.3">Lack ofconnections,resources (code 0x02). This code indicates thatcertain interfaces or destination addresses consistently fail and may default tothe sending host does nottrying to use MPTCP for these. Behavior could also be learned for particularly badly performing subflows or subflows that regularly fail during use, in order to temporarily choose not to use these paths.</t> </section> </section> </section> <section title="Semantic Issues" anchor="sec_semantics"> <t>In order to support multipath operation, the semantics of some TCP componentshavechanged. To aid clarity, this section collects these semantic changes as a reference. <list style="hanging"> <t hangText="Sequence number:"> The (in-header) TCP sequence number is specificenough resources to support thesubflow. To allowterminated subflow.</li> <li pn="section-3.6-6.4">Administratively prohibited (code 0x03). This code indicates that thereceiver to reorder application data, an additional data-level sequence spacerequested subflow isused. In this data-level sequence space, the initial SYN andprohibited by thefinal DATA_FIN occupy 1 octetpolicies ofsequence space. This is to ensure these signals are acknowledged attheconnection level. Theresending host.</li> <li pn="section-3.6-6.5">Too much outstanding data (code 0x04). This code indicates that there is anexplicit mappingexcessive amount of datasequence spacethat needs tosubflow sequence space, which is signaled through TCP options in data packets.</t> <t hangText="ACK:"> The ACK field in the TCP header acknowledges onlybe transmitted over the terminated subflowsequence number, not the data-level sequence space. 
Implementations SHOULD NOT attempt to inferwhile having already been acknowledged over one or more other subflows. This may occur if adata-level acknowledgment frompath has been unavailable for a short period and it is more efficient to reset and start again than it is to retransmit thesubflow ACKs.queued data.</li> <li pn="section-3.6-6.6">Unacceptable performance (code 0x05). Thisseparates subflow- and connection-level processing at an end host.</t> <t hangText="Duplicate ACK:"> A duplicate ACKcode indicates thatincludes any MPTCP signaling (withtheexceptionperformance of this subflow was too low compared to theDSS option) MUST NOTother subflows of this Multipath TCP connection.</li> <li pn="section-3.6-6.7">Middlebox interference (code 0x06). Middlebox interference has been detected over this subflow, making MPTCP signaling invalid. For example, this may betreated assent if the checksum does not validate.</li> </ul> </section> <section anchor="sec_fallback" numbered="true" toc="include" removeInRFC="false" pn="section-3.7"> <name slugifiedName="name-fallback">Fallback</name> <t pn="section-3.7-1">Sometimes, middleboxes will exist on asignal of congestion. To limitpath that could prevent thechancesoperation ofnon-MPTCP-aware entities mistakenly interpreting duplicate ACKs asMPTCP. MPTCP has been designed to cope with many middlebox modifications (see <xref target="sec_middleboxes" format="default" sectionFormat="of" derivedContent="Section 6"/>), but there are still some cases where asignal of congestion,subflow could fail to operate within the MPTCPSHOULD NOT send more than two duplicate ACKs containing (non-DSS)requirements. Notably, these cases are the following: the loss of MPTCPsignals inoptions on arow.</t> <t hangText="Receive Window:">The receive window inpath and the modification of payload data. If such an event occurs, it is necessary to "fall back" to the previous, safe operation. 
This may be either falling back to regular TCP or removing a problematic subflow.</t> <t pn="section-3.7-2">At the start of an MPTCP connection (i.e., the first subflow), it is important to ensure that the path is fully MPTCP capable and the necessary MPTCP options can reach each host. The handshake as described in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/> <bcp14>SHOULD</bcp14> fall back to regular TCP if either of the SYN messages does not have the MPTCP options: this is the same, and desired, behavior in the case where a host is not MPTCP capable or the path does not support the MPTCP options. When attempting to join an existing MPTCP connection (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>), if a path is not MPTCP capable and the MPTCP options do not get through on the SYNs, the subflow will be closed according to the MP_JOIN logic.</t> <t pn="section-3.7-3">There is, however, another corner case that should be addressed: the case where MPTCP options get through on the SYN but not on regular packets.
If the subflow is the first subflow and thus all data in flight is contiguous, this situation can be resolved by using the following rules:</t> <ul spacing="normal" bare="false" empty="false" pn="section-3.7-4"> <li pn="section-3.7-4.1">A sender <bcp14>MUST</bcp14> include a DSS option with Data Sequence Mapping in every segment until one of the sent segments has been acknowledged with a DSS option containing a Data ACK. Upon reception of the acknowledgment, the sender has the confirmation that the DSS option passes in both directions and may choose to send fewer DSS options than once per segment.</li> <li pn="section-3.7-4.2">If, however, an ACK is received for data (not just for the SYN) without a DSS option containing a Data ACK, the sender determines that the path is not MPTCP capable. In the case of this occurring on an additional subflow (i.e., one started with MP_JOIN), the
host <bcp14>MUST</bcp14> close the subflow with a RST, which <bcp14>SHOULD</bcp14> contain an MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) with a "Middlebox interference" reason code.</li> <li pn="section-3.7-4.3">In the case of such an ACK being received on the first subflow (i.e., that started with MP_CAPABLE), before any additional subflows are added, the implementation <bcp14>MUST</bcp14> drop out of MPTCP mode and fall back to regular TCP.
The sender will send one final Data Sequence Mapping, with the Data-Level Length value of 0 indicating an infinite mapping (to inform the other end in case the path drops options in one direction only), and then revert to sending data on the single subflow without any MPTCP options.</li> <li pn="section-3.7-4.4">If a subflow breaks during operation, e.g., if it is rerouted and MPTCP options are no longer permitted, then once this is detected (by the subflow-level receive buffer filling up, since there is no mapping available in order to DATA_ACK this data), the subflow <bcp14>SHOULD</bcp14> be treated as broken and closed with a RST, since no data can
be delivered to the application layer and no fallback signal can be reliably sent. This RST <bcp14>SHOULD</bcp14> include the MP_TCPRST option (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) with a "Middlebox interference" reason code.</li> </ul> <t pn="section-3.7-5">These rules should cover all cases where such a failure could happen -- whether it's on the forward or reverse path and whether the server or the client first sends data.</t> <t pn="section-3.7-6">So far, this section has discussed the loss of MPTCP options, either initially or during the course of the connection. As described in <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>, each portion of data for which there is a mapping is protected by a checksum, if checksums have been negotiated. This mechanism is used to detect if middleboxes have made any adjustments to the payload (added, removed, or changed data). A checksum will fail if the data has been changed in any way. The use of a checksum will also detect whether the length of data on the subflow is increased or decreased, and this means the Data Sequence Mapping is no longer valid.
The sender no longer knows what subflow-level sequence number the receiver is genuinely operating at (the middlebox will be faking ACKs in return), and it cannot signal any further mappings. Furthermore, in addition to the possibility of payload modifications that are valid at the application layer, it is possible that such modifications could be triggered across MPTCP segment boundaries, corrupting the data. Therefore, all data from the start of the segment that failed the checksum onward is not trustworthy.</t> <t pn="section-3.7-7">Note that if checksum usage has not been negotiated, this fallback mechanism cannot be used unless there is some higher-layer or lower‑layer signal to inform the MPTCP implementation that the payload has been tampered with.</t> <t pn="section-3.7-8">When multiple subflows are in use, the data in flight on a subflow will likely involve data that is not contiguously part of the connection-level stream, since segments will be spread across the multiple subflows. Due to the problems identified above, it is not possible to determine what adjustments have been done to the data (notably, any changes to the subflow sequence numbering).
Therefore, it is not possible to recover the subflow, and the affected subflow must be immediately closed with a RST that includes an MP_FAIL option (<xref target="tcpm_fallback" format="default" sectionFormat="of" derivedContent="Figure 16"/>), which defines the data sequence number at the start of the segment (defined by the Data Sequence Mapping) that had the checksum failure. Note that the MP_FAIL option requires the use of the full 64-bit sequence number, even if 32-bit sequence numbers are normally in use in the
DSS signals on the path.</t> <figure anchor="tcpm_fallback" align="left" suppress-title="false" pn="figure-16"> <name slugifiedName="name-fallback-mp_fail-option">Fallback (MP_FAIL) Option</name> <artwork align="left" name="" type="" alt="" pn="section-3.7-9.1">
                     1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+---------------+---------------+-------+----------------------+
|     Kind      |   Length=12   |Subtype|      (reserved)      |
+---------------+---------------+-------+----------------------+
|                                                              |
|                Data Sequence Number (8 octets)               |
|                                                              |
+--------------------------------------------------------------+
</artwork> </figure> <t pn="section-3.7-10">The receiver of this option <bcp14>MUST</bcp14> discard all data following the data sequence number specified. Failed data <bcp14>MUST NOT</bcp14> be DATA_ACKed and so will be retransmitted on other subflows (<xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>). </t> <t pn="section-3.7-11">A special case is when there is a single subflow and it fails with a checksum error.
If it is known that all unacknowledged data in flight is contiguous (which will usually be the case with a single subflow), an infinite mapping can be applied to the subflow without the need to close it first, essentially turning off all further MPTCP signaling. In this case, if a receiver identifies a checksum failure when there is only one path, it will send back an MP_FAIL option on the subflow-level ACK, referring to the data-level sequence number of the start of the segment on which the checksum error was detected. The sender will receive this information and, if all unacknowledged data in flight is contiguous, will signal an infinite mapping. This infinite mapping will be a DSS option (<xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/>) on the first new packet, containing a Data Sequence Mapping that acts retroactively, referring to the start of the subflow sequence number of the most recent segment that was known to be delivered intact (i.e., was successfully DATA_ACKed).
From that point onward, data can be altered by a middlebox without affecting MPTCP, as the data stream is equivalent to a regular, legacy TCP session. While in theory paths may only be damaged in one direction -- and the MP_FAIL signal affects only one direction of traffic -- for simplicity of implementation, the receiver of an MP_FAIL <bcp14>MUST</bcp14> also respond with an MP_FAIL in the reverse direction and entirely revert to a regular TCP session.</t> <t pn="section-3.7-12">In the rare case that the data is not contiguous (which could happen when there is only one subflow but it is retransmitting data from a subflow that has recently been uncleanly closed), the receiver <bcp14>MUST</bcp14> close the subflow with a RST with MP_FAIL. The receiver <bcp14>MUST</bcp14> discard all data that follows the data sequence number specified.
The sender <bcp14>MAY</bcp14> attempt to create a new subflow belonging to the same connection and, if it chooses to do so, <bcp14>SHOULD</bcp14> immediately place the single subflow in single-path mode by setting an infinite Data Sequence Mapping. This mapping will begin from the data-level sequence number that was declared in the MP_FAIL.</t> <t pn="section-3.7-13">After a sender signals an infinite mapping, it <bcp14>MUST</bcp14> only use subflow ACKs to clear its send buffer. This is because Data ACKs may become misaligned with the subflow ACKs when middleboxes insert or delete data. The receiver <bcp14>SHOULD</bcp14> stop generating Data ACKs after it receives an infinite mapping.</t> <t pn="section-3.7-14">When a connection has fallen back with an infinite mapping, only one subflow can send data; otherwise, the receiver would not know how to reorder the data. In practice, this means that all MPTCP subflows will have to be terminated except one. Once MPTCP falls back to regular TCP, it <bcp14>MUST NOT</bcp14> revert to MPTCP later in the connection.</t> <t pn="section-3.7-15">It should be emphasized that MPTCP is not attempting to prevent the use of 
middleboxes that want to adjust the payload. An MPTCP-aware middlebox could provide such functionality by also rewriting checksums.</t> </section> <section anchor="sec_errors" numbered="true" toc="include" removeInRFC="false" pn="section-3.8"> <name slugifiedName="name-error-handling">Error Handling</name> <t pn="section-3.8-1">In addition to the fallback mechanism described above, the standard classes of TCP errors may need to be handled in an MPTCP‑specific way. Note that changing semantics -- such as the relevance of a RST -- are covered in <xref target="sec_semantics" format="default" sectionFormat="of" derivedContent="Section 4"/>. Where possible, we do not want to deviate from regular TCP behavior.</t> <t pn="section-3.8-2">The following list covers possible errors and the appropriate MPTCP behavior: </t> <ul spacing="normal" bare="false" empty="false" pn="section-3.8-3"> <li pn="section-3.8-3.1">Unknown token in MP_JOIN (or HMAC failure in MP_JOIN ACK, or missing MP_JOIN in SYN/ACK response): send RST (analogous to TCP's behavior on an unknown port)</li> <li pn="section-3.8-3.2">DSN out of window (during normal operation): drop the data; do not send Data ACKs</li> <li pn="section-3.8-3.3">Remove request for unknown Address ID: silently ignore</li> </ul> </section> <section anchor="heuristics" numbered="true" toc="include" removeInRFC="false" pn="section-3.9"> <name slugifiedName="name-heuristics">Heuristics</name> <t pn="section-3.9-1">There are a number of heuristics that are needed for performance or deployment but that are not required for protocol correctness. In this section, we detail such heuristics. 
Note that discussions of buffering and certain sender and receiver window behaviors are presented in Sections <xref target="sec_rwin" format="counter" sectionFormat="of" derivedContent="3.3.4"/> and <xref target="sec_sender" format="counter" sectionFormat="of" derivedContent="3.3.5"/>, and retransmission is discussed in <xref target="sec_retransmit" format="default" sectionFormat="of" derivedContent="Section 3.3.6"/>.</t> <section numbered="true" toc="include" removeInRFC="false" pn="section-3.9.1"> <name slugifiedName="name-port-usage">Port Usage</name> <t pn="section-3.9.1-1">Under typical operation, an MPTCP implementation <bcp14>SHOULD</bcp14> use the same ports as the ports that are already in use. In other words, the destination port of a SYN containing an MP_JOIN option <bcp14>SHOULD</bcp14> be the same as the remote port of the first subflow in the connection. The local port for such SYNs <bcp14>SHOULD</bcp14> also be the same as the port for the first subflow (and as such, an implementation <bcp14>SHOULD</bcp14> reserve ephemeral ports across all local IP addresses), although there may be cases where this is infeasible. This strategy is intended to maximize the probability of the SYN being permitted by a firewall or NAT at the recipient and to avoid confusing any network-monitoring software.</t> <t pn="section-3.9.1-2">There may also be cases, however, where a host wishes to signal that a specific port should be used; this facility is provided in the ADD_ADDR option as documented in <xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>. 
It is therefore feasible to allow multiple subflows between the same two addresses but using different port pairs, and such a facility could be used to allow load balancing within the network based on 5-tuples (e.g., some ECMP implementations <xref target="RFC2992" format="default" sectionFormat="of" derivedContent="RFC2992"/>).</t> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-3.9.2"> <name slugifiedName="name-delayed-subflow-start-and-s">Delayed Subflow Start and Subflow Symmetry</name> <t pn="section-3.9.2-1">Many TCP connections are short-lived and consist only of a few segments, and so the overhead of using MPTCP outweighs any benefits. A heuristic is required, therefore, to decide when to start using additional subflows in an MPTCP connection. Experimental deployments have shown that MPTCP can be applied in a range of scenarios, so an implementation will likely need to take into account such factors as the type of traffic being sent and the duration of the session; this information <bcp14>MAY</bcp14> be signaled by the application layer.</t> <t pn="section-3.9.2-2">However, for standard TCP traffic, a suggested general-purpose heuristic that an implementation <bcp14>MAY</bcp14> choose to employ is as follows.</t> <t pn="section-3.9.2-3">If a host has data buffered for its peer (which implies that the application has received a request for data), the host opens one subflow for each initial window's worth of data that is buffered.</t> <t pn="section-3.9.2-4">Consideration should also be given to limiting the rate of adding new subflows, as well as limiting the total number of subflows open for a particular connection. A host may choose to vary these values based on its load or knowledge of traffic and path characteristics.</t> <t pn="section-3.9.2-5">Note that this heuristic alone is probably insufficient. 
Traffic for many common applications, such as downloads, is highly asymmetric, and the host that is multihomed may well be the client that will never fill its buffers and thus never use MPTCP according to this heuristic. Advanced APIs that allow an application to signal its traffic requirements would aid in these decisions.</t> <t pn="section-3.9.2-6">An additional time-based heuristic could be applied, opening additional subflows after a given period of time has passed. This would alleviate the above issue and also provide resilience for low‑bandwidth but long-lived applications.</t> <t pn="section-3.9.2-7">Another issue is that both communicating hosts may simultaneously try to set up a subflow between the same pair of addresses. This leads to an inefficient use of resources.</t> <t pn="section-3.9.2-8">If the same ports are used on all subflows, as recommended above, then standard TCP simultaneous-open logic should take care of this situation and only one subflow will be established between the address pairs. However, this relies on the same ports being used at both end hosts. If a host does not support TCP simultaneous open, it is <bcp14>RECOMMENDED</bcp14> that some element of randomization be applied to the time to wait before opening new subflows, so that only one subflow is created between a given address pair. 
If, however, hosts signal additional ports to use (for example, for leveraging ECMP on-path), this heuristic is not appropriate.</t> <t pn="section-3.9.2-9">This section has shown some of the factors that an implementer should consider when developing MPTCP heuristics, but it is not intended to be prescriptive.</t> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-3.9.3"> <name slugifiedName="name-failure-handling">Failure Handling</name> <t pn="section-3.9.3-1">Requirements for MPTCP's handling of unexpected signals are given in <xref target="sec_errors" format="default" sectionFormat="of" derivedContent="Section 3.8"/>. There are other failure cases, however, where hosts can choose appropriate behavior.</t> <t pn="section-3.9.3-2">For example, <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/> suggests that a host <bcp14>SHOULD</bcp14> fall back to trying regular TCP SYNs after one or more failures of MPTCP SYNs for a connection. A host may keep a system-wide cache of such information, so that it can back off from using MPTCP, firstly for that particular destination host and, eventually, on a whole interface, if MPTCP connections continue to fail. The duration of such a cache would be implementation specific.</t> <t pn="section-3.9.3-3">Another failure could occur when the MP_JOIN handshake fails. <xref target="sec_errors" format="default" sectionFormat="of" derivedContent="Section 3.8"/> specifies that an incorrect handshake <bcp14>MUST</bcp14> lead to the subflow being closed with a RST. A host operating an active intrusion-detection system may choose to start blocking MP_JOIN packets from the source host if multiple failed MP_JOIN attempts are seen. 
From the connection initiator's point of view, if an MP_JOIN fails, it <bcp14>SHOULD NOT</bcp14> attempt to connect to the same IP address and port during the lifetime of the connection, unless the other host refreshes the information with another ADD_ADDR option. Note that the ADD_ADDR option is informational only and does not guarantee that the other host will attempt a connection.</t> <t pn="section-3.9.3-4">In addition, an implementation may learn, over a number of connections, that certain interfaces or destination addresses consistently fail and may default to not trying to use MPTCP for such interfaces or addresses. The behavior of subflows that perform particularly badly or subflows that regularly fail during use could also be learned, so that an implementation can temporarily choose not to use these paths.</t> </section> </section> </section> <section anchor="sec_semantics" numbered="true" toc="include" removeInRFC="false" pn="section-4"> <name slugifiedName="name-semantic-issues">Semantic Issues</name> <t pn="section-4-1">In order to support multipath operation, the semantics of some TCP components have changed. To help clarify, this section lists these semantic changes as a point of reference. </t> <dl newline="false" spacing="normal" indent="3" pn="section-4-2"> <dt pn="section-4-2.1">Sequence number:</dt> <dd pn="section-4-2.2"> The (in-header) TCP sequence number is specific to the subflow. To allow the receiver to reorder application data, an additional data-level sequence space is used. In this data‑level sequence space, the initial SYN and the final DATA_FIN occupy 1 octet of sequence space. This is done to ensure that these signals are acknowledged at the connection level. 
There is an explicit mapping of data sequence space to subflow sequence space, which is signaled through TCP options in data packets.</dd> <dt pn="section-4-2.3">ACK:</dt> <dd pn="section-4-2.4"> The ACK field in the TCP header acknowledges only the subflow sequence number -- not the data-level sequence space. Implementations <bcp14>SHOULD NOT</bcp14> attempt to infer a data-level acknowledgment from the subflow ACKs. This separates subflow-level and connection-level processing at an end host.</dd> <dt pn="section-4-2.5">Duplicate ACK:</dt> <dd pn="section-4-2.6"> A duplicate ACK that includes any MPTCP signaling (with the exception of the DSS option) <bcp14>MUST NOT</bcp14> be treated as a signal of congestion. To limit the chances of non-MPTCP-aware entities mistakenly interpreting duplicate ACKs as a signal of congestion, MPTCP <bcp14>SHOULD NOT</bcp14> send more than two duplicate ACKs containing (non-DSS) MPTCP signals in a row.</dd> <dt pn="section-4-2.7">Receive Window:</dt> <dd pn="section-4-2.8">The receive window in the TCP header indicates the amount of free buffer space for the whole data-level connection (as opposed to the amount of space for this subflow) that is available at the receiver. The semantics are the same as for regular TCP, but to maintain these semantics the receive window must be interpreted at the sender as relative to the sequence number given in the DATA_ACK rather than the subflow ACK in the TCP header. In this way, the original role of flow control is preserved. Note that some middleboxes may change the receive window, and so a host <bcp14>SHOULD</bcp14> use the maximum value of those recently seen on the constituent subflows for the connection-level receive window and also needs to maintain a subflow-level window for subflow-level processing.</dd> <dt pn="section-4-2.9">FIN:</dt> <dd pn="section-4-2.10"> The FIN flag in the TCP header applies only to the subflow it is sent on -- not to the whole connection. 
For connection-level FIN semantics, the DATA_FIN option is used.</dd> <dt pn="section-4-2.11">RST:</dt> <dd pn="section-4-2.12"> The RST flag in the TCP header applies only to the subflow it is sent on -- not to the whole connection. The MP_FASTCLOSE option provides the Fast Close functionality of a RST at the MPTCP connection level.</dd> <dt pn="section-4-2.13">Address List:</dt> <dd pn="section-4-2.14"> Address list management (i.e., knowledge of the local and remote hosts' lists of available IP addresses) is handled on a per-connection basis (as opposed to per subflow, per host, or per pair of communicating hosts). This permits the application of per-connection local policy. Adding an address to one connection (either explicitly through an ADD_ADDR message or implicitly through an MP_JOIN) has no implications for other connections between the same pair of hosts.</dd> <dt pn="section-4-2.15">5-tuple:</dt> <dd pn="section-4-2.16"> The 5-tuple (protocol, local address, local port, remote address, remote port) presented by kernel APIs to the application layer in a non-multipath-aware application is that of the first subflow, even if the subflow has since been closed and removed from the connection. This decision, and other related API issues, are discussed in more detail in <xref target="RFC6897" format="default" sectionFormat="of" derivedContent="RFC6897"/>.</dd> </dl> </section> <section anchor="sec_security" numbered="true" toc="include" removeInRFC="false" pn="section-5"> <name slugifiedName="name-security-considerations">Security Considerations</name> <t pn="section-5-1">As identified in <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>, the addition of multipath capability to TCP will bring with it a number of new classes of threats. In order to prevent these threats, <xref target="RFC6182" format="default" sectionFormat="of" derivedContent="RFC6182"/> presents a set of requirements for a security solution for MPTCP. 
The fundamental goal is for the security of MPTCP to be "no worse" than regular TCP today. The key security requirements are as follows: </t> <ul spacing="normal" bare="false" empty="false" pn="section-5-2"> <li pn="section-5-2.1">Provide a mechanism to confirm that the parties in a subflow handshake are the same as the parties in the original connection setup.</li> <li pn="section-5-2.2">Provide verification that the peer can receive traffic at a new address before using it as part of a connection.</li> <li pn="section-5-2.3">Provide replay protection, i.e., ensure that a request to add/remove a subflow is "fresh".</li> </ul> <t pn="section-5-3"> In order to achieve these goals, MPTCP includes a hash-based handshake algorithm, as documented in Sections <xref target="sec_init" format="counter" sectionFormat="of" derivedContent="3.1"/> and <xref target="sec_join" format="counter" sectionFormat="of" derivedContent="3.2"/>.</t> <t pn="section-5-4">The security of the MPTCP connection hangs on the use of keys that are shared once at the start of the first subflow and are never sent again over the network (unless used in the Fast Close mechanism (<xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"/>)). To ease demultiplexing while not giving away any cryptographic material, future subflows use a truncated cryptographic hash of this key as the connection identification "token". The keys are concatenated and used as keys for creating Hash-based Message Authentication Codes (HMACs) used on subflow setup, in order to verify that the parties in the handshake are the same as the parties in the original connection setup. It also provides verification that the peer can receive traffic at this new address. Replay attacks would still be possible when only keys are used; therefore, the handshakes use single-use random numbers (nonces) at both ends -- this ensures that the HMAC will never be the same on two handshakes. 
Guidance on generating random numbers suitable for use as keys is given in <xref target="RFC4086" format="default" sectionFormat="of" derivedContent="RFC4086"/> and discussed in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>. The nonces are valid for the lifetime of the TCP connection attempt. HMAC is also used to secure the ADD_ADDR option, due to the threats identified in <xref target="RFC7430" format="default" sectionFormat="of" derivedContent="RFC7430"/>.</t> <t pn="section-5-5">The use of crypto capability bits in the initial connection handshake to negotiate the use of a particular algorithm allows the deployment of additional crypto mechanisms in the future. This negotiation would nevertheless be susceptible to a bid-down attack by an on-path active attacker who could modify the crypto capability bits in the response from the receiver to use a less secure crypto mechanism. The security mechanism presented in this document should therefore protect against all forms of flooding and hijacking attacks discussed in <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>.</t> <t pn="section-5-6">The version negotiation specified in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>, if differing MPTCP versions shared a common negotiation format, would allow an on-path attacker to apply a theoretical bid-down attack. Since the v1 and v0 protocols have a different handshake, such an attack would require that the client re-establish the connection using v0 and that the server support v0. Note that an on-path attacker would have access to the raw data, negating any other TCP-level security mechanisms. 
As also noted in <xref target="app_changelog" format="default" sectionFormat="of" derivedContent="Appendix E"/>, this document specifies the removal of the AddrID field <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> in the MP_PRIO option (<xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>). This change eliminates the possibility of a theoretical attack where a subflow could be placed in "backup" mode by an attacker.</t> <t pn="section-5-7">During normal operation, regular TCP protection mechanisms (such as ensuring that sequence numbers are in-window) will provide the same level of protection against attacks on individual TCP subflows as the level of protection that exists for regular TCP today. Implementations will introduce additional buffers compared to regular TCP, to reassemble data at the connection level. The application of window sizing will minimize the risk of denial-of-service attacks consuming resources.</t> <t pn="section-5-8">As discussed in <xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>, a host may advertise its private addresses, but these might point to different hosts in the receiver's network. The MP_JOIN handshake (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) will ensure that this does not succeed in setting up a subflow to the incorrect host. However, it could still create unwanted TCP handshake traffic. This feature of MPTCP could be a target for denial-of-service exploits, with malicious participants in MPTCP connections encouraging the recipient to target other hosts in the network. 
Therefore, implementations should consider heuristics (<xref target="heuristics" format="default" sectionFormat="of" derivedContent="Section 3.9"/>) at both the sender and receiver to reduce the impact of this.</t> <t pn="section-5-9">To further protect against malicious ADD_ADDR messages sent by an off-path attacker, the ADD_ADDR includes an HMAC using the keys negotiated during the handshake. This effectively prevents an attacker from diverting an MPTCP connection through an off-path ADD_ADDR injection into the stream.</t> <t pn="section-5-10">A small security risk could theoretically exist with key reuse, but in order to accomplish a replay attack, both the sender and receiver keys, and the sender and receiver random numbers, in the MP_JOIN handshake (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) would have to match.</t> <t pn="section-5-11">While this specification defines a "medium" security solution, meeting the criteria specified at the start of this section and in the threat analysis document <xref target="RFC6181" format="default" sectionFormat="of" derivedContent="RFC6181"/>, since attacks only ever get worse, it is likely that a future version of MPTCP would need to be able to support stronger security. There are several ways the security of MPTCP could potentially be improved; some of these would be compatible with MPTCP as defined in this document, while others may not be. For now, the best approach is to gain experience with the current approach, establish what might work, and check that the threat analysis is still accurate.</t> <t pn="section-5-12">Possible ways of improving MPTCP security could include:</t> <ul spacing="normal" bare="false" empty="false" pn="section-5-13"> <li pn="section-5-13.1">defining a new MPTCP cryptographic algorithm, as negotiated in MP_CAPABLE. 
If an implementation was being deployed in a controlled environment where additional assumptions could be made, such as the ability for the servers to store state during the TCP handshake, then it may be possible to use a stronger cryptographic algorithm than would otherwise be possible.</li> <li pn="section-5-13.2">defining how to secure data transfer with MPTCP, while not changing the signaling part of the protocol.</li> <li pn="section-5-13.3">defining security that requires more option space, perhaps in conjunction with a "long options" proposal for extending the TCP option space (such as those surveyed in <xref target="I-D.ananth-tcpm-tcpoptext" format="default" sectionFormat="of" derivedContent="TCPLO"/>), or perhaps building on the current approach with a second stage of security based on MPTCP options.</li> <li pn="section-5-13.4">revisiting the working group's decision to exclusively use TCP options for MPTCP signaling and instead looking at the possibility of using TCP payloads as well.</li> </ul> <t pn="section-5-14">MPTCP has been designed with several methods available to indicate a new security mechanism, including: </t> <ul spacing="normal" bare="false" empty="false" pn="section-5-15"> <li pn="section-5-15.1">available flags in MP_CAPABLE (<xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</li> <li pn="section-5-15.2">available subtypes in the MPTCP option (<xref target="fig_option" format="default" sectionFormat="of" derivedContent="Figure 3"/>).</li> <li pn="section-5-15.3">the Version field in MP_CAPABLE (<xref target="tcpm_capable" format="default" sectionFormat="of" derivedContent="Figure 4"/>).</li> </ul> </section> <section anchor="sec_middleboxes" numbered="true" toc="include" removeInRFC="false" pn="section-6"> <name slugifiedName="name-interactions-with-middlebox">Interactions with Middleboxes</name> <t pn="section-6-1">Multipath TCP was designed to be deployable in the present world. 
Its design takes into account "reasonable" existing middlebox behavior. In this section, we outline a few representative middlebox-related failure scenarios and show how Multipath TCP handles them. Next, we list the design decisions Multipath TCP has made to accommodate the different middleboxes.</t> <t pn="section-6-2">A primary concern is our use of a new TCP option. Middleboxes should forward packets with unknown options unchanged, yet there are some that don't. We expect these middleboxes to strip options and pass the data, drop packets with new options, copy the same option into multiple segments (e.g., when doing segmentation), or drop options during segment coalescing.</t> <t pn="section-6-3">MPTCP uses a single new TCP option called "Kind", and all message types are defined by "subtype" values (see <xref target="IANA" format="default" sectionFormat="of" derivedContent="Section 7"/>). This should reduce the chances of only some types of MPTCP options being passed; instead, the key differing characteristics are different paths and the presence of the SYN flag.</t> <t pn="section-6-4">MPTCP SYN packets on the first subflow of a connection contain the MP_CAPABLE option (<xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>). If this is dropped, MPTCP <bcp14>SHOULD</bcp14> fall back to regular TCP. If packets with the MP_JOIN option (<xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/>) are dropped, the paths will simply not be used.</t> <t pn="section-6-5">If a middlebox strips options but otherwise passes the packets unchanged, MPTCP will behave safely. 
If an MP_CAPABLE option is dropped on either the outgoing path or the return path, the initiating host can fall back to regular TCP, as illustrated in <xref target="fig_syn" format="default" sectionFormat="of" derivedContent="Figure 17"/> and discussed in <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/>.</t> <figure anchor="fig_syn" align="left" suppress-title="false" pn="figure-17"> <name slugifiedName="name-connection-setup-with-middl">Connection Setup with Middleboxes That Strip Options from Packets</name> <artwork align="left" name="" type="" alt="" pn="section-6-6.1">
  Host A                                Host B
    |              Middlebox M            |
    |                   |                 |
    | SYN (MP_CAPABLE)  |       SYN       |
    |-------------------|---------------->|
    |               SYN/ACK               |
    |&lt;------------------------------------|
  a) MP_CAPABLE option stripped on outgoing path

  Host A                                  Host B
    |           SYN (MP_CAPABLE)            |
    |-------------------------------------->|
    |             Middlebox M               |
    |                  |                    |
    |     SYN/ACK      |SYN/ACK (MP_CAPABLE)|
    |&lt;-----------------|--------------------|
  b) MP_CAPABLE option stripped on return path
</artwork> </figure> <t pn="section-6-7">Subflow SYNs contain the MP_JOIN option. If this option is stripped on the outgoing path, the SYN will appear to be a regular SYN to Host B. Depending on whether there is a listening socket on the target port, Host B will reply with either a SYN/ACK or a RST (subflow connection fails). When Host A receives the SYN/ACK, it sends a RST because the SYN/ACK does not contain the MP_JOIN option and its token. Either way, the subflow setup fails but otherwise does not affect the MPTCP connection as a whole.</t> <t pn="section-6-8">We now examine data flow with MPTCP, assuming that the flow is correctly set up, which implies that the options in the SYN packets were allowed through by the relevant middleboxes. 
If options are allowed through and there is no resegmentation or coalescing to TCP segments, Multipath TCP flows can proceed without problems.</t> <t pn="section-6-9">The case when options get stripped on data packets is discussed in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>. If only some MPTCP options are stripped, behavior is not deterministic. If some Data Sequence Mappings are lost, the connection can continue so long as mappings exist for the subflow-level data (e.g., if multiple maps have been sent that reinforce each other). If some subflow-level space is left unmapped, however, the subflow is treated as broken and is closed, using the process described in <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>. MPTCP should survive with a loss of some Data ACKs, but performance will degrade as the fraction of stripped options increases. We do not expect such cases to appear in practice, though: most middleboxes will either strip all options or let them all through.</t> <t pn="section-6-10">We end this section with a list of middlebox classes, their behavior, and the elements in the MPTCP design that allow operation through such middleboxes. Issues surrounding dropping packets with options or stripping options were discussed above and are not included here: </t> <ul spacing="normal" bare="false" empty="false" pn="section-6-11"> <li pn="section-6-11.1">NATs (Network Address (and port) Translators) <xref target="RFC3022" format="default" sectionFormat="of" derivedContent="RFC3022"/> change the source address (and often the source port) of packets. This means that a host will not know its public-facing address for signaling in MPTCP. 
Therefore, MPTCP permits implicit address addition via the MP_JOIN option, and the handshake mechanism ensures that connection attempts to private addresses <xref target="RFC1918" format="default" sectionFormat="of" derivedContent="RFC1918"/>, since they are authenticated, will only set up subflows to the correct hosts. Explicit address removal is undertaken by an Address ID to allow no knowledge of the source address.</li> <li pn="section-6-11.2">Performance Enhancing Proxies (PEPs) <xref target="RFC3135" format="default" sectionFormat="of" derivedContent="RFC3135"/> might proactively ACK data to increase performance. MPTCP, however, relies on accurate congestion control signals from the end host, and non‑MPTCP-aware PEPs will not be able to provide such signals. MPTCP will, therefore, fall back to single-path TCP or close the problematic subflow (see <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/>).</li> <li pn="section-6-11.3">Traffic normalizers <xref target="norm" format="default" sectionFormat="of" derivedContent="norm"/> may not allow holes in sequence numbers, and they may cache packets and retransmit the same data. MPTCP looks like standard TCP on the wire and will not retransmit different data on the same subflow sequence number. In the event of a retransmission, the same data will be retransmitted on the original TCP subflow even if it is additionally retransmitted at the connection level on a different subflow.</li> <li pn="section-6-11.4">Firewalls <xref target="RFC2979" format="default" sectionFormat="of" derivedContent="RFC2979"/> might perform Initial Sequence Number (ISN) randomization on TCP connections. MPTCP uses relative sequence numbers in Data Sequence Mappings to cope with this. 
Like NATs, firewalls will not permit many incoming connections, so MPTCP supports address signaling (ADD_ADDR) so that a multiaddressed host can invite its peer behind the firewall/NAT to connect out to its additional interface.</li> <li pn="section-6-11.5">Intrusion Detection Systems / Intrusion Prevention Systems (IDSs/IPSs) observe packet streams for patterns and content that could threaten a network. MPTCP may require the instrumentation of additional paths, and an MPTCP-aware IDS or IPS would need to read MPTCP tokens to correlate data from multiple subflows to maintain comparable visibility into all of the traffic between devices. Without such changes, an IDS would get an incomplete view of the traffic, increasing the risk of missing traffic of interest (false negatives) and increasing the chances of erroneously identifying a subflow as a risk due to only seeing partial data (false positives).</li> <li pn="section-6-11.6">Application-level middleboxes such as content-aware firewalls may alter the payload within a subflow -- for example, rewriting URIs in HTTP traffic. MPTCP will detect such changes using the checksum and close the affected subflow(s), if there are other subflows that can be used. If all subflows are affected, MPTCP will fall back to TCP, allowing such middleboxes to change the payload. MPTCP-aware middleboxes should be able to adjust the payload and MPTCP metadata in order not to break the connection.</li> </ul> <t pn="section-6-12"> In addition, all classes of middleboxes may affect TCP traffic in the following ways: </t> <ul spacing="normal" bare="false" empty="false" pn="section-6-13"> <li pn="section-6-13.1">TCP options may be removed, or packets with unknown options dropped, by many classes of middleboxes. It is intended that the initial SYN exchange, with a TCP option, will be sufficient to identify the path's capabilities. 
If such a packet does not get through, MPTCP will end up falling back to regular TCP.</li> <li pn="section-6-13.2">Segmentation/coalescing (e.g., TCP segmentation offloading) might copy options between packets and might strip some options. MPTCP's Data Sequence Mapping includes the relative subflow sequence number instead of using the sequence number in the segment. In this way, the mapping is independent of the packets that carry it.</li> <li pn="section-6-13.3">The receive window may be shrunk by some middleboxes at the subflow level. MPTCP will use the maximum window at the data level but will also obey subflow-specific windows.</li> </ul> </section> <section anchor="IANA" numbered="true" toc="include" removeInRFC="false" pn="section-7"> <name slugifiedName="name-iana-considerations">IANA Considerations</name> <t pn="section-7-1">This document obsoletes <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. As such, IANA has updated several registries to point to this document. In addition, this document creates one new registry. 
These topics are described in the following subsections.</t> <section anchor="IANA-TCP-Option-Kind" numbered="true" toc="include" removeInRFC="false" pn="section-7.1"> <name slugifiedName="name-tcp-option-kind-numbers">TCP Option Kind Numbers</name> <t pn="section-7.1-1">IANA has updated the "TCP Option Kind Numbers" registry to point to this document for Multipath TCP, as shown in <xref target="table_tcpo" format="default" sectionFormat="of" derivedContent="Table 1"/>:</t> <table anchor="table_tcpo" align="center" pn="table-1"> <name slugifiedName="name-tcp-option-kind-numbers-2">TCP Option Kind Numbers</name> <thead> <tr> <th align="center" colspan="1" rowspan="1">Kind</th> <th align="center" colspan="1" rowspan="1">Length</th> <th align="center" colspan="1" rowspan="1">Meaning</th> <th align="center" colspan="1" rowspan="1">Reference</th> </tr> </thead> <tbody> <tr> <td align="center" colspan="1" rowspan="1">30</td> <td align="center" colspan="1" rowspan="1">N</td> <td align="center" colspan="1" rowspan="1">Multipath TCP (MPTCP)</td> <td align="center" colspan="1" rowspan="1">RFC 8684</td> </tr> </tbody> </table> </section> <section anchor="IANA_subtypes" numbered="true" toc="include" removeInRFC="false" pn="section-7.2"> <name slugifiedName="name-mptcp-option-subtypes">MPTCP Option Subtypes</name> <t pn="section-7.2-1">The 4-bit MPTCP subtype in the "MPTCP Option Subtypes" subregistry under the "Transmission Control Protocol (TCP) Parameters" registry was defined in <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. 
Since <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> is an Experimental RFC and not a Standards Track RFC, and since no further entries have occurred beyond those pointing to <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>, IANA has replaced the existing registry with the contents of <xref target="table_iana" format="default" sectionFormat="of" derivedContent="Table 2"/> and with the following explanatory note.</t> <t pn="section-7.2-2">Note: This registry specifies the MPTCP Option Subtypes for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please refer to <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>.</t> <table anchor="table_iana" align="center" pn="table-2"> <name slugifiedName="name-mptcp-option-subtypes-2">MPTCP Option Subtypes</name> <thead> <tr> <th align="center" colspan="1" rowspan="1">Value</th> <th align="center" colspan="1" rowspan="1">Symbol</th> <th align="center" colspan="1" rowspan="1">Name</th> <th align="center" colspan="1" rowspan="1">Reference</th> </tr> </thead> <tbody> <tr> <td align="center" colspan="1" rowspan="1">0x0</td> <td align="center" colspan="1" rowspan="1">MP_CAPABLE</td> <td align="center" colspan="1" rowspan="1">Multipath Capable</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x1</td> <td align="center" colspan="1" rowspan="1">MP_JOIN</td> <td align="center" colspan="1" rowspan="1">Join Connection</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x2</td> <td align="center" colspan="1" rowspan="1">DSS</td> <td align="center" colspan="1" rowspan="1">Data Sequence Signal 
(Data ACK and Data Sequence Mapping)</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_generalop" format="default" sectionFormat="of" derivedContent="Section 3.3"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x3</td> <td align="center" colspan="1" rowspan="1">ADD_ADDR</td> <td align="center" colspan="1" rowspan="1">Add Address</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x4</td> <td align="center" colspan="1" rowspan="1">REMOVE_ADDR</td> <td align="center" colspan="1" rowspan="1">Remove Address</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_remove_addr" format="default" sectionFormat="of" derivedContent="Section 3.4.2"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x5</td> <td align="center" colspan="1" rowspan="1">MP_PRIO</td> <td align="center" colspan="1" rowspan="1">Change Subflow Priority</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x6</td> <td align="center" colspan="1" rowspan="1">MP_FAIL</td> <td align="center" colspan="1" rowspan="1">Fallback</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_fallback" format="default" sectionFormat="of" derivedContent="Section 3.7"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x7</td> <td align="center" colspan="1" rowspan="1">MP_FASTCLOSE</td> <td align="center" colspan="1" rowspan="1">Fast Close</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_fastclose" format="default" sectionFormat="of" derivedContent="Section 3.5"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x8</td> <td align="center" colspan="1" 
rowspan="1">MP_TCPRST</td> <td align="center" colspan="1" rowspan="1">Subflow Reset</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0xf</td> <td align="center" colspan="1" rowspan="1">MP_EXPERIMENTAL</td> <td align="center" colspan="1" rowspan="1">Reserved for Private Use</td> <td align="center" colspan="1" rowspan="1"/> </tr> </tbody> </table> <t pn="section-7.2-4">Values 0x9 through 0xe are currently unassigned. Option 0xf is reserved for use by private experiments. Its use may be formalized in a future specification. Future assignments in this registry are to be defined by Standards Action as defined by <xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>. Assignments consist of the MPTCP subtype's symbolic name, its associated value, and a reference to its specification.</t> </section> <section anchor="IANA_handshake" numbered="true" toc="include" removeInRFC="false" pn="section-7.3"> <name slugifiedName="name-mptcp-handshake-algorithms">MPTCP Handshake Algorithms</name> <t pn="section-7.3-1">The "MPTCP Handshake Algorithms" subregistry under the "Transmission Control Protocol (TCP) Parameters" registry was defined in <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. 
Since <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> is an Experimental RFC and not a Standards Track RFC, and since no further entries have occurred beyond those pointing to <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>, IANA has replaced the existing registry with the contents of <xref target="table_crypto" format="default" sectionFormat="of" derivedContent="Table 3"/> and with the following explanatory note.</t> <t pn="section-7.3-2">Note: This registry specifies the MPTCP Handshake Algorithms for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please refer to <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>.</t> <table anchor="table_crypto" align="center" pn="table-3"> <name slugifiedName="name-mptcp-handshake-algorithms-2">MPTCP Handshake Algorithms</name> <thead> <tr> <th align="center" colspan="1" rowspan="1">Flag Bit</th> <th align="center" colspan="1" rowspan="1">Meaning</th> <th align="center" colspan="1" rowspan="1">Reference</th> </tr> </thead> <tbody> <tr> <td align="center" colspan="1" rowspan="1">A</td> <td align="center" colspan="1" rowspan="1">Checksum required</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">B</td> <td align="center" colspan="1" rowspan="1">Extensibility</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">C</td> <td align="center" colspan="1" rowspan="1">Do not attempt to establish new subflows to the source address.</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/></td> </tr> 
<tr> <td align="center" colspan="1" rowspan="1">D-G</td> <td align="center" colspan="1" rowspan="1">Unassigned</td> <td align="center" colspan="1" rowspan="1"/> </tr> <tr> <td align="center" colspan="1" rowspan="1">H</td> <td align="center" colspan="1" rowspan="1">HMAC-SHA256</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_join" format="default" sectionFormat="of" derivedContent="Section 3.2"/></td> </tr> </tbody> </table> <t pn="section-7.3-4">Note that the meanings of bits "D" through "H" can be dependent upon bit "B", depending on how the Extensibility parameter is defined in future specifications; see <xref target="sec_init" format="default" sectionFormat="of" derivedContent="Section 3.1"/> for more information.</t> <t pn="section-7.3-5">Future assignments in this registry are also to be defined by Standards Action as defined by <xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>. Assignments consist of the value of the flags, a symbolic name for the algorithm, and a reference to its specification.</t> </section> <section anchor="IANA_rst" numbered="true" toc="include" removeInRFC="false" pn="section-7.4"> <name slugifiedName="name-mp_tcprst-reason-codes">MP_TCPRST Reason Codes</name> <t pn="section-7.4-1">IANA has created a further subregistry, "MPTCP MP_TCPRST Reason Codes" under the "Transmission Control Protocol (TCP) Parameters" registry, based on the reason code in the MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>) message. Initial values for this registry are given in <xref target="table_rstcodes" format="default" sectionFormat="of" derivedContent="Table 4"/>; future assignments are to be defined by Specification Required as defined by <xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>. 
Assignments consist of the value of the code, a short description of its meaning, and a reference to its specification. The maximum value is 0xff.</t> <table anchor="table_rstcodes" align="center" pn="table-4"> <name slugifiedName="name-mptcp-mp_tcprst-reason-code">MPTCP MP_TCPRST Reason Codes</name> <thead> <tr> <th align="center" colspan="1" rowspan="1">Code</th> <th align="center" colspan="1" rowspan="1">Meaning</th> <th align="center" colspan="1" rowspan="1">Reference</th> </tr> </thead> <tbody> <tr> <td align="center" colspan="1" rowspan="1">0x00</td> <td align="center" colspan="1" rowspan="1">Unspecified error</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x01</td> <td align="center" colspan="1" rowspan="1">MPTCP-specific error</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x02</td> <td align="center" colspan="1" rowspan="1">Lack of resources</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x03</td> <td align="center" colspan="1" rowspan="1">Administratively prohibited</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x04</td> <td align="center" colspan="1" rowspan="1">Too much outstanding data</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x05</td> <td 
align="center" colspan="1" rowspan="1">Unacceptable performance</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> <tr> <td align="center" colspan="1" rowspan="1">0x06</td> <td align="center" colspan="1" rowspan="1">Middlebox interference</td> <td align="center" colspan="1" rowspan="1">RFC 8684, <xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/></td> </tr> </tbody> </table> <t pn="section-7.4-3">As guidance to the designated expert <xref target="RFC8126" format="default" sectionFormat="of" derivedContent="RFC8126"/>, assignments should not normally be refused unless codepoint space is becoming scarce, provided that there is a clear distinction from other, already-existing codes and also provided that there is sufficient guidance for implementers both sending and receiving these codes.</t> </section> </section> </middle> <back> <displayreference target="I-D.ananth-tcpm-tcpoptext" to="TCPLO"/> <references pn="section-8"> <name slugifiedName="name-references">References</name> <references pn="section-8.1"> <name slugifiedName="name-normative-references">Normative References</name> <reference anchor="RFC0793" target="https://www.rfc-editor.org/info/rfc793" quoteTitle="true" derivedAnchor="RFC0793"> <front> <title>Transmission Control Protocol</title> <author initials="J." surname="Postel" fullname="J. Postel"> <organization showOnFrontPage="true"/> </author> <date year="1981" month="September"/> </front> <seriesInfo name="STD" value="7"/> <seriesInfo name="RFC" value="793"/> <seriesInfo name="DOI" value="10.17487/RFC0793"/> </reference> <reference anchor="RFC2104" target="https://www.rfc-editor.org/info/rfc2104" quoteTitle="true" derivedAnchor="RFC2104"> <front> <title>HMAC: Keyed-Hashing for Message Authentication</title> <author initials="H." surname="Krawczyk" fullname="H. 
Krawczyk"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Bellare" fullname="M. Bellare"> <organization showOnFrontPage="true"/> </author> <author initials="R." surname="Canetti" fullname="R. Canetti"> <organization showOnFrontPage="true"/> </author> <date year="1997" month="February"/> <abstract> <t>This document describes HMAC, a mechanism for message authentication using cryptographic hash functions. HMAC can be used with any iterative cryptographic hash function, e.g., MD5, SHA-1, in combination with a secret shared key. The cryptographic strength of HMAC depends on the properties of the underlying hash function. This memo provides information for the Internet community. This memo does not specify an Internet standard of any kind</t> </abstract> </front> <seriesInfo name="RFC" value="2104"/> <seriesInfo name="DOI" value="10.17487/RFC2104"/> </reference> <reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2119" quoteTitle="true" derivedAnchor="RFC2119"> <front> <title>Key words for use in RFCs to Indicate Requirement Levels</title> <author initials="S." surname="Bradner" fullname="S. Bradner"> <organization showOnFrontPage="true"/> </author> <date year="1997" month="March"/> <abstract> <t>In many standards track documents several words are used to signify the requirements in the specification. These words are often capitalized. This document defines these words as they should be interpreted in IETF documents. 
This document specifies an Internet Best Current Practices for the Internet Community, and requests discussion and suggestions for improvements.</t> </abstract> </front> <seriesInfo name="BCP" value="14"/> <seriesInfo name="RFC" value="2119"/> <seriesInfo name="DOI" value="10.17487/RFC2119"/> </reference> <reference anchor="RFC5961" target="https://www.rfc-editor.org/info/rfc5961" quoteTitle="true" derivedAnchor="RFC5961"> <front> <title>Improving TCP's Robustness to Blind In-Window Attacks</title> <author initials="A." surname="Ramaiah" fullname="A. Ramaiah"> <organization showOnFrontPage="true"/> </author> <author initials="R." surname="Stewart" fullname="R. Stewart"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Dalal" fullname="M. Dalal"> <organization showOnFrontPage="true"/> </author> <date year="2010" month="August"/> <abstract> <t>TCP has historically been considered to be protected against spoofed off-path packet injection attacks by relying on the fact that it is difficult to guess the 4-tuple (the source and destination IP addresses and the source and destination ports) in combination with the 32-bit sequence number(s). A combination of increasing window sizes and applications using longer-term connections (e.g., H-323 or Border Gateway Protocol (BGP) [STANDARDS-TRACK]</t> </abstract> </front> <seriesInfo name="RFC" value="5961"/> <seriesInfo name="DOI" value="10.17487/RFC5961"/> </reference> <reference anchor="RFC6234" target="https://www.rfc-editor.org/info/rfc6234" quoteTitle="true" derivedAnchor="RFC6234"> <front> <title>US Secure Hash Algorithms (SHA and SHA-based HMAC and HKDF)</title> <author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3rd"> <organization showOnFrontPage="true"/> </author> <author initials="T." surname="Hansen" fullname="T. 
Hansen"> <organization showOnFrontPage="true"/> </author> <date year="2011" month="May"/> <abstract> <t>Federal Information Processing Standard, FIPS</t> </abstract> </front> <seriesInfo name="RFC" value="6234"/> <seriesInfo name="DOI" value="10.17487/RFC6234"/> </reference> <reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8174" quoteTitle="true" derivedAnchor="RFC8174"> <front> <title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</title> <author initials="B." surname="Leiba" fullname="B. Leiba"> <organization showOnFrontPage="true"/> </author> <date year="2017" month="May"/> <abstract> <t>RFC 2119 specifies common key words that may be used in protocol specifications. This document aims to reduce the ambiguity by clarifying that only UPPERCASE usage of the key words have the defined special meanings.</t> </abstract> </front> <seriesInfo name="BCP" value="14"/> <seriesInfo name="RFC" value="8174"/> <seriesInfo name="DOI" value="10.17487/RFC8174"/> </reference> </references> <references pn="section-8.2"> <name slugifiedName="name-informative-references">Informative References</name> <reference anchor="deployments" target="https://www.ietfjournal.org/multipath-tcp-deployments/" quoteTitle="true" derivedAnchor="deployments"> <front> <title abbrev="MPTCP Deployments">Multipath TCP Deployments</title> <seriesInfo name="IETF Journal" value="2016"/> <author initials="O." surname="Bonaventure" fullname="Olivier Bonaventure"> <organization showOnFrontPage="true">Universite Catholique de Louvain</organization> </author> <author initials="S." surname="Seo" fullname="SungHoon Seo"/> <date month="November" year="2016"/> </front> </reference> <reference anchor="howhard" target="https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/raiciu" quoteTitle="true" derivedAnchor="howhard"> <front> <title abbrev="How Hard Can It Be? Designing and Implementing a Deployable Multipath TCP">How Hard Can It Be? 
Designing and Implementing a Deployable Multipath TCP</title> <seriesInfo name="Usenix Symposium on Networked Systems Design and Implementation" value="2012"/> <author initials="C." surname="Raiciu" fullname="Costin Raiciu"> <organization showOnFrontPage="true">Universitatea Politehnica Bucuresti</organization> </author> <author initials="C." surname="Paasch" fullname="Christoph Paasch"> <organization showOnFrontPage="true">Universite Catholique de Louvain</organization> </author> <author initials="S." surname="Barre" fullname="Sebastien Barre"> <organization showOnFrontPage="true">Universite Catholique de Louvain</organization> </author> <author initials="A." surname="Ford" fullname="Alan Ford"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Honda" fullname="Michio Honda"> <organization showOnFrontPage="true">Keio University</organization> </author> <author initials="F." surname="Duchene" fullname="Fabien Duchene"> <organization showOnFrontPage="true">Universite Catholique de Louvain</organization> </author> <author initials="O." surname="Bonaventure" fullname="Olivier Bonaventure"> <organization showOnFrontPage="true">Universite Catholique de Louvain</organization> </author> <author initials="M." surname="Handley" fullname="Mark Handley"> <organization showOnFrontPage="true">University College London</organization> </author> <date month="April" year="2012"/> </front> </reference> <reference anchor="norm" target="https://www.usenix.org/legacy/events/sec01/full_papers/handley/handley.pdf" quoteTitle="true" derivedAnchor="norm"> <front> <title abbrev="Network Intrusion Detection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics">Network Intrusion Detection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics</title> <seriesInfo name="Usenix Security Symposium" value="2001"/> <author initials="M." 
surname="Handley" fullname="Mark Handley"> <organization showOnFrontPage="true">ACIRI</organization> </author> <author initials="V." surname="Paxson" fullname="Vern Paxson"> <organization showOnFrontPage="true">ACIRI</organization> </author> <author initials="C." surname="Kreibich" fullname="Christian Kreibich"> <organization showOnFrontPage="true">Technische Universitat Munchen</organization> </author> <date month="August" year="2001"/> </front> </reference> <reference anchor="RFC1122" target="https://www.rfc-editor.org/info/rfc1122" quoteTitle="true" derivedAnchor="RFC1122"> <front> <title>Requirements for Internet Hosts - Communication Layers</title> <author initials="R." surname="Braden" fullname="R. Braden" role="editor"> <organization showOnFrontPage="true"/> </author> <date year="1989" month="October"/> <abstract> <t>This RFC is an official specification for the Internet community. It incorporates by reference, amends, corrects, and supplements the primary protocol standards documents relating to hosts. [STANDARDS-TRACK]</t> </abstract> </front> <seriesInfo name="STD" value="3"/> <seriesInfo name="RFC" value="1122"/> <seriesInfo name="DOI" value="10.17487/RFC1122"/> </reference> <reference anchor="RFC1918" target="https://www.rfc-editor.org/info/rfc1918" quoteTitle="true" derivedAnchor="RFC1918"> <front> <title>Address Allocation for Private Internets</title> <author initials="Y." surname="Rekhter" fullname="Y. Rekhter"> <organization showOnFrontPage="true"/> </author> <author initials="B." surname="Moskowitz" fullname="B. Moskowitz"> <organization showOnFrontPage="true"/> </author> <author initials="D." surname="Karrenberg" fullname="D. Karrenberg"> <organization showOnFrontPage="true"/> </author> <author initials="G. J." 
surname="de Groot" fullname="G. J. de Groot"> <organization showOnFrontPage="true"/> </author> <author initials="E." surname="Lear" fullname="E. Lear"> <organization showOnFrontPage="true"/> </author> <date year="1996" month="February"/> <abstract> <t>This document describes address allocation for private internets. This document specifies an Internet Best Current Practices for the Internet Community, and requests discussion and suggestions for improvements.</t> </abstract> </front> <seriesInfo name="BCP" value="5"/> <seriesInfo name="RFC" value="1918"/> <seriesInfo name="DOI" value="10.17487/RFC1918"/> </reference> <reference anchor="RFC2018" target="https://www.rfc-editor.org/info/rfc2018" quoteTitle="true" derivedAnchor="RFC2018"> <front> <title>TCP Selective Acknowledgment Options</title> <author initials="M." surname="Mathis" fullname="M. Mathis"> <organization showOnFrontPage="true"/> </author> <author initials="J." surname="Mahdavi" fullname="J. Mahdavi"> <organization showOnFrontPage="true"/> </author> <author initials="S." surname="Floyd" fullname="S. Floyd"> <organization showOnFrontPage="true"/> </author> <author initials="A." surname="Romanow" fullname="A. Romanow"> <organization showOnFrontPage="true"/> </author> <date year="1996" month="October"/> <abstract> <t>This memo proposes an implementation of SACK and discusses its performance and related issues. [STANDARDS-TRACK]</t> </abstract> </front> <seriesInfo name="RFC" value="2018"/> <seriesInfo name="DOI" value="10.17487/RFC2018"/> </reference> <reference anchor="RFC2979" target="https://www.rfc-editor.org/info/rfc2979" quoteTitle="true" derivedAnchor="RFC2979"> <front> <title>Behavior of and Requirements for Internet Firewalls</title> <author initials="N." surname="Freed" fullname="N. 
Freed"> <organization showOnFrontPage="true"/> </author> <date year="2000" month="October"/> <abstract> <t>This memo defines behavioral characteristics of and interoperability requirements for Internet firewalls. This memo provides information for the Internet community.</t> </abstract> </front> <seriesInfo name="RFC" value="2979"/> <seriesInfo name="DOI" value="10.17487/RFC2979"/> </reference> <reference anchor="RFC2992" target="https://www.rfc-editor.org/info/rfc2992" quoteTitle="true" derivedAnchor="RFC2992"> <front> <title>Analysis of an Equal-Cost Multi-Path Algorithm</title> <author initials="C." surname="Hopps" fullname="C. Hopps"> <organization showOnFrontPage="true"/> </author> <date year="2000" month="November"/> <abstract> <t>Equal-cost multi-path (ECMP) is a routing technique for routing packets along multiple paths of equal cost. The forwarding engine identifies paths by next-hop. When forwarding a packet the router must decide which next-hop (path) to use. This document gives an analysis of one method for making that decision. The analysis includes the performance of the algorithm and the disruption caused by changes to the set of next-hops. 
This memo provides information for the Internet community.</t> </abstract> </front> <seriesInfo name="RFC" value="2992"/> <seriesInfo name="DOI" value="10.17487/RFC2992"/> </reference> <reference anchor="RFC3022" target="https://www.rfc-editor.org/info/rfc3022" quoteTitle="true" derivedAnchor="RFC3022"> <front> <title>Traditional IP Network Address Translator (Traditional NAT)</title> <author initials="P." surname="Srisuresh" fullname="P. Srisuresh"> <organization showOnFrontPage="true"/> </author> <author initials="K." surname="Egevang" fullname="K. Egevang"> <organization showOnFrontPage="true"/> </author> <date year="2001" month="January"/> <abstract> <t>The NAT operation described in this document extends address translation introduced in RFC 1631 and includes a new type of network address and TCP/UDP port translation. In addition, this document corrects the Checksum adjustment algorithm published in RFC 1631 and attempts to discuss NAT operation and limitations in detail. This memo provides information for the Internet community.</t> </abstract> </front> <seriesInfo name="RFC" value="3022"/> <seriesInfo name="DOI" value="10.17487/RFC3022"/> </reference> <reference anchor="RFC3135" target="https://www.rfc-editor.org/info/rfc3135" quoteTitle="true" derivedAnchor="RFC3135"> <front> <title>Performance Enhancing Proxies Intended to Mitigate Link-Related Degradations</title> <author initials="J." surname="Border" fullname="J. Border"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Kojo" fullname="M. Kojo"> <organization showOnFrontPage="true"/> </author> <author initials="J." surname="Griner" fullname="J. Griner"> <organization showOnFrontPage="true"/> </author> <author initials="G." surname="Montenegro" fullname="G. 
Montenegro"> <organization showOnFrontPage="true"/> </author> <author initials="Z." surname="Shelby" fullname="Z. Shelby"> <organization showOnFrontPage="true"/> </author> <date year="2001" month="June"/> <abstract> <t>This document is aregular SYNsurvey of Performance Enhancing Proxies (PEPs) often employed to improve degraded TCP performance caused by characteristics of specific link environments, for example, in satellite, wireless WAN, and wireless LAN environments. This memo provides information for the Internet community.</t> </abstract> </front> <seriesInfo name="RFC" value="3135"/> <seriesInfo name="DOI" value="10.17487/RFC3135"/> </reference> <reference anchor="RFC4086" target="https://www.rfc-editor.org/info/rfc4086" quoteTitle="true" derivedAnchor="RFC4086"> <front> <title>Randomness Requirements for Security</title> <author initials="D." surname="Eastlake 3rd" fullname="D. Eastlake 3rd"> <organization showOnFrontPage="true"/> </author> <author initials="J." surname="Schiller" fullname="J. Schiller"> <organization showOnFrontPage="true"/> </author> <author initials="S." surname="Crocker" fullname="S. Crocker"> <organization showOnFrontPage="true"/> </author> <date year="2005" month="June"/> <abstract> <t>Security systems are built on strong cryptographic algorithms that foil pattern analysis attempts. However, the security of these systems is dependent on generating secret quantities for passwords, cryptographic keys, and similar quantities. The use of pseudo-random processes toHost B. Depending on whether there is a listening socket on the target port, Host B will reply either with SYN/ACK or RST (subflow connection fails). When Hostgenerate secret quantities can result in pseudo-security. Areceives the SYN/ACKsophisticated attacker may find itsends a RST becauseeasier to reproduce theSYN/ACK does not containenvironment that produced theMP_JOIN optionsecret quantities andits token. 
Either way, the subflow setup fails, but otherwise does not affect the MPTCP connection as a whole.</t> <figure align="center" anchor="fig_syn" title="Connection Setup with Middleboxes that Strip Options from Packets"> <artwork align="left"><![CDATA[ Host A Host B | Middlebox M | | | | | SYN(MP_CAPABLE) | SYN | |-------------------|---------------->| | SYN/ACK | |<------------------------------------| a) MP_CAPABLE option stripped on outgoing path Host A Host B | SYN(MP_CAPABLE) | |------------------------------------>| | Middlebox M | | | | | SYN/ACK |SYN/ACK(MP_CAPABLE)| |<----------------|-------------------| b) MP_CAPABLE option stripped on return path ]]></artwork> </figure> <t>We now examine data flow with MPTCP, assumingto search theflow is correctlyresulting small setup, which impliesof possibilities than to locate theoptionsquantities in theSYN packets were allowed through bywhole of therelevant middleboxes. If options are allowed throughpotential number space.</t> <t>Choosing random quantities to foil a resourceful andtheremotivated adversary isno resegmentationsurprisingly difficult. This document points out many pitfalls in using poor entropy sources orcoalescingtraditional pseudo-random number generation techniques for generating such quantities. It recommends the use of truly random hardware techniques and shows that the existing hardware on many systems can be used for this purpose. It provides suggestions to ameliorate the problem when a hardware solution is not available, and it gives examples of how large such quantities need to be for some applications. 
This document specifies an Internet Best Current Practices for the Internet Community, and requests discussion and suggestions for improvements.</t> </abstract> </front> <seriesInfo name="BCP" value="106"/> <seriesInfo name="RFC" value="4086"/> <seriesInfo name="DOI" value="10.17487/RFC4086"/> </reference> <reference anchor="RFC4987" target="https://www.rfc-editor.org/info/rfc4987" quoteTitle="true" derivedAnchor="RFC4987"> <front> <title>TCP SYN Flooding Attacks and Common Mitigations</title> <author initials="W." surname="Eddy" fullname="W. Eddy"> <organization showOnFrontPage="true"/> </author> <date year="2007" month="August"/> <abstract> <t>This document describes TCPsegments, Multipath TCP flows can proceed without problems.</t> <t>The case when options get stripped on data packets hasSYN flooding attacks, which have beendiscussed inwell-known to theFallback section. If only some MPTCP options are stripped, behavior is not deterministic. If some data sequence mappingscommunity for several years. Various countermeasures against these attacks, and the trade-offs of each, arelost,described. This document archives explanations of theconnection can continue so long as mappings existattack and common defense techniques for thesubflow-level data (e.g., if multiple maps have been sent that reinforce each other). If some subflow-level space is left unmapped, however,benefit of TCP implementers and administrators of TCP servers or networks, but does not make any standards-level recommendations. This memo provides information for thesubflow is treated as brokenInternet community.</t> </abstract> </front> <seriesInfo name="RFC" value="4987"/> <seriesInfo name="DOI" value="10.17487/RFC4987"/> </reference> <reference anchor="RFC5681" target="https://www.rfc-editor.org/info/rfc5681" quoteTitle="true" derivedAnchor="RFC5681"> <front> <title>TCP Congestion Control</title> <author initials="M." surname="Allman" fullname="M. 
Allman"> <organization showOnFrontPage="true"/> </author> <author initials="V." surname="Paxson" fullname="V. Paxson"> <organization showOnFrontPage="true"/> </author> <author initials="E." surname="Blanton" fullname="E. Blanton"> <organization showOnFrontPage="true"/> </author> <date year="2009" month="September"/> <abstract> <t>This document defines TCP's four intertwined congestion control algorithms: slow start, congestion avoidance, fast retransmit, andis closed, throughfast recovery. In addition, theprocess described in <xref target="sec_fallback"/>. MPTCPdocument specifies how TCP shouldsurvive withbegin transmission after aloss of some Data ACKs, but performance will degraderelatively long idle period, as well as discussing various acknowledgment generation methods. This document obsoletes RFC 2581. [STANDARDS-TRACK]</t> </abstract> </front> <seriesInfo name="RFC" value="5681"/> <seriesInfo name="DOI" value="10.17487/RFC5681"/> </reference> <reference anchor="RFC6181" target="https://www.rfc-editor.org/info/rfc6181" quoteTitle="true" derivedAnchor="RFC6181"> <front> <title>Threat Analysis for TCP Extensions for Multipath Operation with Multiple Addresses</title> <author initials="M." surname="Bagnulo" fullname="M. Bagnulo"> <organization showOnFrontPage="true"/> </author> <date year="2011" month="March"/> <abstract> <t>Multipath TCP (MPTCP for short) describes thefractionextensions proposed for TCP so that endpoints ofstripped options increases. We do not expect such cases to appear in practice, though: most middleboxes will either strip all options or let them all through.</t> <t>We end this section withalist of middlebox classes, their behavior, andgiven TCP connection can use multiple paths to exchange data. Such extensions enable theelementsexchange of segments using different source-destination address pairs, resulting in theMPTCP design that allow operation through such middleboxes. 
Issues surrounding dropping packets with options or stripping options were discussed above,capability of using multiple paths in a significant number of scenarios. Some level of multihoming andare not included here: <list style="symbols"> <t>NATs <xref target="RFC3022"/> (Network Address (and Port) Translators) changemobility support can be achieved through these extensions. However, thesource address (and often source port)support for multiple IP addresses per endpoint may have implications on the security ofpackets.the resulting MPTCP. Thismeans thatnote includes ahost will not know its public-facing addressthreat analysis forsignaling inMPTCP.Therefore, MPTCP permits implicit address addition viaThis document is not an Internet Standards Track specification; it is published for informational purposes.</t> </abstract> </front> <seriesInfo name="RFC" value="6181"/> <seriesInfo name="DOI" value="10.17487/RFC6181"/> </reference> <reference anchor="RFC6182" target="https://www.rfc-editor.org/info/rfc6182" quoteTitle="true" derivedAnchor="RFC6182"> <front> <title>Architectural Guidelines for Multipath TCP Development</title> <author initials="A." surname="Ford" fullname="A. Ford"> <organization showOnFrontPage="true"/> </author> <author initials="C." surname="Raiciu" fullname="C. Raiciu"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Handley" fullname="M. Handley"> <organization showOnFrontPage="true"/> </author> <author initials="S." surname="Barre" fullname="S. Barre"> <organization showOnFrontPage="true"/> </author> <author initials="J." surname="Iyengar" fullname="J. Iyengar"> <organization showOnFrontPage="true"/> </author> <date year="2011" month="March"/> <abstract> <t>Hosts are often connected by multiple paths, but TCP restricts communications to a single path per transport connection. Resource usage within theMP_JOIN option,network would be more efficient were these multiple paths able to be used concurrently. 
This should enhance user experience through improved resilience to network failure and higher throughput.</t> <t>This document outlines architectural guidelines for thehandshake mechanism ensures that connection attempts to private addresses <xref target="RFC1918"/>, since they are authenticated, will only set up subflowsdevelopment of a Multipath Transport Protocol, with references to how these architectural components come together in thecorrect hosts. Explicit address removaldevelopment of a Multipath TCP (MPTCP). This document lists certain high-level design decisions that provide foundations for the design of the MPTCP protocol, based upon these architectural requirements. This document is not an Internet Standards Track specification; it isundertakenpublished for informational purposes.</t> </abstract> </front> <seriesInfo name="RFC" value="6182"/> <seriesInfo name="DOI" value="10.17487/RFC6182"/> </reference> <reference anchor="RFC6356" target="https://www.rfc-editor.org/info/rfc6356" quoteTitle="true" derivedAnchor="RFC6356"> <front> <title>Coupled Congestion Control for Multipath Transport Protocols</title> <author initials="C." surname="Raiciu" fullname="C. Raiciu"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Handley" fullname="M. Handley"> <organization showOnFrontPage="true"/> </author> <author initials="D." surname="Wischik" fullname="D. Wischik"> <organization showOnFrontPage="true"/> </author> <date year="2011" month="October"/> <abstract> <t>Often endpoints are connected byan Address IDmultiple paths, but communications are usually restricted toallow no knowledge ofa single path per connection. Resource usage within thesource address.</t> <t>Performance Enhancing Proxies (PEPs) <xref target="RFC3135"/> might proactively ACK datanetwork would be more efficient were it possible for these multiple paths toincrease performance. 
MPTCP, however, relies on accurate congestion control signals from the end host, and non-MPTCP-aware PEPs will notbeableused concurrently. Multipath TCP is a proposal toprovideachieve multipath transport in TCP.</t> <t>New congestion control algorithms are needed for multipath transport protocols suchsignals. MPTCP will, therefore, fall back to single-pathas Multipath TCP,or close the problematic subflow (see <xref target="sec_fallback"/>).</t> <t>Traffic Normalizers <xref target="norm"/> may not allow holesas single path algorithms have a series of issues insequence numbers, and may cache packets and retransmitthesame data. MPTCP looks likemultipath context. One of the prominent problems is that running existing algorithms such as standard TCP independently on each path would give thewire, andmultipath flow more than its fair share at a bottleneck link traversed by more than one of its subflows. Further, it is desirable that a source with multiple paths available willnot retransmit different data ontransfer more traffic using thesame subflow sequence number. Inleast congested of theeventpaths, achieving a property called "resource pooling" where a bundle of links effectively behaves like one shared link with bigger capacity. This would increase the overall efficiency of the network and also its robustness to failure.</t> <t>This document presents aretransmission,congestion control algorithm that couples thesame data will be retransmittedcongestion control algorithms running on different subflows by linking their increase functions, and dynamically controls theoriginal TCP subflow even if itoverall aggressiveness of the multipath flow. The result isadditionally retransmitteda practical algorithm that is fair to TCP at bottlenecks while moving traffic away from congested links. 
This document defines an Experimental Protocol for theconnection level on a different subflow.</t> <t>Firewalls <xref target="RFC2979"/> might perform initial sequence number randomization onInternet community.</t> </abstract> </front> <seriesInfo name="RFC" value="6356"/> <seriesInfo name="DOI" value="10.17487/RFC6356"/> </reference> <reference anchor="RFC6528" target="https://www.rfc-editor.org/info/rfc6528" quoteTitle="true" derivedAnchor="RFC6528"> <front> <title>Defending against Sequence Number Attacks</title> <author initials="F." surname="Gont" fullname="F. Gont"> <organization showOnFrontPage="true"/> </author> <author initials="S." surname="Bellovin" fullname="S. Bellovin"> <organization showOnFrontPage="true"/> </author> <date year="2012" month="February"/> <abstract> <t>This document specifies an algorithm for the generation of TCPconnections. MPTCP uses relativeInitial Sequence Numbers (ISNs), such that the chances of an off-path attacker guessing the sequence numbers indata sequence mapping to cope with this. Like NATs, firewalls will not permit many incoming connections, so MPTCP supports address signaling (ADD_ADDR) so thatuse by amultiaddressed host can invite its peer behindtarget connection are reduced. This document revises (and formally obsoletes) RFC 1948, and takes thefirewall/NATISN generation algorithm originally proposed in that document toconnect outStandards Track, formally updating RFC 793. [STANDARDS-TRACK]</t> </abstract> </front> <seriesInfo name="RFC" value="6528"/> <seriesInfo name="DOI" value="10.17487/RFC6528"/> </reference> <reference anchor="RFC6824" target="https://www.rfc-editor.org/info/rfc6824" quoteTitle="true" derivedAnchor="RFC6824"> <front> <title>TCP Extensions for Multipath Operation with Multiple Addresses</title> <author initials="A." surname="Ford" fullname="A. Ford"> <organization showOnFrontPage="true"/> </author> <author initials="C." surname="Raiciu" fullname="C. 
Raiciu"> <organization showOnFrontPage="true"/> </author> <author initials="M." surname="Handley" fullname="M. Handley"> <organization showOnFrontPage="true"/> </author> <author initials="O." surname="Bonaventure" fullname="O. Bonaventure"> <organization showOnFrontPage="true"/> </author> <date year="2013" month="January"/> <abstract> <t>TCP/IP communication is currently restricted toits additional interface.</t> <t>Intrusion Detection/Prevention Systems (IDS/IPS) observe packet streamsa single path per connection, yet multiple paths often exist between peers. The simultaneous use of these multiple paths forpatterns and content that could threatenanetwork. MPTCP may requireTCP/IP session would improve resource usage within theinstrumentation of additional paths,network and, thus, improve user experience through higher throughput andan MPTCP-aware IDS/IPS would need to read MPTCP tokens to correlate data from mutliple subflowsimproved resilience tomaintain comparable visibility into all ofnetwork failure.</t> <t>Multipath TCP provides thetrafficability to simultaneously use multiple paths betweendevices. Without such changes, an IDS would get an incomplete view of the traffic, increasing the risk of missing trafficpeers. This document presents a set ofinterest (false negatives), and increasingextensions to traditional TCP to support multipath operation. The protocol offers thechancessame type oferroneously identifying a subflow as a risk dueservice toonly seeing partial data (false positives).</t> <t>Application-level middleboxes such as content-aware firewalls may alter the payload within a subflow, suchapplications asrewriting URIs in HTTP traffic. MPTCP will detect these using the checksumTCP (i.e., reliable bytestream), andclose the affected subflow(s), if there are other subflows that can be used. If all subflows are affected, multipath will fall back to TCP, allowing such middleboxes to changeit provides thepayload. 
MPTCP-aware middleboxes should be ablecomponents necessary toadjust the payloadestablish andMPTCP metadata in order not to breakuse multiple TCP flows across potentially disjoint paths. This document defines an Experimental Protocol for theconnection.</t> </list> In addition, all classes of middleboxes may affectInternet community.</t> </abstract> </front> <seriesInfo name="RFC" value="6824"/> <seriesInfo name="DOI" value="10.17487/RFC6824"/> </reference> <reference anchor="RFC6897" target="https://www.rfc-editor.org/info/rfc6897" quoteTitle="true" derivedAnchor="RFC6897"> <front> <title>Multipath TCPtraffic in(MPTCP) Application Interface Considerations</title> <author initials="M." surname="Scharf" fullname="M. Scharf"> <organization showOnFrontPage="true"/> </author> <author initials="A." surname="Ford" fullname="A. Ford"> <organization showOnFrontPage="true"/> </author> <date year="2013" month="March"/> <abstract> <t>Multipath TCP (MPTCP) adds thefollowing ways: <list style="symbols"> <t>TCP options may be removed, or packets with unknown options dropped, by many classescapability ofmiddleboxes. It is intended that the initial SYN exchange, withusing multiple paths to aTCP option, willregular TCP session. Even though it is designed to besufficienttotally backward compatible toidentifyapplications, thepath capabilities. If such a packet does not get through, MPTCP will end up falling backdata transport differs compared to regularTCP.</t> <t>Segmentation/Coalescing (e.g., TCP segmentation offloading) might copy options between packetsTCP, andmight strip some options. MPTCP's data sequence mapping includes the relative subflow sequence number instead of using the sequence number in the segment. In this way, the mapping is independentthere are several additional degrees ofthe packetsfreedom thatcarry it.</t> <t>The receive windowapplications maybe shrunk by some middleboxes atwish to exploit. 
This document summarizes thesubflow level.impact that MPTCPwill usemay have on applications, such as changes in performance. Furthermore, it discusses compatibility issues of MPTCP in combination with non-MPTCP-aware applications. Finally, themaximum window at data level, but will also obey subflow-specific windows.</t> </list> </t> </section> <section anchor="Acknowledgments" title="Acknowledgments"> <!-- <t>The authors were originally supported by Trilogy (http://www.trilogy-project.org),document describes aresearch project (ICT-216372) partially funded by the European Community under its Seventh Framework Program.</t> <t>Alan Ford was originally supported by Roke Manor Research and later Cisco Systems.</t> --> <t>The authors gratefully acknowledge significant input into thisbasic application interface that is a simple extension of TCP's interface for MPTCP-aware applications.</t> </abstract> </front> <seriesInfo name="RFC" value="6897"/> <seriesInfo name="DOI" value="10.17487/RFC6897"/> </reference> <reference anchor="RFC7323" target="https://www.rfc-editor.org/info/rfc7323" quoteTitle="true" derivedAnchor="RFC7323"> <front> <title>TCP Extensions for High Performance</title> <author initials="D." surname="Borman" fullname="D. Borman"> <organization showOnFrontPage="true"/> </author> <author initials="B." surname="Braden" fullname="B. Braden"> <organization showOnFrontPage="true"/> </author> <author initials="V." surname="Jacobson" fullname="V. Jacobson"> <organization showOnFrontPage="true"/> </author> <author initials="R." surname="Scheffenegger" fullname="R. Scheffenegger" role="editor"> <organization showOnFrontPage="true"/> </author> <date year="2014" month="September"/> <abstract> <t>This documentfrom Sébastien Barréspecifies a set of TCP extensions to improve performance over paths with a large bandwidth * delay product andAndrew McDonald.</t> <t>The authors also wishtoacknowledge reviewsprovide reliable operation over very high-speed paths. 
It defines the TCP Window Scale (WS) option andcontributions from Iljitsch van Beijnum, Lars Eggert, Marcelo Bagnulo, Robert Hancock, Pasi Sarolahti, Toby Moncaster, Philip Eardley, Sergio Lembo, Lawrence Conroy, Yoshifumi Nishida, Bob Briscoe, Stein Gjessing, Andrew McGregor, Georg Hampel, Anumita Biswas, Wes Eddy, Alexey Melnikov, Francis Dupont, Adrian Farrel, Barry Leiba, Robert Sparks, Sean Turner, Stephen Farrell, Martin Stiemerling, Gregory Detal, Fabien Duchene, Xavier de Foy, Rahul Jadhav, Klemens Schragel, Mirja Kuehlewind, Sheng Jiang, Alissa Cooper, Ines Robles, Roman Danyliw, Adam Roach, Barry Leiba, Alexey Melnikov, Eric Vyncke,the TCP Timestamps (TS) option andBen Kaduk.</t> </section> <section anchor="IANA" title="IANA Considerations">their semantics. The Window Scale option is used to support larger receive windows, while the Timestamps option can be used for at least two distinct mechanisms, Protection Against Wrapped Sequences (PAWS) and Round-Trip Time Measurement (RTTM), that are also described herein.</t> <t>This document obsoletesRFC6824RFC 1323 andas such IANA is requested to update the TCP option space registry to point to thisdescribes changes from it.</t> </abstract> </front> <seriesInfo name="RFC" value="7323"/> <seriesInfo name="DOI" value="10.17487/RFC7323"/> </reference> <reference anchor="RFC7413" target="https://www.rfc-editor.org/info/rfc7413" quoteTitle="true" derivedAnchor="RFC7413"> <front> <title>TCP Fast Open</title> <author initials="Y." surname="Cheng" fullname="Y. Cheng"> <organization showOnFrontPage="true"/> </author> <author initials="J." surname="Chu" fullname="J. Chu"> <organization showOnFrontPage="true"/> </author> <author initials="S." surname="Radhakrishnan" fullname="S. Radhakrishnan"> <organization showOnFrontPage="true"/> </author> <author initials="A." surname="Jain" fullname="A. 
Jain"> <organization showOnFrontPage="true"/> </author> <date year="2014" month="December"/> <abstract> <t>This documentfor Multipath TCP, as follows:</t> <texttable anchor="table_tcpo" title="TCP Option Kind Numbers"> <ttcol align="center">Kind</ttcol> <ttcol align="center">Length</ttcol> <ttcol align="center">Meaning</ttcol> <ttcol align="center">Reference</ttcol> <c>30</c> <c>N</c> <c>Multipath TCP (MPTCP)</c> <c>This document</c> </texttable> <section anchor="IANA_subtypes" title="MPTCP Option Subtypes"> <t>The 4-bit MPTCP subtype sub-registry ("MPTCP Option Subtypes" under the "Transmission Control Protocol (TCP) Parameters" registry) was defined in RFC6824. Since RFC6824 wasdescribes anExperimental not Standards Track RFC, and since no further entries have occurred beyond those pointing to RFC6824, IANA is requestedexperimental TCP mechanism called TCP Fast Open (TFO). TFO allows data toreplacebe carried in theexisting registry with <xref target="table_iana"/>SYN andwith the following explanatory note.</t> <t>Note: This registry specifiesSYN-ACK packets and consumed by theMPTCP Option Subtypes for MPTCP v1, which obsoletesreceiving end during theExperimental MPTCP v0. 
Forinitial connection handshake, and saves up to one full round-trip time (RTT) compared to theMPTCP v0 subtypes, please referstandard TCP, which requires a three-way handshake (3WHS) toRFC6824.</t> <texttable anchor="table_iana" title="MPTCP Option Subtypes"> <ttcol align="center">Value</ttcol> <ttcol align="center">Symbol</ttcol> <ttcol align="center">Name</ttcol> <ttcol align="center">Reference</ttcol> <c>0x0</c> <c>MP_CAPABLE</c> <c>Multipath Capable</c> <c>This document, <xref target="sec_init"/></c> <c>0x1</c> <c>MP_JOIN</c> <c>Join Connection</c> <c>This document, <xref target="sec_join"/></c> <c>0x2</c> <c>DSS</c> <c>Data Sequence Signal (Data ACK andcomplete before datasequence mapping)</c> <c>This document, <xref target="sec_generalop"/></c> <c>0x3</c> <c>ADD_ADDR</c> <c>Add Address</c> <c>This document, <xref target="sec_add_address"/></c> <c>0x4</c> <c>REMOVE_ADDR</c> <c>Remove Address</c> <c>This document, <xref target="sec_remove_addr"/></c> <c>0x5</c> <c>MP_PRIO</c> <c>Change Subflow Priority</c> <c>This document, <xref target="sec_policy"/></c> <c>0x6</c> <c>MP_FAIL</c> <c>Fallback</c> <c>This document, <xref target="sec_fallback"/></c> <c>0x7</c> <c>MP_FASTCLOSE</c> <c>Fast Close</c> <c>This document, <xref target="sec_fastclose"/></c> <c>0x8</c> <c>MP_TCPRST</c> <c>Subflow Reset</c> <c>This document, <xref target="sec_reset"/></c> <c>0xf</c> <c>MP_EXPERIMENTAL</c> <c>Reserved for private experiments</c> <c></c> </texttable> <t>Values 0x9 through 0xe are currently unassigned. Option 0xf is reserved for use by private experiments. Its use maycan beformalizedexchanged. However, TFO deviates from the standard TCP semantics, since the data ina future specification. Future assignmentsthe SYN could be replayed to an application in some rare circumstances. Applications should not use TFO unless they can tolerate thisregistry are to be defined by Standards Actionissue, asdefined by <xref target="RFC8126"/>. 
Assignments consist ofdetailed in theMPTCP subtype's symbolic name and its associated value,Applicability section.</t> </abstract> </front> <seriesInfo name="RFC" value="7413"/> <seriesInfo name="DOI" value="10.17487/RFC7413"/> </reference> <reference anchor="RFC7430" target="https://www.rfc-editor.org/info/rfc7430" quoteTitle="true" derivedAnchor="RFC7430"> <front> <title>Analysis of Residual Threats anda reference to its specification.</t> </section> <section anchor="IANA_handshake" title="MPTCP Handshake Algorithms"> <t>The "MPTCP Handshake Algorithms" sub-registry underPossible Fixes for Multipath TCP (MPTCP)</title> <author initials="M." surname="Bagnulo" fullname="M. Bagnulo"> <organization showOnFrontPage="true"/> </author> <author initials="C." surname="Paasch" fullname="C. Paasch"> <organization showOnFrontPage="true"/> </author> <author initials="F." surname="Gont" fullname="F. Gont"> <organization showOnFrontPage="true"/> </author> <author initials="O." surname="Bonaventure" fullname="O. Bonaventure"> <organization showOnFrontPage="true"/> </author> <author initials="C." surname="Raiciu" fullname="C. Raiciu"> <organization showOnFrontPage="true"/> </author> <date year="2015" month="July"/> <abstract> <t>This document analyzes the"Transmission Control Protocol (TCP) Parameters" registry was defined in RFC6824. Since RFC6824 was an Experimental not Standards Track RFC,residual threats for Multipath TCP (MPTCP) andsince no further entries have occurred beyond those pointing to RFC6824, IANA is requestedexplores possible solutions toreplace the existing registryaddress them.</t> </abstract> </front> <seriesInfo name="RFC" value="7430"/> <seriesInfo name="DOI" value="10.17487/RFC7430"/> </reference> <reference anchor="RFC8041" target="https://www.rfc-editor.org/info/rfc8041" quoteTitle="true" derivedAnchor="RFC8041"> <front> <title>Use Cases and Operational Experience with<xref target="table_crypto"/>Multipath TCP</title> <author initials="O." 
surname="Bonaventure" fullname="O. Bonaventure"> <organization showOnFrontPage="true"/> </author> <author initials="C." surname="Paasch" fullname="C. Paasch"> <organization showOnFrontPage="true"/> </author> <author initials="G." surname="Detal" fullname="G. Detal"> <organization showOnFrontPage="true"/> </author> <date year="2017" month="January"/> <abstract> <t>This document discusses both use cases and operational experience withthe following explanatory note.</t> <t>Note: This registry specifies the MPTCP Handshake Algorithms for MPTCP v1, which obsoletes the Experimental MPTCP v0. For the MPTCP v0 subtypes, please referMultipath TCP (MPTCP) in real networks. It lists several prominent use cases where Multipath TCP has been considered and is being used. It also gives insight toRFC6824.</t> <texttable anchor="table_crypto" title="MPTCP Handshake Algorithms"> <ttcol align="center">Flag Bit</ttcol> <ttcol align="center">Meaning</ttcol> <ttcol align="center">Reference</ttcol> <c>A</c> <c>Checksum required</c> <c>This document, <xref target="sec_init"/></c> <c>B</c> <c>Extensibility</c> <c>This document, <xref target="sec_init"/></c> <c>C</c> <c>Do not attemptsome heuristics and decisions that have helped toestablish new subflowsrealize these use cases and suggests possible improvements.</t> </abstract> </front> <seriesInfo name="RFC" value="8041"/> <seriesInfo name="DOI" value="10.17487/RFC8041"/> </reference> <reference anchor="RFC8126" target="https://www.rfc-editor.org/info/rfc8126" quoteTitle="true" derivedAnchor="RFC8126"> <front> <title>Guidelines for Writing an IANA Considerations Section in RFCs</title> <author initials="M." surname="Cotton" fullname="M. Cotton"> <organization showOnFrontPage="true"/> </author> <author initials="B." surname="Leiba" fullname="B. Leiba"> <organization showOnFrontPage="true"/> </author> <author initials="T." surname="Narten" fullname="T. 
Narten"> <organization showOnFrontPage="true"/> </author> <date year="2017" month="June"/> <abstract> <t>Many protocols make use of points of extensibility that use constants tothe source address.</c> <c>This document, <xref target="sec_init"/></c> <c>D-G</c> <c>Unassigned</c> <c></c> <c>H</c> <c>HMAC-SHA256</c> <c>This document, <xref target="sec_join"/></c> </texttable> <t>Noteidentify various protocol parameters. To ensure that themeanings of bits D through H can be dependent upon bit B, depending on how Extensibility is defined in future specifications; see <xref target="sec_init"/> for more information.</t> <t>Future assignmentsvalues inthis registry are alsothese fields do not have conflicting uses and tobe definedpromote interoperability, their allocations are often coordinated byStandards Action as defineda central record keeper. For IETF protocols, that role is filled by<xref target="RFC8126"/>. Assignments consist ofthevalue of the flags,Internet Assigned Numbers Authority (IANA).</t> <t>To make assignments in asymbolic name forgiven registry prudently, guidance describing thealgorithm,conditions under which new values should be assigned, as well as when anda referencehow modifications toits specification.</t> </section> <section anchor="IANA_rst" title="MP_TCPRST Reason Codes"> <t>IANAexisting values can be made, isrequested to createneeded. This document defines afurther sub-registry, "MPTCP MP_TCPRST Reason Codes" under the "Transmission Control Protocol (TCP) Parameters" registry, based on the reason code in MP_TCPRST (<xref target="sec_reset"/>) message. Initial valuesframework forthis registry are given in <xref target="table_rstcodes"/>; future assignments are to be defined by Specification Required as defined by <xref target="RFC8126"/>. Assignments consist ofthevalue of the code, a short descriptiondocumentation ofits meaning, and a referencethese guidelines by specification authors, in order toits specification. 
The maximum value is 0xff.</t> <t>Asassure that the provided guidancetofor theDesignated Expert <xref target="RFC8126"/>, assignments should not normally be refused unless codepoint space is becoming scarce, providing that thereIANA Considerations isacleardistinction from other, already-existing codes, and also providing there is sufficient guidance for implementors both sendingandreceiving these codes.</t> <texttable anchor="table_rstcodes" title="MPTCP MP_TCPRST Reason Codes"> <ttcol align="center">Code</ttcol> <ttcol align="center">Meaning</ttcol> <ttcol align="center">Reference</ttcol> <c>0x00</c> <c>Unspecified TCP error</c> <c>This document, <xref target="sec_reset"/></c> <c>0x01</c> <c>MPTCP specific error</c> <c>This document, <xref target="sec_reset"/></c> <c>0x02</c> <c>Lack of resources</c> <c>This document, <xref target="sec_reset"/></c> <c>0x03</c> <c>Administratively prohibited</c> <c>This document, <xref target="sec_reset"/></c> <c>0x04</c> <c>Too much outstanding data</c> <c>This document, <xref target="sec_reset"/></c> <c>0x05</c> <c>Unacceptable performance</c> <c>This document, <xref target="sec_reset"/></c> <c>0x06</c> <c>Middlebox interference</c> <c>This document, <xref target="sec_reset"/></c> </texttable> </section> </section> </middle> <!-- *****BACK MATTER ***** --> <back> <references title="Normative References"> &RFC0793; &RFC2104; &RFC2119; &RFC5961; &RFC6234; &RFC8174; </references> <references title="Informative References"> &RFC1122; &RFC7323; &RFC1918; &RFC2018; &RFC5681; &RFC2979; &RFC2992; &RFC3022; &RFC3135; &RFC4086; &RFC4987; &RFC8126; &RFC6181; &RFC6356; &RFC6897; &RFC6182; &RFC6528; &RFC7413; &RFC7430; &RFC8041; <!-- &TCPLO; draft-ananth-tcpm-tcpoptext-00; Expired-->addresses the various issues that are likely in the operation of a registry.</t> <t>This is the third edition of this document; it obsoletes RFC 5226.</t> </abstract> </front> <seriesInfo name="BCP" value="26"/> <seriesInfo name="RFC" value="8126"/> <seriesInfo 
name="DOI" value="10.17487/RFC8126"/> </reference> <referenceanchor='TCPLO'>anchor="I-D.ananth-tcpm-tcpoptext" quoteTitle="true" target="https://tools.ietf.org/html/draft-ananth-tcpm-tcpoptext-00" derivedAnchor="TCPLO"> <front> <title>TCP option space extension</title> <authorinitials='A' surname='Ramaiah' fullname='Anantha Ramaiah'>initials="A" surname="Ramaiah" fullname="Anantha Ramaiah"> <organization/>showOnFrontPage="true"/> </author> <datemonth='March' day='26' year='2012' /> <abstract><t>Themonth="March" day="26" year="2012"/> <abstract> <t>The document goals are as follows: Firstly, this document summarizes the motivations for extending TCP option space. Secondly, It tries to summarize the various known issues that needs to be taken into account while extending the TCP option space. Thirdly, it briefly provides a short summary of the various TCP option space proposals that has been proposed so far. Some additional proposals which includes variations to the existing proposals are also presented. The goal of this document is to rejuvenate the discussions on this topic and eventually to converge on a scheme for extending TCP optionspace.</t></abstract> </front> <seriesInfo name='Work in' value='Progress' /> </reference> <reference anchor='norm' target="http://www.usenix.org/events/sec01/full_papers/handley/handley.pdf"><front><title abbrev="Network Intrusion Detection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics ">Network Intrusion Detection: Evasion, Traffic Normalization, and End-to-End Protocol Semantics</title><author initials='M.' surname='Handley' fullname='Mark Handley'><organization>ACIRI</organization></author><author initials='V.' surname='Paxson' fullname='Vern Paxson'><organization>ACIRI</organization></author><author initials='C.' 
surname='Kreibich' fullname='Christian Kreibich'><organization>Technische Universitat Munchen</organization></author><date year="2001"/></front><seriesInfo name="Usenix Security" value="2001"/></reference> <reference anchor='howhard' target="https://www.usenix.org/conference/nsdi12/how-hard-can-it-be-designing-and-implementing-deployable-multipath-tcp"> <front><title abbrev="How Hard Can It Be? Designing and Implementing a Deployable Multipath TCP">How Hard Can It Be? Designing and Implementing a Deployable Multipath TCP</title> <author initials='C.' surname='Raiciu' fullname='Costin Raiciu'><organization>Universitatea Politehnica Bucuresti</organization></author> <author initials='C.' surname='Paasch' fullname='Christoph Paasch'><organization>Universite Catholique de Louvain</organization></author> <author initials='S.' surname='Barre' fullname='Sebastien Barre'><organization>Universite Catholique de Louvain</organization></author> <author initials='A.' surname='Ford' fullname='Alan Ford'><organization/></author> <author initials='M.' surname='Honda' fullname='Michio Honda'><organization>Keio University</organization></author> <author initials='F.' surname='Duchene' fullname='Fabien Duchene'><organization>Universite Catholique de Louvain</organization></author> <author initials='O.' surname='Bonaventure' fullname='Olivier Bonaventure'><organization>Universite Catholique de Louvain</organization></author> <author initials='M.' 
surname='Handley' fullname='Mark Handley'><organization>University College London</organization></author> <date year="2012" />space.</t> </abstract> </front> <seriesInfoname="Usenix Symposium on Networked Systems Design and Implementation" value="2012"/>name="Internet-Draft" value="draft-ananth-tcpm-tcpoptext-00"/> <format type="TXT" target="http://www.ietf.org/internet-drafts/draft-ananth-tcpm-tcpoptext-00.txt"/> <refcontent>Work in Progress</refcontent> </reference><reference anchor='deployments' target="https://www.ietfjournal.org/multipath-tcp-deployments/"><front><title abbrev="MPTCP Deployments">Multipath TCP Deployments</title><author initials='O.' surname='Bonaventure' fullname='Olivier Bonaventure'><organization>Universite Catholique de Louvain</organization></author><author initials='S.' surname='Seo' fullname='SungHoon Seo'></author><date day="1" month="November" year="2016"/></front><seriesInfo name="IETF Journal" value="2016"/></reference></references> </references> <sectiontitle="Notesanchor="app_options" numbered="true" toc="include" removeInRFC="false" pn="section-appendix.a"> <name slugifiedName="name-notes-on-use-of-tcp-options">Notes on Use of TCPOptions" anchor="app_options"> <t>TheOptions</name> <t pn="section-appendix.a-1">The TCP option space is limited due to the length of the Data Offset field in the TCP header (4 bits), which defines the TCP header length in 32-bit words. 
With the standard TCP header being 20 bytes, this leaves a maximum of 40 bytes for options, and many of these may already be used by options such as timestamp and SACK.</t> <t pn="section-appendix.a-2">We performed a brief study on the commonly used TCP options in SYN, data, and pure ACK packets and found that there is enough room to fit all the options discussed in this document.</t> <t pn="section-appendix.a-3">SYN packets typically include the following options: Maximum Segment Size (MSS) (4 bytes), window scale (3 bytes), SACK permitted (2 bytes), and timestamp (10 bytes). The sum of these options is 19 bytes. Some operating systems appear to pad each option up to a word boundary, thus using 24 bytes (a brief survey suggests that Windows XP and Mac OS X do this, whereas Linux does not). Optimistically, therefore, we have 21 bytes available, or 16 if options have to be word-aligned. In either case, however, the SYN versions of MP_CAPABLE (12 bytes) and MP_JOIN (12 or 16 bytes) will fit in this remaining space.</t> <t pn="section-appendix.a-4">Note that due to the use of a 64-bit data-level sequence space, it is feasible that MPTCP will not require the timestamp option for protection against wrapped sequence numbers (per the Protection Against Wrapped Sequences (PAWS) mechanism, as described in <xref target="RFC7323" format="default" sectionFormat="of" derivedContent="RFC7323"/>), since the data-level sequence space has far less chance of wrapping. Confirmation of the validity of this optimization is left for further study.</t> <t pn="section-appendix.a-5">TCP data packets typically carry timestamp options in every packet, taking 10 bytes (or 12, with padding). That leaves 30 bytes (or 28, if word-aligned).
The DSS option varies in length, depending on (1) whether the Data Sequence Mapping, DATA_ACK, or both are included, (2) whether the sequence numbers in use are 4 or 8 octets, and (3) whether the checksum is present. The maximum size of the DSS option is 28 bytes, so even that will fit in the available space. But unless a connection is both bidirectional and high-bandwidth, it is unlikely that all that option space will be required on each DSS option.</t> <t pn="section-appendix.a-6">Within the DSS option, it is not necessary to include the Data Sequence Mapping and DATA_ACK in each packet, and in many cases it may be possible to alternate their presence (so long as the mapping covers the data being sent in the subsequent packet). It would also be possible to alternate between 4-byte and 8-byte sequence numbers in each option.</t> <t pn="section-appendix.a-7">On subflow and connection setup, an MPTCP option is also set on the third packet (an ACK). These are 20 bytes (for MP_CAPABLE) and 24 bytes (for MP_JOIN), both of which will fit in the available option space.</t> <t pn="section-appendix.a-8">Pure ACKs in TCP typically contain only timestamps (10 bytes). Here, Multipath TCP typically needs to encode only the DATA_ACK (maximum of 12 bytes). Occasionally, ACKs will contain SACK information. Depending on the number of lost packets, SACK may utilize the entire option space. If a DATA_ACK had to be included, then it is probably necessary to reduce the number of SACK blocks to accommodate the DATA_ACK.
However, the presence of the DATA_ACK is unlikely to be necessary in a case where SACK is in use, since until at least some of the SACK blocks have been retransmitted, the cumulative data-level ACK will not be moving forward (or if it does, due to retransmissions on another path, then that path can also be used to transmit the new DATA_ACK).</t> <t pn="section-appendix.a-9">The ADD_ADDR option can be between 16 and 30 bytes, depending on (1) whether IPv4 or IPv6 is used and (2) whether or not the port number is present. It is unlikely that such signaling would fit in a data packet (although if there is space, it is fine to include it). It is recommended that duplicate ACKs not be used with any other payload or options, in order to transmit these rare signals. Note that this is the reason for mandating that duplicate ACKs with MPTCP options not be taken as a signal of congestion.</t> </section> <section anchor="app_tfo" numbered="true" toc="include" removeInRFC="false" pn="section-appendix.b"> <name slugifiedName="name-tcp-fast-open-and-mptcp">TCP Fast Open and MPTCP</name> <t pn="section-appendix.b-1">TCP Fast Open (TFO) is an experimental TCP extension, described in <xref target="RFC7413" format="default" sectionFormat="of" derivedContent="RFC7413"/>, which has been introduced to allow the sending of data one RTT earlier than with regular TCP. This is considered a valuable gain, as very short connections are very common, especially for HTTP request/response schemes. It achieves this by sending the SYN segment together with the application's data and allowing the listener to reply immediately with data after the SYN/ACK.
<xref target="RFC7413" format="default" sectionFormat="of" derivedContent="RFC7413"/> secures this mechanism by using a new TCP option that includes a cookie that is negotiated in a preceding connection.</t> <t pn="section-appendix.b-2">When using TFO in conjunction with MPTCP, there are two key points to take into account, as detailed below.</t> <section anchor="tfocookie" numbered="true" toc="include" removeInRFC="false" pn="section-b.1"> <name slugifiedName="name-tfo-cookie-request-with-mpt">TFO Cookie Request with MPTCP</name> <t pn="section-b.1-1">When a TFO initiator first connects to a listener, it cannot immediately include data in the SYN for security reasons <xref target="RFC7413" format="default" sectionFormat="of" derivedContent="RFC7413"/>. Instead, it requests a cookie that will be used in subsequent connections. This is done with the TCP cookie request/response options, of 2 bytes and 6-18 bytes, respectively (depending on the chosen cookie length).</t> <t pn="section-b.1-2">TFO and MPTCP can be combined, provided that the total length of all the options does not exceed the maximum 40 bytes possible in TCP:</t> <ul spacing="normal" bare="false" empty="false" pn="section-b.1-3"> <li pn="section-b.1-3.1">In the SYN: MPTCP uses a 4-byte MP_CAPABLE option. The sum of the MPTCP and TFO options is 6 bytes. With typical TCP options using up to 19 bytes in the SYN (24 bytes if options are padded at a word boundary), there is enough space to combine the MP_CAPABLE with the TFO cookie request.</li> <li pn="section-b.1-3.2">In the SYN + ACK: MPTCP uses a 12-byte MP_CAPABLE option, but now the TFO option can be as long as 18 bytes.
Since the maximum option length may be exceeded, it is up to the listener to avoid this problem by using a shorter cookie. As an example, if we consider that 19 bytes are used for classical TCP options, the maximum possible cookie length would be 7 bytes. Note that, for the SYN packet, the same limitation applies to subsequent connections (because the initiator then echoes the cookie back to the listener). Finally, if the security impact of reducing the cookie size is not deemed acceptable, the listener can reduce the amount of space used by other TCP options by omitting the TCP timestamps (as outlined in <xref target="app_options" format="default" sectionFormat="of" derivedContent="Appendix A"/>).</li> </ul> </section> <section anchor="tfodata" numbered="true" toc="include" removeInRFC="false" pn="section-b.2"> <name slugifiedName="name-data-sequence-mapping-under">Data Sequence Mapping under TFO</name> <t pn="section-b.2-1">In the TCP establishment phase, MPTCP uses a key exchange that is used to generate the Initial Data Sequence Numbers (IDSNs). In particular, the SYN with MP_CAPABLE occupies the first octet of data sequence space. With TFO, one way to handle the data sent together with the SYN would be to consider an implicit DSS mapping that covers that SYN segment (since there is not enough space in the SYN to include a DSS option). The problem with that approach is that if a middlebox modifies the TFO data, this will not be noticed by MPTCP because of the absence of a DSS checksum. For example, a TCP‑aware (but not MPTCP-aware) middlebox could insert bytes at the beginning of the stream and adapt the TCP checksum and sequence numbers accordingly.
With an implicit mapping, this information would give to the initiator and listener a different view of the DSS mapping; there would be no way to detect this inconsistency, because the DSS checksum is not present.</t> <t pn="section-b.2-2">To solve this issue, the TFO data must not be considered part of the data sequence number space: the SYN with MP_CAPABLE still occupies the first octet of data sequence space, but then the first non-TFO data byte occupies the second octet. This guarantees that, if the use of the DSS checksum is negotiated, all data in the data sequence number space is checksummed. We also note that this does not entail a loss of functionality, because TFO data is always only sent on the initial subflow, before any attempt to create additional subflows.</t> </section> <section anchor="tfoexamples" numbered="true" toc="include" removeInRFC="false" pn="section-b.3"> <name slugifiedName="name-connection-establishment-ex">Connection Establishment Examples</name> <t pn="section-b.3-1">A few examples of possible "TFO + MPTCP" establishment scenarios are shown below.</t> <t pn="section-b.3-2">Before an initiator can send data together with the SYN, it must request a cookie from the listener, as shown in <xref target="fig_tfocookie" format="default" sectionFormat="of" derivedContent="Figure 18"/>. (Note: The sequence number and length are annotated in <xref target="fig_tfocookie" format="default" sectionFormat="of" derivedContent="Figure 18"/> as Seq(Length) (e.g., "S. 0(0)") and used as such in the subsequent figures (e.g., "S 0(20)" in <xref target="fig_tfodata" format="default" sectionFormat="of" derivedContent="Figure 19"/>).)
This is done by simply combining the TFO and MPTCP options.</t> <figurealign="center"anchor="fig_tfocookie"title="Cookie request - sequence number and length are annotated as Seq(Length) and used hereafter in the figures.">align="left" suppress-title="false" pn="figure-18"> <name slugifiedName="name-cookie-request">Cookie Request</name> <artworkalign="left"><![CDATA[align="left" name="" type="" alt="" pn="section-b.3-3.1"> initiator listener | | | S Seq=0(Length=0)<MP_CAPABLE>, <TFO<MP_CAPABLE>, <TFO cookierequest>request> | |----------------------------------------------------------->--------------------------------------------------------> | | | | S. 0(0) ack 1<MP_CAPABLE>, <TFO cookie><MP_CAPABLE>, <TFO cookie> | |<-----------------------------------------------------------<-------------------------------------------------------- | | | | . 0(0) ack 1<MP_CAPABLE><MP_CAPABLE> | |----------------------------------------------------------->--------------------------------------------------------> | | |]]></artwork></artwork> </figure><t>Once<t pn="section-b.3-4">Once this is done, the received cookie can be used for TFO, as shown in <xreftarget="fig_tfodata"/>.target="fig_tfodata" format="default" sectionFormat="of" derivedContent="Figure 19"/>. In this example, the initiator first sends 20 bytes in the SYN. The listener immediately replies with 100 bytes following theSYN-ACK uponSYN-ACK, to which the initiator replies with 20 more bytes. 
Note that the last segment in the figure has a TCP sequence number of 21, while the DSS subflow sequence number is 1 (because the TFO data is not part of the data sequence number space, as explained inSection<xreftarget="tfodata"/>.</t>target="tfodata" format="default" sectionFormat="of" derivedContent="Appendix B.2"/>.</t> <figurealign="center"anchor="fig_tfodata"title="The listener supports TFO">align="left" suppress-title="false" pn="figure-19"> <name slugifiedName="name-the-listener-supports-tfo">The Listener Supports TFO</name> <artworkalign="left"><![CDATA[align="left" name="" type="" alt="" pn="section-b.3-5.1"> initiator listener | | | S 0(20)<MP_CAPABLE>, <TFO cookie><MP_CAPABLE>, <TFO cookie> | |----------------------------------------------------------->--------------------------------------------------------> | | | | S. 0(0) ack 21<MP_CAPABLE><MP_CAPABLE> | |<-----------------------------------------------------------<-------------------------------------------------------- | | | | . 1(100) ack 21<DSS<DSS ack=1 seq=1 ssn=1dlen=100>dlen=100> | |<-----------------------------------------------------------<-------------------------------------------------------- | | | | . 21(0) ack 1<MP_CAPABLE><MP_CAPABLE> | |----------------------------------------------------------->--------------------------------------------------------> | | | | . 21(20) ack 101<DSS<DSS ack=101 seq=1 ssn=1dlen=20>dlen=20> | |----------------------------------------------------------->--------------------------------------------------------> | | |]]></artwork></artwork> </figure><t>In<t pn="section-b.3-6">In <xreftarget="fig_tfofallback"/>,target="fig_tfofallback" format="default" sectionFormat="of" derivedContent="Figure 20"/>, the listener does not support TFO. 
The initiator detects that no state is created in the listener (as no data isacked),ACKed) and now sends the MP_CAPABLE in the thirdack,packet, in order for the listener to build its MPTCP context atthenthe end of the establishment. Now, thetfoTFO data, when retransmitted, becomes part of thedata sequence mappingData Sequence Mapping because it is effectively sent (in factre-sent)re‑sent) after the establishment.</t> <figurealign="center"anchor="fig_tfofallback"title="The listener does not support TFO">align="left" suppress-title="false" pn="figure-20"> <name slugifiedName="name-the-listener-does-not-suppo">The Listener Does Not Support TFO</name> <artworkalign="left"><![CDATA[align="left" name="" type="" alt="" pn="section-b.3-7.1"> initiator listener | | | S 0(20)<MP_CAPABLE>, <TFO cookie><MP_CAPABLE>, <TFO cookie> | |----------------------------------------------------------->--------------------------------------------------------> | | | | S. 0(0) ack 1<MP_CAPABLE><MP_CAPABLE> | |<-----------------------------------------------------------<-------------------------------------------------------- | | | | . 1(0) ack 1<MP_CAPABLE><MP_CAPABLE> | |----------------------------------------------------------->--------------------------------------------------------> | | | | . 1(20) ack 1<DSS<DSS ack=1 seq=1 ssn=1dlen=20>dlen=20> | |----------------------------------------------------------->--------------------------------------------------------> | | | | . 0(0) ack 21<DSS<DSS ack=21 seq=1 ssn=1dlen=0>dlen=0> | |<-----------------------------------------------------------<-------------------------------------------------------- | | |]]></artwork></artwork> </figure><t>It<t pn="section-b.3-8">It is also possible that the listener acknowledges only part of the TFO data, as illustrated in <xreftarget="fig_tfopartial"/>.target="fig_tfopartial" format="default" sectionFormat="of" derivedContent="Figure 21"/>. 
The initiator will simply retransmit the missing data together with aDSS-mapping.</t>DSS mapping.</t> <figurealign="center"anchor="fig_tfopartial"title="Partial data acknowledgement">align="left" suppress-title="false" pn="figure-21"> <name slugifiedName="name-partial-data-acknowledgment">Partial Data Acknowledgment</name> <artworkalign="left"><![CDATA[align="left" name="" type="" alt="" pn="section-b.3-9.1"> initiator listener | | | S 0(1000)<MP_CAPABLE>, <TFO cookie><MP_CAPABLE>, <TFO cookie> | |----------------------------------------------------------->--------------------------------------------------------> | | | | S. 0(0) ack 501<MP_CAPABLE><MP_CAPABLE> | |<-----------------------------------------------------------<-------------------------------------------------------- | | | | . 501(0) ack 1<MP_CAPABLE><MP_CAPABLE> | |----------------------------------------------------------->--------------------------------------------------------> | | | | . 501(500) ack 1<DSS<DSS ack=1 seq=1 ssn=1dlen=500>dlen=500> | |----------------------------------------------------------->--------------------------------------------------------> | | |]]></artwork></artwork> </figure> </section> </section> <sectiontitle="Control Blocks" anchor="app_tcb"> <t>Conceptually,anchor="app_tcb" numbered="true" toc="include" removeInRFC="false" pn="section-appendix.c"> <name slugifiedName="name-control-blocks">Control Blocks</name> <t pn="section-appendix.c-1">Conceptually, an MPTCP connection can be represented as an MPTCP protocol control block (PCB) that contains several variables that track the progress and the state of the MPTCP connection and a set of linked TCP control blocks that correspond to the subflows that have been established.</t><t>RFC<t pn="section-appendix.c-2">RFC 793 <xreftarget="RFC0793"/>target="RFC0793" format="default" sectionFormat="of" derivedContent="RFC0793"/> specifies several state variables. 
Whenever possible, we reuse the same terminology as RFC 793 to describe the state variables that are maintained by MPTCP.</t> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.1"> <name slugifiedName="name-mptcp-control-block">MPTCP Control Block</name> <t pn="section-c.1-1">The MPTCP control block contains the following variables per connection.</t> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.1.1"> <name slugifiedName="name-authentication-and-metadata">Authentication and Metadata</name> <dl newline="false" spacing="normal" indent="3" pn="section-c.1.1-1"> <dt pn="section-c.1.1-1.1">Local.Token (32 bits):</dt> <dd pn="section-c.1.1-1.2"> This is the token chosen by the local host on this MPTCP connection. The token must be unique among all established MPTCP connections and is generated from the local key.</dd> <dt pn="section-c.1.1-1.3">Local.Key (64 bits):</dt> <dd pn="section-c.1.1-1.4"> This is the key sent by the local host on this MPTCP connection.</dd> <dt pn="section-c.1.1-1.5">Remote.Token (32 bits):</dt> <dd pn="section-c.1.1-1.6"> This is the token chosen by the remote host on this MPTCP connection, generated from the remote key.</dd> <dt pn="section-c.1.1-1.7">Remote.Key (64 bits):</dt> <dd pn="section-c.1.1-1.8"> This is the key chosen by the remote host on this MPTCP connection.</dd> <dt pn="section-c.1.1-1.9">MPTCP.Checksum (flag):</dt> <dd pn="section-c.1.1-1.10"> This flag is set to true if at least one of the hosts has set the "A" bit in the MP_CAPABLE options exchanged during connection establishment; otherwise, it is set to false.
If this flag is set, the checksum must be computed in all DSS options.</dd> </dl> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.1.2"> <name slugifiedName="name-sending-side">Sending Side</name> <dl newline="false" spacing="normal" indent="3" pn="section-c.1.2-1"> <dt pn="section-c.1.2-1.1">SND.UNA (64 bits):</dt> <dd pn="section-c.1.2-1.2"> This is the data sequence number of the next byte to be acknowledged, at the MPTCP connection level. This variable is updated upon reception of a DSS option containing a DATA_ACK.</dd> <dt pn="section-c.1.2-1.3">SND.NXT (64 bits):</dt> <dd pn="section-c.1.2-1.4"> This is the data sequence number of the next byte to be sent. SND.NXT is used to determine the value of the DSN in the DSS option.</dd> <dt pn="section-c.1.2-1.5">SND.WND (32 bits):</dt> <dd pn="section-c.1.2-1.6"> This is the send window. 32 bits if the features in RFC 7323 are used; 16 bits otherwise. MPTCP maintains the send window at the MPTCP connection level, and the same window is shared by all subflows. All subflows use the MPTCP connection-level SND.WND to compute the SEQ.WND value that is sent in each transmitted segment.</dd> </dl> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.1.3"> <name slugifiedName="name-receiving-side">Receiving Side</name> <dl newline="false" spacing="normal" indent="3" pn="section-c.1.3-1"> <dt pn="section-c.1.3-1.1">RCV.NXT (64 bits):</dt> <dd pn="section-c.1.3-1.2"> This is the data sequence number of the next byte that is expected on the MPTCP connection. This state variable is modified upon reception of in-order data.
The value of RCV.NXT is used to specify the DATA_ACK that is sent in the DSS option on all subflows.</dd> <dt pn="section-c.1.3-1.3">RCV.WND (32 bits):</dt> <dd pn="section-c.1.3-1.4"> This is the connection-level receive window, which is the maximum of the RCV.WND on all the subflows. 32 bits if the features in RFC 7323 are used; 16 bits otherwise.</dd> </dl> </section> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.2"> <name slugifiedName="name-tcp-control-blocks">TCP Control Blocks</name> <t pn="section-c.2-1">The MPTCP control block also contains a list of the TCP control blocks that are associated with the MPTCP connection.</t> <t pn="section-c.2-2">Note that the TCP control block on the TCP subflows does not contain the RCV.WND and SND.WND state variables, as these are maintained at the MPTCP connection level and not at the subflow level.</t> <t pn="section-c.2-3">Inside each TCP control block, the following state variables are defined.</t> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.2.1"> <name slugifiedName="name-sending-side-2">Sending Side</name> <dl newline="false" spacing="normal" indent="3" pn="section-c.2.1-1"> <dt pn="section-c.2.1-1.1">SND.UNA (32 bits):</dt> <dd pn="section-c.2.1-1.2"> This is the sequence number of the next byte to be acknowledged on the subflow. This variable is updated upon reception of each TCP acknowledgment on the subflow.</dd> <dt pn="section-c.2.1-1.3">SND.NXT (32 bits):</dt> <dd pn="section-c.2.1-1.4"> This is the sequence number of the next byte to be sent on the subflow.
SND.NXT is used to set the value of SEG.SEQ upon transmission of the next segment.</dd> </dl> </section> <section numbered="true" toc="include" removeInRFC="false" pn="section-c.2.2"> <name slugifiedName="name-receiving-side-2">Receiving Side</name> <dl newline="false" spacing="normal" indent="3" pn="section-c.2.2-1"> <dt pn="section-c.2.2-1.1">RCV.NXT (32 bits):</dt> <dd pn="section-c.2.2-1.2"> This is the sequence number of the next byte that is expected on the subflow. This state variable is modified upon reception of in-order segments. The value of RCV.NXT is copied to the SEG.ACK field of the next segments transmitted on the subflow.</dd> <dt pn="section-c.2.2-1.3">RCV.WND (32 bits):</dt> <dd pn="section-c.2.2-1.4">This is the subflow-level receive window that is updated with the window field from the segments received on this subflow. 32 bits if the features in RFC 7323 are used; 16 bits otherwise.</dd> </dl> </section> </section> </section> <section anchor="app_fsm" numbered="true" toc="include" removeInRFC="false" pn="section-appendix.d"> <name slugifiedName="name-finite-state-machine">Finite State Machine</name> <t pn="section-appendix.d-1">The diagram in <xref target="fig_fsm" format="default" sectionFormat="of" derivedContent="Figure 22"/> shows the Finite State Machine for connection-level closure.
This illustrates how the DATA_FIN connection-level signal (indicated in the diagram as the DFIN flag on a DATA_ACK) (1) interacts with subflow-levelFINs,FINs and (2) permits"break-before-make"break-before-make handover between subflows.</t> <figurealign="center"anchor="fig_fsm"title="Finitealign="left" suppress-title="false" pn="figure-22"> <name slugifiedName="name-finite-state-machine-for-co">Finite State Machine for ConnectionClosure">Closure</name> <artworkalign="left"><![CDATA[align="left" name="" type="" alt="" pn="section-appendix.d-2.1"> +---------+ | M_ESTAB | +---------+ M_CLOSE | | rcv DATA_FIN ------- | | ------- +---------+ snd DATA_FIN / \ snd DATA_ACK[DFIN]+---------++-------+ | M_FIN|<----------------- ------------------->| M_CLOSE ||<----------------- ------------------->|M_CLOSE| | WAIT-1 |--------------------------- | WAIT | +---------+ rcv DATA_FIN \+---------++-------+ | rcv DATA_ACK[DFIN] ------- | M_CLOSE | | -------------- snd DATA_ACK | ------- | | CLOSE all subflows | snd DATA_FIN | V V V +-----------+ +-----------++-----------++----------+ |M_FINWAIT-2| | M_CLOSING || M_LAST-ACK| +-----------+|M_LAST-ACK| +-----------+ +-----------+ +----------+ | rcv DATA_ACK[DFIN] | rcv DATA_ACK[DFIN] | | rcv DATA_FIN -------------- | -------------- | | ------- CLOSE all subflows | CLOSE all subflows | | snd DATA_ACK[DFIN] V delete MPTCP PCB V \ +-----------++---------+ ------------------------>|M_TIME WAIT|----------------->| M_CLOSED|+--------+ ------------------------>|M_TIME WAIT|---------------->|M_CLOSED| +-----------++---------++--------+ All subflows in CLOSED ------------ delete MPTCP PCB]]></artwork></artwork> </figure> </section> <sectiontitle="Changesanchor="app_changelog" numbered="true" toc="include" removeInRFC="false" pn="section-appendix.e"> <name slugifiedName="name-changes-from-rfc-6824">Changes fromRFC6824" anchor="app_changelog"> <t>This sectionRFC 6824</name> <t pn="section-appendix.e-1">This appendix lists the key technical 
changes between <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>, which specifies MPTCP v0; and this document, which obsoletes <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> and specifies MPTCP v1. Note that this specification is not backward compatible with <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. </t> <ul spacing="normal" bare="false" empty="false" pn="section-appendix.e-2"> <li pn="section-appendix.e-2.1">This document incorporates lessons learned from the various implementations, deployments, and experiments gathered in the documents "Use Cases and Operational Experience with Multipath TCP" <xref target="RFC8041" format="default" sectionFormat="of" derivedContent="RFC8041"/> and the IETF Journal article "Multipath TCP Deployments" <xref target="deployments" format="default" sectionFormat="of" derivedContent="deployments"/>.</li> <li pn="section-appendix.e-2.2">Connection initiation, through the exchange of the MP_CAPABLE MPTCP option, is different from <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/>. The SYN no longer includes the initiator's key, to allow the MP_CAPABLE option on the SYN to be shorter in length and to avoid duplicating the sending of keying material.</li> <li pn="section-appendix.e-2.3">This also ensures reliable delivery of the key on the MP_CAPABLE option by allowing its transmission to be combined with data and thus using TCP's built-in reliability mechanism. If the initiator does not immediately have data to send, the MP_CAPABLE option with the keys will be repeated on the first data packet. 
If the other end is the first to send, then the presence of the DSS option implicitly confirms the receipt of the MP_CAPABLE.</li> <li pn="section-appendix.e-2.4">In the Flags field of MP_CAPABLE, "C" is now assigned to mean that the sender of this option will not accept additional MPTCP subflows to the source address and port. This improves efficiency -- for example, in cases where the sender is behind a strict NAT.</li> <li pn="section-appendix.e-2.5">In the Flags field of MP_CAPABLE, "H" now indicates the use of HMAC-SHA256 (rather than HMAC-SHA1).</li> <li pn="section-appendix.e-2.6">Connection initiation also defines the procedure for version negotiation, for implementations that support both v0 <xref target="RFC6824" format="default" sectionFormat="of" derivedContent="RFC6824"/> and v1 (this document).</li> <li pn="section-appendix.e-2.7">The HMAC-SHA256 (rather than HMAC-SHA1) algorithm is used, as it provides better security. It is used to generate the token in the MP_JOIN and ADD_ADDR messages and to set the IDSN.</li> <li pn="section-appendix.e-2.8">A new subflow-level option exists to signal reasons for sending a RST on a subflow (MP_TCPRST (<xref target="sec_reset" format="default" sectionFormat="of" derivedContent="Section 3.6"/>)); this can help an implementation decide whether to attempt later reconnection.</li> <li pn="section-appendix.e-2.9">The MP_PRIO option (<xref target="sec_policy" format="default" sectionFormat="of" derivedContent="Section 3.3.8"/>), which is used to signal a change of priority for a subflow, no longer includes the AddrID field. Its purpose was to allow the changed priority to be applied on a subflow other than the one it was sent on. 
However, it was determined that this could be used by a man-in-the-middle to divert all traffic onto its own path, and MP_PRIO does not include a token or other type of security mechanism.</li> <li pn="section-appendix.e-2.10">The ADD_ADDR option (<xref target="sec_add_address" format="default" sectionFormat="of" derivedContent="Section 3.4.1"/>), which is used to inform the other host about another potential address, is different in several ways. It now includes an HMAC of the added address, for enhanced security. In addition, reliability for the ADD_ADDR option has been added: the IPVer field is replaced with a flag field, and one flag is assigned ("E") that is used as an "echo" so a host can indicate that it has received the option.</li> <li pn="section-appendix.e-2.11">This document describes an additional way of performing a Fast Close -- by sending an MP_FASTCLOSE option on a RST on all subflows. This allows the host to tear down the subflows and the connection immediately.</li> <li pn="section-appendix.e-2.12">IANA has reserved the MPTCP option subtype of value 0xf for Private Use (<xref target="IANA_subtypes" format="default" sectionFormat="of" derivedContent="Section 7.2"/>). 
This document doesn't define how to use that value.</li> <li pn="section-appendix.e-2.13">This document adds a new appendix (<xref target="app_tfo" format="default" sectionFormat="of" derivedContent="Appendix B"/>), which discusses the usage of both MPTCP options and TFO options on the same packet.</li> </ul> </section> <section anchor="Acknowledgments" numbered="false" toc="include" removeInRFC="false" pn="section-appendix.f"> <name slugifiedName="name-acknowledgments">Acknowledgments</name> <t pn="section-appendix.f-1">The authors gratefully acknowledge significant input into this document from <contact fullname="Sebastien Barre"/> and <contact fullname="Andrew McDonald"/>.</t> <t pn="section-appendix.f-2">The authors also wish to acknowledge reviews and contributions from <contact fullname="Iljitsch van Beijnum"/>, <contact fullname="Lars Eggert"/>, <contact fullname="Marcelo Bagnulo"/>, <contact fullname="Robert Hancock"/>, <contact fullname="Pasi Sarolahti"/>, <contact fullname="Toby Moncaster"/>, <contact fullname="Philip Eardley"/>, <contact fullname="Sergio Lembo"/>, <contact fullname="Lawrence Conroy"/>, <contact fullname="Yoshifumi Nishida"/>, <contact fullname="Bob Briscoe"/>, <contact fullname="Stein Gjessing"/>, <contact fullname="Andrew McGregor"/>, <contact fullname="Georg Hampel"/>, <contact fullname="Anumita Biswas"/>, <contact fullname="Wes Eddy"/>, <contact fullname="Alexey Melnikov"/>, <contact fullname="Francis Dupont"/>, <contact fullname="Adrian Farrel"/>, <contact fullname="Barry Leiba"/>, <contact fullname="Robert Sparks"/>, <contact fullname="Sean Turner"/>, <contact fullname="Stephen Farrell"/>, <contact fullname="Martin Stiemerling"/>, <contact fullname="Gregory Detal"/>, <contact fullname="Fabien Duchene"/>, <contact fullname="Xavier de Foy"/>, <contact fullname="Rahul Jadhav"/>, <contact fullname="Klemens Schragel"/>, <contact 
fullname="Mirja Kühlewind"/>, <contact fullname="Sheng Jiang"/>, <contact fullname="Alissa Cooper"/>, <contact fullname="Ines Robles"/>, <contact fullname="Roman Danyliw"/>, <contact fullname="Adam Roach"/>, <contact fullname="Eric Vyncke"/>, and <contact fullname="Ben Kaduk"/>.</t> </section> <section anchor="authors-addresses" numbered="false" removeInRFC="false" toc="include" pn="section-appendix.g"> <name slugifiedName="name-authors-addresses">Authors' Addresses</name> <author fullname="Alan Ford" initials="A." surname="Ford"> <organization showOnFrontPage="true">Pexip</organization> <address> <email>alan.ford@gmail.com</email> </address> </author> <author fullname="Costin Raiciu" initials="C." surname="Raiciu"> <organization abbrev="U. Politehnica of Bucharest" showOnFrontPage="true">University Politehnica of Bucharest</organization> <address> <postal> <street>Splaiul Independentei 313</street> <city>Bucharest</city> <country>Romania</country> </postal> <email>costin.raiciu@cs.pub.ro</email> </address> </author> <author fullname="Mark Handley" initials="M." surname="Handley"> <organization abbrev="U. College London" showOnFrontPage="true">University College London</organization> <address> <postal> <street>Gower Street</street> <city>London</city> <code>WC1E 6BT</code> <country>United Kingdom</country> </postal> <email>m.handley@cs.ucl.ac.uk</email> </address> </author> <author fullname="Olivier Bonaventure" initials="O." surname="Bonaventure"> <organization abbrev="U. catholique de Louvain" ascii="Universite catholique de Louvain" showOnFrontPage="true">Université catholique de Louvain</organization> <address> <postal> <street>Pl. Ste Barbe, 2</street> <code>1348</code> <city>Louvain-la-Neuve</city> <country>Belgium</country> </postal> <email>olivier.bonaventure@uclouvain.be</email> </address> </author> <author fullname="Christoph Paasch" initials="C." surname="Paasch"> <organization abbrev="Apple, Inc." 
showOnFrontPage="true">Apple, Inc.</organization> <address> <postal> <street/> <city>Cupertino</city> <region>CA</region> <country>United States of America</country> </postal> <email>cpaasch@apple.com</email> </address> </author> </section> </back> </rfc>