| rfc9626xml2.original.xml | rfc9626.xml | |||
|---|---|---|---|---|
| <?xml version="1.0" encoding="UTF-8"?> | <?xml version='1.0' encoding='utf-8'?> | |||
| <!DOCTYPE rfc > | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" version="3" category="exp" docNa | |||
| <?rfc compact="yes"?> | me="draft-ietf-avtext-framemarking-16" number="9626" consensus="true" updates="" | |||
| <?rfc subcompact="yes"?> | obsoletes="" ipr="trust200902" submissionType="IETF" symRefs="true" sortRefs="t | |||
| <?rfc iprnotified="no" ?> | rue" tocInclude="true" tocDepth="4" xml:lang="en" prepTime="2025-03-27T17:04:02" | |||
| <?rfc strict="yes"?> | indexInclude="true" scripts="Common,Latin"> | |||
| <?rfc symrefs="yes"?> | <link href="https://datatracker.ietf.org/doc/draft-ietf-avtext-framemarking-16 | |||
| <?rfc toc="yes"?> | " rel="prev"/> | |||
| <?rfc tocdepth="4"?> | <link href="https://dx.doi.org/10.17487/rfc9626" rel="alternate"/> | |||
| <link href="urn:issn:2070-1721" rel="alternate"/> | ||||
| <rfc category="exp" docName="draft-ietf-avtext-framemarking-16" ipr="trust200902 | ||||
| " submissionType="IETF"> | ||||
| <front> | <front> | |||
| <title abbrev="Video Frame Marking">Video Frame Marking RTP Header Extension </title> | <title abbrev="Video Frame Marking">Video Frame Marking RTP Header Extension </title> | |||
| <seriesInfo name="RFC" value="9626" stream="IETF"/> | ||||
| <author fullname="Mo Zanaty" initials="M" surname="Zanaty"> | <author fullname="Mo Zanaty" initials="M" surname="Zanaty"> | |||
| <organization>Cisco Systems</organization> | <organization showOnFrontPage="true">Cisco Systems</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>170 West Tasman Drive</street> | <street>170 West Tasman Drive</street> | |||
| <city>San Jose</city> | <city>San Jose</city> | |||
| <region>CA</region> | <region>CA</region> | |||
| <code>95134</code> | <code>95134</code> | |||
| <country>US</country> | <country>United States of America</country> | |||
| </postal> | </postal> | |||
| <email>mzanaty@cisco.com</email> | <email>mzanaty@cisco.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="E." surname="Berger" fullname="Espen Berger"> | <author initials="E." surname="Berger" fullname="Espen Berger"> | |||
| <organization>Cisco Systems</organization> | <organization showOnFrontPage="true">Cisco Systems</organization> | |||
| <address> | <address> | |||
| <email>espeberg@cisco.com</email> | <email>espeberg@cisco.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author fullname="Suhas Nandakumar" initials="S" surname="Nandakumar"> | <author fullname="Suhas Nandakumar" initials="S" surname="Nandakumar"> | |||
| <organization>Cisco Systems</organization> | <organization showOnFrontPage="true">Cisco Systems</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>170 West Tasman Drive</street> | <street>170 West Tasman Drive</street> | |||
| <city>San Jose</city> | <city>San Jose</city> | |||
| <region>CA</region> | <region>CA</region> | |||
| <code>95134</code> | <code>95134</code> | |||
| <country>US</country> | <country>United States of America</country> | |||
| </postal> | </postal> | |||
| <email>snandaku@cisco.com</email> | <email>snandaku@cisco.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <date day="04" month="March" year="2024"/> | <date month="03" year="2025"/> | |||
| <area>WIT</area> | ||||
| <area>Applications</area> | <workgroup>avtcore</workgroup> | |||
| <keyword>Internet-Draft</keyword> | <abstract pn="section-abstract"> | |||
| <t indent="0" pn="section-abstract-1">This document describes a Video Fram | ||||
| <abstract> | e Marking RTP header extension used to | |||
| <t>This document describes a Video Frame Marking RTP header extension used | ||||
| to | ||||
| convey information about video frames that is critical for error recovery | convey information about video frames that is critical for error recovery | |||
| and packet forwarding in RTP middleboxes or network nodes. It is most | and packet forwarding in RTP middleboxes or network nodes. It is most | |||
| useful when media is encrypted, and essential when the middlebox or node | useful when media is encrypted and essential when the middlebox or node | |||
| has no access to the media decryption keys. It is also useful for | has no access to the media decryption keys. It is also useful for | |||
| codec-agnostic processing of encrypted or unencrypted media, while it also | codec-agnostic processing of encrypted or unencrypted media, while it also | |||
| supports extensions for codec-specific information.</t> | supports extensions for codec-specific information.</t> | |||
| </abstract> | </abstract> | |||
| <boilerplate> | ||||
| <section anchor="status-of-memo" numbered="false" removeInRFC="false" toc= | ||||
| "exclude" pn="section-boilerplate.1"> | ||||
| <name slugifiedName="name-status-of-this-memo">Status of This Memo</name | ||||
| > | ||||
| <t indent="0" pn="section-boilerplate.1-1"> | ||||
| This document is not an Internet Standards Track specification; it i | ||||
| s | ||||
| published for examination, experimental implementation, and | ||||
| evaluation. | ||||
| </t> | ||||
| <t indent="0" pn="section-boilerplate.1-2"> | ||||
| This document defines an Experimental Protocol for the Internet | ||||
| community. This document is a product of the Internet Engineering | ||||
| Task Force (IETF). It represents the consensus of the IETF communit | ||||
| y. | ||||
| It has received public review and has been approved for publication | ||||
| by the Internet Engineering Steering Group (IESG). Not all document | ||||
| s | ||||
| approved by the IESG are candidates for any level of Internet | ||||
| Standard; see Section 2 of RFC 7841. | ||||
| </t> | ||||
| <t indent="0" pn="section-boilerplate.1-3"> | ||||
| Information about the current status of this document, any | ||||
| errata, and how to provide feedback on it may be obtained at | ||||
| <eref target="https://www.rfc-editor.org/info/rfc9626" brackets="non | ||||
| e"/>. | ||||
| </t> | ||||
| </section> | ||||
| <section anchor="copyright" numbered="false" removeInRFC="false" toc="excl | ||||
| ude" pn="section-boilerplate.2"> | ||||
| <name slugifiedName="name-copyright-notice">Copyright Notice</name> | ||||
| <t indent="0" pn="section-boilerplate.2-1"> | ||||
| Copyright (c) 2025 IETF Trust and the persons identified as the | ||||
| document authors. All rights reserved. | ||||
| </t> | ||||
| <t indent="0" pn="section-boilerplate.2-2"> | ||||
| This document is subject to BCP 78 and the IETF Trust's Legal | ||||
| Provisions Relating to IETF Documents | ||||
| (<eref target="https://trustee.ietf.org/license-info" brackets="none | ||||
| "/>) in effect on the date of | ||||
| publication of this document. Please review these documents | ||||
| carefully, as they describe your rights and restrictions with | ||||
| respect to this document. Code Components extracted from this | ||||
| document must include Revised BSD License text as described in | ||||
| Section 4.e of the Trust Legal Provisions and are provided without | ||||
| warranty as described in the Revised BSD License. | ||||
| </t> | ||||
| </section> | ||||
| </boilerplate> | ||||
| <toc> | ||||
| <section anchor="toc" numbered="false" removeInRFC="false" toc="exclude" p | ||||
| n="section-toc.1"> | ||||
| <name slugifiedName="name-table-of-contents">Table of Contents</name> | ||||
| <ul bare="true" empty="true" indent="2" spacing="compact" pn="section-to | ||||
| c.1-1"> | ||||
| <li pn="section-toc.1-1.1"> | ||||
| <t indent="0" keepWithNext="true" pn="section-toc.1-1.1.1"><xref der | ||||
| ivedContent="1" format="counter" sectionFormat="of" target="section-1"/>. <xref | ||||
| derivedContent="" format="title" sectionFormat="of" target="name-introduction"> | ||||
| Introduction</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.2"> | ||||
| <t indent="0" keepWithNext="true" pn="section-toc.1-1.2.1"><xref der | ||||
| ivedContent="2" format="counter" sectionFormat="of" target="section-2"/>. <xref | ||||
| derivedContent="" format="title" sectionFormat="of" target="name-requirements-l | ||||
| anguage">Requirements Language</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3"> | ||||
| <t indent="0" pn="section-toc.1-1.3.1"><xref derivedContent="3" form | ||||
| at="counter" sectionFormat="of" target="section-3"/>. <xref derivedContent="" f | ||||
| ormat="title" sectionFormat="of" target="name-video-frame-marking-rtp-hea">Video | ||||
| Frame Marking RTP Header Extension</xref></t> | ||||
| <ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
| n-toc.1-1.3.2"> | ||||
| <li pn="section-toc.1-1.3.2.1"> | ||||
| <t indent="0" keepWithNext="true" pn="section-toc.1-1.3.2.1.1">< | ||||
| xref derivedContent="3.1" format="counter" sectionFormat="of" target="section-3. | ||||
| 1"/>. <xref derivedContent="" format="title" sectionFormat="of" target="name-lo | ||||
| ng-extension-for-scalable">Long Extension for Scalable Streams</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.2"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.2.1"><xref derivedContent= | ||||
| "3.2" format="counter" sectionFormat="of" target="section-3.2"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-short-extension-for-no | ||||
| n-sca">Short Extension for Non-Scalable Streams</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.1"><xref derivedContent= | ||||
| "3.3" format="counter" sectionFormat="of" target="section-3.3"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-lid-mappings-for-scala | ||||
| ble-s">LID Mappings for Scalable Streams</xref></t> | ||||
| <ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
| ction-toc.1-1.3.2.3.2"> | ||||
| <li pn="section-toc.1-1.3.2.3.2.1"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.1.1"><xref derived | ||||
| Content="3.3.1" format="counter" sectionFormat="of" target="section-3.3.1"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-vp9-lid-ma | ||||
| pping">VP9 LID Mapping</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3.2.2"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.2.1"><xref derived | ||||
| Content="3.3.2" format="counter" sectionFormat="of" target="section-3.3.2"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-h265-lid-m | ||||
| apping">H265 LID Mapping</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3.2.3"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.3.1"><xref derived | ||||
| Content="3.3.3" format="counter" sectionFormat="of" target="section-3.3.3"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-h264-scala | ||||
| ble-video-coding-">H264 Scalable Video Coding (SVC) LID Mapping</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3.2.4"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.4.1"><xref derived | ||||
| Content="3.3.4" format="counter" sectionFormat="of" target="section-3.3.4"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-h264-advan | ||||
| ced-video-coding-">H264 Advanced Video Coding (AVC) LID Mapping</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3.2.5"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.5.1"><xref derived | ||||
| Content="3.3.5" format="counter" sectionFormat="of" target="section-3.3.5"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-vp8-lid-ma | ||||
| pping">VP8 LID Mapping</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.3.2.6"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.3.2.6.1"><xref derived | ||||
| Content="3.3.6" format="counter" sectionFormat="of" target="section-3.3.6"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-future-cod | ||||
| ec-lid-mapping">Future Codec LID Mapping</xref></t> | ||||
| </li> | ||||
| </ul> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.4"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.4.1"><xref derivedContent= | ||||
| "3.4" format="counter" sectionFormat="of" target="section-3.4"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-signaling-information" | ||||
| >Signaling Information</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.5"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.5.1"><xref derivedContent= | ||||
| "3.5" format="counter" sectionFormat="of" target="section-3.5"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-usage-considerations"> | ||||
| Usage Considerations</xref></t> | ||||
| <ul bare="true" empty="true" indent="2" spacing="compact" pn="se | ||||
| ction-toc.1-1.3.2.5.2"> | ||||
| <li pn="section-toc.1-1.3.2.5.2.1"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.5.2.1.1"><xref derived | ||||
| Content="3.5.1" format="counter" sectionFormat="of" target="section-3.5.1"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-relation-t | ||||
| o-layer-refresh-r">Relation to Layer Refresh Request (LRR)</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.3.2.5.2.2"> | ||||
| <t indent="0" pn="section-toc.1-1.3.2.5.2.2.1"><xref derived | ||||
| Content="3.5.2" format="counter" sectionFormat="of" target="section-3.5.2"/>. < | ||||
| xref derivedContent="" format="title" sectionFormat="of" target="name-scalabilit | ||||
| y-structures">Scalability Structures</xref></t> | ||||
| </li> | ||||
| </ul> | ||||
| </li> | ||||
| </ul> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.4"> | ||||
| <t indent="0" pn="section-toc.1-1.4.1"><xref derivedContent="4" form | ||||
| at="counter" sectionFormat="of" target="section-4"/>. <xref derivedContent="" f | ||||
| ormat="title" sectionFormat="of" target="name-security-and-privacy-consid">Secur | ||||
| ity and Privacy Considerations</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.5"> | ||||
| <t indent="0" pn="section-toc.1-1.5.1"><xref derivedContent="5" form | ||||
| at="counter" sectionFormat="of" target="section-5"/>. <xref derivedContent="" f | ||||
| ormat="title" sectionFormat="of" target="name-iana-considerations">IANA Consider | ||||
| ations</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.6"> | ||||
| <t indent="0" pn="section-toc.1-1.6.1"><xref derivedContent="6" form | ||||
| at="counter" sectionFormat="of" target="section-6"/>. <xref derivedContent="" f | ||||
| ormat="title" sectionFormat="of" target="name-references">References</xref></t> | ||||
| <ul bare="true" empty="true" indent="2" spacing="compact" pn="sectio | ||||
| n-toc.1-1.6.2"> | ||||
| <li pn="section-toc.1-1.6.2.1"> | ||||
| <t indent="0" pn="section-toc.1-1.6.2.1.1"><xref derivedContent= | ||||
| "6.1" format="counter" sectionFormat="of" target="section-6.1"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-normative-references"> | ||||
| Normative References</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.6.2.2"> | ||||
| <t indent="0" pn="section-toc.1-1.6.2.2.1"><xref derivedContent= | ||||
| "6.2" format="counter" sectionFormat="of" target="section-6.2"/>. <xref derived | ||||
| Content="" format="title" sectionFormat="of" target="name-informative-references | ||||
| ">Informative References</xref></t> | ||||
| </li> | ||||
| </ul> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.7"> | ||||
| <t indent="0" pn="section-toc.1-1.7.1"><xref derivedContent="" forma | ||||
| t="none" sectionFormat="of" target="section-appendix.a"/><xref derivedContent="" | ||||
| format="title" sectionFormat="of" target="name-acknowledgements">Acknowledgemen | ||||
| ts</xref></t> | ||||
| </li> | ||||
| <li pn="section-toc.1-1.8"> | ||||
| <t indent="0" pn="section-toc.1-1.8.1"><xref derivedContent="" forma | ||||
| t="none" sectionFormat="of" target="section-appendix.b"/><xref derivedContent="" | ||||
| format="title" sectionFormat="of" target="name-authors-addresses">Authors' Addr | ||||
| esses</xref></t> | ||||
| </li> | ||||
| </ul> | ||||
| </section> | ||||
| </toc> | ||||
| </front> | </front> | |||
| <middle> | <middle> | |||
| <section anchor="intro" numbered="true" removeInRFC="false" toc="include" pn | ||||
| <section title="Introduction" anchor="intro"> | ="section-1"> | |||
| <t>Many widely deployed RTP <xref target="RFC3550" /> topologies | <name slugifiedName="name-introduction">Introduction</name> | |||
| <xref target="RFC7667" /> used in modern voice and video | <t indent="0" pn="section-1-1">Many widely deployed RTP <xref target="RFC3 | |||
| 550" format="default" sectionFormat="of" derivedContent="RFC3550"/> topologies | ||||
| <xref target="RFC7667" format="default" sectionFormat="of" derivedConten | ||||
| t="RFC7667"/> used in modern voice and video | ||||
| conferencing systems include a centralized component that acts as an RTP switch. | conferencing systems include a centralized component that acts as an RTP switch. | |||
| It receives voice and video streams from each participant, which may be encrypted using | It receives voice and video streams from each participant, which may be encrypted using | |||
| SRTP <xref target="RFC3711" />, or extensions that provide participants wi | Secure Real-time Transport Protocol (SRTP) <xref target="RFC3711" format=" | |||
| th | default" sectionFormat="of" derivedContent="RFC3711"/> or extensions that provid | |||
| private media <xref target="RFC8871" /> | e participants with | |||
| private media <xref target="RFC8871" format="default" sectionFormat="of" d | ||||
| erivedContent="RFC8871"/> | ||||
| via end-to-end encryption where the switch has no access to media decryption keys. | via end-to-end encryption where the switch has no access to media decryption keys. | |||
| The goal is to provide a set of streams back to | The goal is to provide a set of streams back to | |||
| the participants which enable them to render the right media content. In a | the participants, which enable them to render the right media content. For | |||
| simple video configuration, for example, the goal will be that each partic | example, in a | |||
| ipant | simple video configuration, the goal will be that each participant | |||
| sees and hears just the active speaker. In that case, the goal of the switch is to | sees and hears just the active speaker. In that case, the goal of the switch is to | |||
| receive the voice and video streams from each participant, determine the active | receive the voice and video streams from each participant, determine the active | |||
| speaker based on energy in the voice packets, possibly using the client-to-mixer | speaker based on energy in the voice packets, possibly using the client-to-mixer | |||
| audio level RTP header extension <xref target="RFC6464" />, and select the | audio level RTP header extension <xref target="RFC6464" format="default" s | |||
| corresponding video | ectionFormat="of" derivedContent="RFC6464"/>, and select the corresponding video | |||
| stream for transmission to participants; see <xref target="rtpswitch" /> | stream for transmission to participants; see <xref target="rtpswitch" fo | |||
| .</t> | rmat="default" sectionFormat="of" derivedContent="Figure 1"/>.</t> | |||
| <t indent="0" pn="section-1-2">In this document, an "RTP switch" is used a | ||||
| <t>In this document, an "RTP switch" is used as a common short term for th | s shorthand for the terms | |||
| e terms | ||||
| "switching RTP mixer", "source projecting middlebox", | "switching RTP mixer", "source projecting middlebox", | |||
| "source forwarding unit/middlebox" and "video switching MCU" as | "source forwarding unit/middlebox" and "video switching Multipoint Control | |||
| discussed in <xref target="RFC7667" />.</t> | Unit (MCU)", as | |||
| discussed in <xref target="RFC7667" format="default" sectionFormat="of" de | ||||
| <figure title="RTP switch" anchor="rtpswitch"><artwork><![CDATA[ | rivedContent="RFC7667"/>.</t> | |||
| <figure anchor="rtpswitch" align="left" suppress-title="false" pn="figure- | ||||
| 1"> | ||||
| <name slugifiedName="name-rtp-switch">RTP Switch</name> | ||||
| <artwork align="left" pn="section-1-3.1"> | ||||
| +---+ +------------+ +---+ | +---+ +------------+ +---+ | |||
| | A |<---->| |<---->| B | | | A |<---->| |<---->| B | | |||
| +---+ | | +---+ | +---+ | | +---+ | |||
| | RTP | | | RTP | | |||
| +---+ | Switch | +---+ | +---+ | Switch | +---+ | |||
| | C |<---->| |<---->| D | | | C |<---->| |<---->| D | | |||
| +---+ +------------+ +---+ | +---+ +------------+ +---+ | |||
| ]]> | ||||
| </artwork></figure> | ||||
| <t>In order to properly support switching of video streams, the RTP switch t | </artwork> | |||
| ypically needs | </figure> | |||
| <t indent="0" pn="section-1-4">In order to properly support the switching | ||||
| of video streams, the RTP switch typically needs | ||||
| some critical information about video frames in order to start and stop forwarding streams. | some critical information about video frames in order to start and stop forwarding streams. | |||
| <list style="symbols"> | </t> | |||
| <t>Because of inter-frame dependencies, it should ideally switch video s | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section-1-5 | |||
| treams at a point | "> | |||
| <li pn="section-1-5.1"> | ||||
| <t indent="0" pn="section-1-5.1.1">Because of inter-frame dependencies | ||||
| , it should ideally switch video streams at a point | ||||
| where the first frame from the new speaker can be decoded by recipients without prior | where the first frame from the new speaker can be decoded by recipients without prior | |||
| frames, e.g switch on an intra-frame.</t> | frames, e.g., switch on an intra-frame.</t> | |||
| <t>In many cases, the switch may need to drop frames in order to realize | </li> | |||
| congestion control | <li pn="section-1-5.2"> | |||
| techniques, and needs to know which frames can be dropped with minimal i | <t indent="0" pn="section-1-5.2.1">In many cases, the switch may need | |||
| mpact to video quality.</t> | to drop frames in order to realize congestion control | |||
| <t>For scalable streams with dependent layers, the switch may need to se | techniques, and it needs to know which frames can be dropped with minima | |||
| lectively forward | l impact to video quality.</t> | |||
| </li> | ||||
| <li pn="section-1-5.3"> | ||||
| <t indent="0" pn="section-1-5.3.1">For scalable streams with dependent | ||||
| layers, the switch may need to selectively forward | ||||
| specific layers to specific recipients due to recipient bandwidth or decoder limits.</t> | specific layers to specific recipients due to recipient bandwidth or decoder limits.</t> | |||
| </list> | </li> | |||
| </t> | </ul> | |||
| <t indent="0" pn="section-1-6">Furthermore, it is highly desirable to do t | ||||
| <t>Furthermore, it is highly desirable to do this in a payload format-agno | his in a payload format-agnostic way that is not | |||
| stic way which is not | ||||
| specific to each different video codec. | specific to each different video codec. | |||
| Most modern video codecs share common concepts around frame types and other critical information | Most modern video codecs share common concepts around frame types and other critical information | |||
| to make this codec-agnostic handling possible.</t> | to make this codec-agnostic handling possible.</t> | |||
| <t indent="0" pn="section-1-7">It is also desirable to be able to do this | ||||
| <t>It is also desirable to be able to do this for SRTP without requiring t | for SRTP without requiring the video switch to | |||
| he video switch to | decrypt the packets. SRTP will encrypt the RTP payload format contents; | |||
| decrypt the packets. SRTP will encrypt the RTP payload format contents a | consequently, this | |||
| nd consequently this | ||||
| data is not usable for the switching function without decryption, which may not even | data is not usable for the switching function without decryption, which may not even | |||
| be possible in the case of end-to-end encryption of private media | be possible in the case of end-to-end encryption of private media | |||
| <xref target="RFC8871" />.</t> | <xref target="RFC8871" format="default" sectionFormat="of" derivedConten | |||
| t="RFC8871"/>.</t> | ||||
| <t>By providing meta-information about the RTP streams outside the encrypt | <t indent="0" pn="section-1-8">By providing meta-information about the RTP | |||
| ed media payload, an | streams outside the encrypted media payload, an | |||
| RTP switch can do codec-agnostic selective forwarding without decrypting the payload. | RTP switch can do codec-agnostic selective forwarding without decrypting the payload. | |||
| This document specifies the necessary meta-information in an RTP header extension. | This document specifies the necessary meta-information in an RTP header extension. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="Key Words for Normative Requirements"> | <section numbered="true" removeInRFC="false" toc="include" pn="section-2"> | |||
| <t> | <name slugifiedName="name-requirements-language">Requirements Language</na | |||
| The key words "MUST", "MUST NOT", "REQUIRED&quo | me> | |||
| t;, | <t indent="0" pn="section-2-1"> | |||
| "SHALL", "SHALL NOT", "SHOULD", "SHOU | The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
| LD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY&q | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14> | |||
| uot;, and | ", | |||
| "OPTIONAL" in this document are to be interpreted as described | "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", | |||
| in | "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
| BCP 14 <xref target="RFC2119" /> <xref target="RFC8174" /> when, and on | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to | |||
| ly when, they | be | |||
| appear in all capitals, as shown here. </t> | interpreted as described in BCP 14 <xref target="RFC2119" format="default" s | |||
| ectionFormat="of" derivedContent="RFC2119"/> <xref target="RFC8174" format="defa | ||||
| ult" sectionFormat="of" derivedContent="RFC8174"/> when, and only when, they app | ||||
| ear in all capitals, as | ||||
| shown here. | ||||
| </t> | ||||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3"> | ||||
| <section title="Frame Marking RTP Header Extension"> | <name slugifiedName="name-video-frame-marking-rtp-hea">Video Frame Marking | |||
| <t>This specification uses RTP header extensions as defined in <xref targe | RTP Header Extension</name> | |||
| t="RFC8285" />. A subset of | <t indent="0" pn="section-3-1">This specification uses RTP header extensio | |||
| ns as defined in <xref target="RFC8285" format="default" sectionFormat="of" deri | ||||
| vedContent="RFC8285"/>. A subset of | ||||
| meta-information from the video stream is provided as an RTP header extension to allow an RTP switch | meta-information from the video stream is provided as an RTP header extension to allow an RTP switch | |||
| to do generic selective forwarding of video streams encoded with potentially different video codecs.</t> | to do generic selective forwarding of video streams encoded with potentially different video codecs.</t> | |||
| <t indent="0" pn="section-3-2">The Video Frame Marking RTP header extensio | ||||
| <t>The Frame Marking RTP header extension is encoded | n is encoded | |||
| using the one-byte header or two-byte header as described in <xref target | using the one-byte header or two-byte header as described in <xref target | |||
| ="RFC8285" />. | ="RFC8285" format="default" sectionFormat="of" derivedContent="RFC8285"/>. | |||
| The one-byte header format is used for examples in this memo. | The one-byte header format is used for examples in this document. | |||
| The two-byte header format is used when other two-byte header extensions | The two-byte header format is used when other two-byte header extensions | |||
| are present in the same RTP packet, since mixing one-byte and two-byte extensions | are present in the same RTP packet since mixing one-byte and two-byte extensions | |||
| is not possible in the same RTP packet.</t> | is not possible in the same RTP packet.</t> | |||
| <t indent="0" pn="section-3-3">This extension is only specified for Source | ||||
| <t>This extension is only specified for Source (not Redundancy) RTP Stre | (not Redundancy) RTP Streams | |||
| ams | <xref target="RFC7656" format="default" sectionFormat="of" derivedCo | |||
| <xref target="RFC7656" /> that carry video payloads. | ntent="RFC7656"/> that carry video payloads. | |||
| It is not specified for audio payloads, nor is it specified for Redundancy RTP Streams. | It is not specified for audio payloads, nor is it specified for Redundancy RTP Streams. | |||
| The (separate) specifications for Redundancy RTP Streams often include | The (separate) specifications for Redundancy RTP Streams often include | |||
| provisions for recovering any header extensions that were part of the original source packet. | provisions for recovering any header extensions that were part of the original source packet. | |||
| Such provisions can be followed to recover the Frame Marking RTP header extension of the | Such provisions can be followed to recover the Video Frame Marking RTP header extension of the | |||
| original source packet. | original source packet. | |||
| Source packet frame markings may be useful when generating Redundancy RTP Streams; | Source packet frame markings may be useful when generating Redundancy RTP Streams; | |||
| for example, the I (Independent Frame) and D (Discardable Frame) bits, | for example, the I (Independent Frame) and D (Discardable Frame) bits, | |||
| defined in <xref target="mandatory-scalable" />, | defined in <xref target="mandatory-scalable" format="default" sectionFormat="of" derivedContent="Section 3.1"/>, | |||
| can be used to generate extra or no redundancy, respectively, | can be used to generate extra or no redundancy, respectively, | |||
| and redundancy schemes with source blocks can align source block boundaries with | and redundancy schemes with source blocks can align source block boundaries with | |||
| independent frame boundaries as marked by the I bit. | independent frame boundaries as marked by the I bit. | |||
| </t> | </t> | |||
| <t>A frame, in the context of this specification, is the set of RTP pack | <t indent="0" pn="section-3-4">A frame, in the context of this specificati | |||
| ets | on, is the set of RTP packets | |||
| with the same RTP timestamp from a specific RTP synchronization source | with the same RTP timestamp from a specific RTP Synchronization Source | |||
| (SSRC). | (SSRC). | |||
| A frame within a layer is the set of RTP packets with the same RTP timestamp, SSRC, | A frame within a layer is the set of RTP packets with the same RTP timestamp, SSRC, | |||
| Temporal ID (TID), and Layer ID (LID).</t> | Temporal-layer ID (TID), and Layer ID (LID).</t> | |||
| <section anchor="mandatory-scalable" numbered="true" removeInRFC="false" t | ||||
| <section title="Long Extension for Scalable Streams" anchor="mandatory-sca | oc="include" pn="section-3.1"> | |||
| lable"> | <name slugifiedName="name-long-extension-for-scalable">Long Extension fo | |||
| <t>The following RTP header extension is RECOMMENDED for scalable streams | r Scalable Streams</name> | |||
| . | <t indent="0" pn="section-3.1-1">The following RTP header extension is < | |||
| It MAY also be used for non-scalable streams, in which case TID, LID | bcp14>RECOMMENDED</bcp14> for scalable streams. | |||
| and TL0PICIDX MUST be 0 or omitted. | It <bcp14>MAY</bcp14> also be used for non-scalable streams, in which | |||
| The ID is assigned per <xref target="RFC8285" />, | case the TID, LID, and TL0PICIDX <bcp14>MUST</bcp14> be 0 or omitted. | |||
| and the length is encoded as L=2 which indicates 3 octets of data whe | The ID is assigned per <xref target="RFC8285" format="default" sectio | |||
| n nothing is omitted, | nFormat="of" derivedContent="RFC8285"/>. | |||
| or L=1 for 2 octets when TL0PICIDX is omitted, or L=0 for 1 octet whe | The length is encoded as follows:</t> | |||
| n both LID and TL0PICIDX are omitted.</t> | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section-3 | |||
| <figure> | .1-2"> | |||
| <artwork><![CDATA[ | <li pn="section-3.1-2.1">L=2 to indicate 3 octets of data when nothing | |||
| is omitted,</li> | ||||
| <li pn="section-3.1-2.2">L=1 for 2 octets when TL0PICIDX is omitted, o | ||||
| r</li> | ||||
| <li pn="section-3.1-2.3">L=0 for 1 octet when both the LID and TL0PICI | ||||
| DX are omitted.</li> | ||||
| </ul> | ||||
| <artwork align="left" pn="section-3.1-3"> | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID | LID | TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID | LID | TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| or | or | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=1 |S|E|I|D|B| TID | LID | (TL0PICIDX omitted) | | ID=? | L=1 |S|E|I|D|B| TID | LID | (TL0PICIDX omitted) | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| or | or | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=0 |S|E|I|D|B| TID | (LID and TL0PICIDX omitted) | | ID=? | L=0 |S|E|I|D|B| TID | (LID and TL0PICIDX omitted) | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| <t indent="0" pn="section-3.1-4">The following information is extracted | ||||
| <t>The following information are extracted from the media payload and se | from the media payload and sent in the Video Frame Marking RTP header extension. | |||
| nt in the Frame Marking RTP header extension. | </t> | |||
| <list style='symbols'> | <dl newline="true" indent="3" spacing="normal" pn="section-3.1-5"> | |||
| <t>S: Start of Frame (1 bit) - MUST be 1 in the first packet in a fr | <dt pn="section-3.1-5.1">S: Start of Frame (1 bit)</dt> | |||
| ame | <dd pn="section-3.1-5.2"> | |||
| within a layer; otherwise MUST be 0.</t> | <bcp14>MUST</bcp14> be 1 in the first packet in a frame | |||
| <t>E: End of Frame (1 bit) - MUST be 1 in the last packet in a frame | within a layer; otherwise, <bcp14>MUST</bcp14> be 0.</dd> | |||
| within a layer; otherwise MUST be 0. | <dt pn="section-3.1-5.3">E: End of Frame (1 bit)</dt> | |||
| Note that the RTP header marker bit MAY be used to infer the las | <dd pn="section-3.1-5.4"> | |||
| t packet of the highest enhancement layer, in payload formats with such semantic | <bcp14>MUST</bcp14> be 1 in the last packet in a frame | |||
| s.</t> | within a layer; otherwise, <bcp14>MUST</bcp14> be 0. | |||
| <t>I: Independent Frame (1 bit) - MUST be 1 for a frame within a la | Note that the RTP header marker bit <bcp14>MAY</bcp14> be used t | |||
| yer that can be | o infer the last packet of the highest enhancement layer in payload formats with | |||
| decoded independent of temporally prior frames, e.g. intra-frame, | such semantics.</dd> | |||
| VPX keyframe, | <dt pn="section-3.1-5.5">I: Independent Frame (1 bit)</dt> | |||
| H.264 IDR <xref target="RFC6184" />, | <dd pn="section-3.1-5.6"> | |||
| H.265 IDR/CRA/BLA/RAP <xref target="RFC7798" />; | <bcp14>MUST</bcp14> be 1 for a frame within a layer that can be | |||
| otherwise MUST be 0. | decoded independent of temporally prior frames, e.g., intra-frame | |||
| , VPX keyframe, | ||||
| H.264 Instantaneous Decoding Refresh (IDR) <xref target="RFC6184" | ||||
| format="default" sectionFormat="of" derivedContent="RFC6184"/>, or | ||||
| H.265 IDR / Clean Random Access (CRA) / Broken Link Access (BLA) | ||||
| / Random Access Point (RAP) <xref target="RFC7798" format="default" sectionForma | ||||
| t="of" derivedContent="RFC7798"/>; | ||||
| otherwise, <bcp14>MUST</bcp14> be 0. | ||||
| Note that this bit only signals temporal independence, so it can be | Note that this bit only signals temporal independence, so it can be | |||
| 1 in spatial or quality enhancement layers that depend on temporally | 1 in spatial or quality enhancement layers that depend on temporally | |||
| co-located layers but not temporally prior frames.</t> | co-located layers but not temporally prior frames.</dd> | |||
| <t>D: Discardable Frame (1 bit) - MUST be 1 for a frame within a lay | <dt pn="section-3.1-5.7">D: Discardable Frame (1 bit)</dt> | |||
| er the sender knows can be discarded, | <dd pn="section-3.1-5.8"> | |||
| and still provide a decodable media stream; otherwise MUST be 0. | <bcp14>MUST</bcp14> be 1 for a frame within a layer the sender knows | |||
| </t> | can be discarded | |||
| <t>B: Base Layer Sync (1 bit) - When TID is not 0, this MUST be 1 if | and still provide a decodable media stream; otherwise, <bcp14>MU | |||
| the sender knows this frame within a layer only depends | ST</bcp14> be 0. </dd> | |||
| on the base temporal layer; otherwise MUST be 0. When TID is 0 o | <dt pn="section-3.1-5.9">B: Base Layer Sync (1 bit)</dt> | |||
| r if no scalability is used, this MUST be 0.</t> | <dd pn="section-3.1-5.10">When the TID is not 0, this <bcp14>MUST</bcp | |||
| <t>TID: Temporal ID (3 bits) - Identifies the temporal layer/sub-lay | 14> be 1 if the sender knows this frame within a layer only depends | |||
| er encoded, | on the base temporal layer; otherwise, <bcp14>MUST</bcp14> be 0. | |||
| starting with 0 for the base layer, and increasing with higher te | When the TID is 0 or if no scalability is used, this <bcp14>MUST</bcp14> be 0. | |||
| mporal fidelity. | </dd> | |||
| If no scalability is used, this MUST be 0. It is implicitly 0 in | <dt pn="section-3.1-5.11">TID: Temporal-layer ID (3 bits)</dt> | |||
| the short extension format.</t> | <dd pn="section-3.1-5.12">Identifies the temporal layer/sub-layer enco | |||
| <t>LID: Layer ID (8 bits) - Identifies the spatial and quality layer | ded, | |||
| encoded, | starting with 0 for the base layer and increasing with higher tem | |||
| starting with 0 for the base layer, and increasing with higher fi | poral fidelity. | |||
| delity. | If no scalability is used, this <bcp14>MUST</bcp14> be 0. It is i | |||
| If no scalability is used, this MUST be 0 or omitted to reduce le | mplicitly 0 in the short extension format. | |||
| ngth. | </dd> | |||
| When omitted, TL0PICIDX MUST also be omitted. It is implicitly 0 | <dt pn="section-3.1-5.13">LID: Layer ID (8 bits)</dt> | |||
| in the short extension format | <dd pn="section-3.1-5.14">Identifies the spatial and quality layer enc | |||
| or when omitted in the long extension format.</t> | oded, | |||
| <t>TL0PICIDX: Temporal Layer 0 Picture Index (8 bits) - When TID is | starting with 0 for the base layer and increasing with higher fid | |||
| 0 and LID is 0, this is a cyclic counter labeling | elity. | |||
| base layer frames. When TID is not 0 or LID is not 0, | If no scalability is used, this <bcp14>MUST</bcp14> be 0 or omitt | |||
| this indicates a dependency on the given index, such that this fr | ed to reduce length. | |||
| ame within this layer | When the LID is omitted, TL0PICIDX <bcp14>MUST</bcp14> also be om | |||
| depends on the frame with this label in the layer with TID 0 and | itted. It is implicitly 0 in the short extension format | |||
| LID 0. | or when omitted in the long extension format.</dd> | |||
| If no scalability is used, or the cyclic counter is unknown, this | <dt pn="section-3.1-5.15">TL0PICIDX: Temporal Layer 0 Picture Index (8 | |||
| MUST be omitted to reduce length. | bits)</dt> | |||
| Note that 0 is a valid index value for TL0PICIDX.</t> | <dd pn="section-3.1-5.16">When the TID is 0 and the LID is 0, this is | |||
| </list> | a cyclic counter labeling | |||
| </t> | base layer frames. When the TID is not 0 or the LID is not 0, | |||
| this indicates a dependency on the given index, such that | |||
| <t>The layer information contained in TID and LID convey useful aspects o | this frame within this layer | |||
| f the layer structure that | depends on the frame with this label in the layer with TID 0 an | |||
| d LID 0. | ||||
| If no scalability is used, or the cyclic counter is unknown, TL0P | ||||
| ICIDX <bcp14>MUST</bcp14> be omitted to reduce length. | ||||
| Note that 0 is a valid index value for TL0PICIDX.</dd> | ||||
| </dl> | ||||
| <t indent="0" pn="section-3.1-6">The layer information contained in the | ||||
| TID and LID conveys useful aspects of the layer structure that | |||
| can be utilized in selective forwarding.</t> | can be utilized in selective forwarding.</t> | |||
| <t>Without further information about the layer structure, | <t indent="0" pn="section-3.1-7">Without further information about the layer structure, | |||
| these TID/LID identifiers can only be used for relative priority of layers | these TID/LID identifiers can only be used for relative priority of layers | |||
| and implicit dependencies between layers. | and implicit dependencies between layers. | |||
| They convey a layer hierarchy with TID=0 and LID=0 identifying the base layer. | They convey a layer hierarchy with TID = 0 and LID = 0 identifying the base layer. | |||
| Higher values of TID identify higher temporal layers with higher frame rates. | Higher values of TID identify higher temporal layers with higher frame rates. | |||
| Higher values of LID identify higher spatial and/or quality layers with higher resolutions and/or bitrates. | Higher values of LID identify higher spatial and/or quality layers with higher resolutions and/or bitrates. | |||
| Implicit dependencies between layers assume that a layer with a given | Implicit dependencies between layers assume that a layer with a given | |||
| TID/LID MAY depend | TID/LID <bcp14>MAY</bcp14> depend | |||
| on layer(s) with the same or lower TID/LID, but MUST NOT depend on lay | on a layer or layers with the same or lower TID/LID, but they <bcp14>M | |||
| er(s) with higher TID/LID. | UST NOT</bcp14> depend on a layer or layers with higher TID/LID. | |||
| </t><t> | </t> | |||
| <t indent="0" pn="section-3.1-8"> | ||||
| With further information, | With further information, | |||
| for example, possible future RTCP SDES items that convey full layer st | for example, possible future RTCP source description (SDES) items that | |||
| ructure information, it may | convey full layer structure information, it may | |||
| be possible to map these TIDs and LIDs to specific absolute frame rate | be possible to map these TIDs and LIDs to specific absolute frame rate | |||
| s, resolutions and bitrates, | s, resolutions, bitrates, and explicit dependencies between layers. | |||
| as well as explicit dependencies between layers. | Such additional layer information may be useful for forwarding decisio | |||
| Such additional layer information may be useful for forwarding decisio | ns in the RTP switch | |||
| ns in the RTP switch, | but is beyond the scope of this document. The relative layer informati | |||
| but is beyond the scope of this memo. The relative layer information i | on is still useful | |||
| s still useful | for many selective forwarding decisions, even without such additional | |||
| for many selective forwarding decisions even without such additional l | layer information. | |||
| ayer information. | ||||
| </t> | </t> | |||
| </section> | </section> | |||
| <section anchor="mandatory-non-scalable" numbered="true" removeInRFC="fals | ||||
| <section title="Short Extension for Non-Scalable Streams" anchor="mandator | e" toc="include" pn="section-3.2"> | |||
| y-non-scalable"> | <name slugifiedName="name-short-extension-for-non-sca">Short Extension f | |||
| <t>The following RTP header extension is RECOMMENDED for non-scalable str | or Non-Scalable Streams</name> | |||
| eams. | <t indent="0" pn="section-3.2-1">The following RTP header extension is < | |||
| bcp14>RECOMMENDED</bcp14> for non-scalable streams. | ||||
| It is identical to the shortest form of the extension for scalable streams, | It is identical to the shortest form of the extension for scalable streams, | |||
| except the last four bits (B and TID) are replaced with zeros. | except the last four bits (B and TID) are replaced with zeros. | |||
| It MAY also be used for scalable streams if the sender has limited or no | It <bcp14>MAY</bcp14> also be used for scalable streams if the sender has limited or no | |||
| information about stream scalability. | information about stream scalability. | |||
| The ID is assigned per <xref target="RFC8285" />, | The ID is assigned per <xref target="RFC8285" format="default" sectio | |||
| and the length is encoded as L=0 which indicates 1 octet of data.</t> | nFormat="of" derivedContent="RFC8285"/>; | |||
| the length is encoded as L=0, which indicates 1 octet of data.</t> | ||||
| <figure> | <artwork align="left" pn="section-3.2-2"> | |||
| <artwork><![CDATA[ | ||||
| 0 1 | 0 1 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=0 |S|E|I|D|0 0 0 0| | | ID=? | L=0 |S|E|I|D|0 0 0 0| | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| <t indent="0" pn="section-3.2-3">The following information is extracted | ||||
| <t>The following information are extracted from the media payload and se | from the media payload and sent in the Video Frame Marking RTP header extension. | |||
| nt in the Frame Marking RTP header extension. | ||||
| <list style='symbols'> | ||||
| <t>S: Start of Frame (1 bit) - MUST be 1 in the first packet in a fr | ||||
| ame; otherwise MUST be 0.</t> | ||||
| <t>E: End of Frame (1 bit) - MUST be 1 in the last packet in a frame | ||||
| ; otherwise MUST be 0. | ||||
| SHOULD match the RTP header marker bit in payload formats with such | ||||
| semantics for marking end of frame.</t> | ||||
| <t>I: Independent Frame (1 bit) - MUST be 1 for frames that can be | ||||
| decoded independent of temporally prior frames, e.g. intra-frame, | ||||
| VPX keyframe, | ||||
| H.264 IDR <xref target="RFC6184" />, | ||||
| H.265 IDR/CRA/BLA/IRAP <xref target="RFC7798" />; | ||||
| otherwise MUST be 0. </t> | ||||
| <t>D: Discardable Frame (1 bit) - MUST be 1 for frames the sender kn | ||||
| ows can be discarded, | ||||
| and still provide a decodable media stream; otherwise MUST be 0. | ||||
| </t> | ||||
| <t>The remaining (4 bits) - are reserved/fixed values and not used f | ||||
| or non-scalable streams; | ||||
| they MUST be set to 0 upon transmission and ignored upon reception | ||||
| .</t> | ||||
| </list> | ||||
| </t> | </t> | |||
| <dl newline="true" indent="3" spacing="normal" pn="section-3.2-4"> | ||||
| <dt pn="section-3.2-4.1">S: Start of Frame (1 bit)</dt> | ||||
| <dd pn="section-3.2-4.2"> | ||||
| <bcp14>MUST</bcp14> be 1 in the first packet in a frame; otherwise, | ||||
| <bcp14>MUST</bcp14> be 0.</dd> | ||||
| <dt pn="section-3.2-4.3">E: End of Frame (1 bit)</dt> | ||||
| <dd pn="section-3.2-4.4"> | ||||
| <bcp14>MUST</bcp14> be 1 in the last packet in a frame; otherwise, < | ||||
| bcp14>MUST</bcp14> be 0. | ||||
| <bcp14>SHOULD</bcp14> match the RTP header marker bit in payload for | ||||
| mats with such semantics for marking end of frame.</dd> | ||||
| <dt pn="section-3.2-4.5">I: Independent Frame (1 bit)</dt> | ||||
| <dd pn="section-3.2-4.6"> | ||||
| <bcp14>MUST</bcp14> be 1 for frames that can be | ||||
| decoded independent of temporally prior frames, e.g., intra-frame | ||||
| , VPX keyframe, | ||||
| H.264 IDR <xref target="RFC6184" format="default" sectionFormat=" | ||||
| of" derivedContent="RFC6184"/>, or | ||||
| H.265 IDR/CRA/BLA/IRAP <xref target="RFC7798" format="default" se | ||||
| ctionFormat="of" derivedContent="RFC7798"/>; | ||||
| otherwise, <bcp14>MUST</bcp14> be 0. </dd> | ||||
| <dt pn="section-3.2-4.7">D: Discardable Frame (1 bit)</dt> | ||||
| <dd pn="section-3.2-4.8"> | ||||
| <bcp14>MUST</bcp14> be 1 for frames the sender knows can be discarde | ||||
| d | ||||
| and still provide a decodable media stream; otherwise, <bcp14>MU | ||||
| ST</bcp14> be 0. </dd> | ||||
| <dt pn="section-3.2-4.9">The remaining (4 bits)</dt> | ||||
| <dd pn="section-3.2-4.10">These are reserved/fixed values and not used | ||||
| for non-scalable streams; | ||||
| they <bcp14>MUST</bcp14> be set to zero upon transmission and igno | ||||
| red upon reception.</dd> | ||||
| </dl> | ||||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3.3 | ||||
| <section title="Layer ID Mappings for Scalable Streams"> | "> | |||
| <t> This section maps the specific Layer ID information contained in speci | <name slugifiedName="name-lid-mappings-for-scalable-s">LID Mappings for | |||
| fic scalable codecs to the generic LID and TID fields. </t> | Scalable Streams</name> | |||
| <t> Note that non-scalable streams have no Layer ID information and thus n | <t indent="0" pn="section-3.3-1"> This section maps the specific Layer I | |||
| o mappings. </t> | D (LID) information contained in specific scalable codecs to the generic LID and | |||
| TID fields. </t> | ||||
| <section title="VP9 LID Mapping"> | <t indent="0" pn="section-3.3-2"> Note that non-scalable streams have no | |||
| <t> The VP9 <xref target="I-D.ietf-payload-vp9" /> | LID information; thus, they have no mappings. </t> | |||
| Spatial Layer ID (SID, 3 bits) and Temporal Layer ID (TID, 3 bits) | <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | |||
| .3.1"> | ||||
| <name slugifiedName="name-vp9-lid-mapping">VP9 LID Mapping</name> | ||||
| <t indent="0" pn="section-3.3.1-1"> The VP9 <xref target="RFC9628" for | ||||
| mat="default" sectionFormat="of" derivedContent="RFC9628"/> | ||||
| Spatial-layer ID (SID, 3 bits) and Temporal-layer ID (TID, 3 bits) | ||||
| in the VP9 payload descriptor are mapped to the generic LID and TID fields | in the VP9 payload descriptor are mapped to the generic LID and TID fields | |||
| in the header extension as shown in the following figure.</t> | in the header extension as shown in the following figure.</t> | |||
| <artwork align="left" pn="section-3.3.1-2"> | ||||
| <figure> | ||||
| <artwork><![CDATA[ | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0| SID | TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0| SID | TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| <t indent="0" pn="section-3.3.1-3"> The S bit <bcp14>MUST</bcp14> matc | ||||
| <t> The S bit MUST match the B bit in the VP9 payload descriptor.</t> | h the B bit in the VP9 payload descriptor.</t> | |||
| <t> The E bit MUST match the E bit in the VP9 payload descriptor.</t> | <t indent="0" pn="section-3.3.1-4"> The E bit <bcp14>MUST</bcp14> matc | |||
| <t> The I bit MUST match the inverse of the P bit in the VP9 payload desc | h the E bit in the VP9 payload descriptor.</t> | |||
| riptor.</t> | <t indent="0" pn="section-3.3.1-5"> The I bit <bcp14>MUST</bcp14> matc | |||
| <t> The D bit MUST be 1 if the refresh_frame_flags in the VP9 payload unc | h the inverse of the P bit in the VP9 payload descriptor.</t> | |||
| ompressed header are all 0, otherwise it MUST be 0.</t> | <t indent="0" pn="section-3.3.1-6"> The D bit <bcp14>MUST</bcp14> be 1 | |||
| <t> The B bit MUST be 0 if TID is 0; otherwise, if TID is not 0, it MUST | if the refresh_frame_flags bits in the VP9 payload uncompressed header are all | |||
| match the U bit in the VP9 payload descriptor. Note: When using temporally neste | 0; otherwise, it <bcp14>MUST</bcp14> be 0.</t> | |||
| d scalability structures as recommended in <xref target="scalable-structures" /> | <t indent="0" pn="section-3.3.1-7"> The B bit <bcp14>MUST</bcp14> be 0 | |||
| , the B bit and VP9 U bit will always be 1 if TID is not 0, since it is always | if the TID is 0; if the TID is not 0, it <bcp14>MUST</bcp14> match the U bit in | |||
| the VP9 payload descriptor.</t> | ||||
| <aside pn="section-3.3.1-8"> | ||||
| <t indent="0" pn="section-3.3.1-8.1">Note: when using temporally nes | ||||
| ted scalability structures as recommended in <xref target="scalable-structures" | ||||
| format="default" sectionFormat="of" derivedContent="Section 3.5.2"/>, the B bit | ||||
| and VP9 U bit will always be 1 if the TID is not 0 since it is always | ||||
| possible to switch up to a higher temporal layer in such nested structures.</t> | possible to switch up to a higher temporal layer in such nested structures.</t> | |||
| <t> TID, SID and TL0PICIDX MUST match the correspondingly named fields in | </aside> | |||
| the VP9 payload descriptor, | <t indent="0" pn="section-3.3.1-9">The TID, SID, and TL0PICIDX <bcp14> | |||
| MUST</bcp14> match the correspondingly named fields in the VP9 payload descripto | ||||
| r, | ||||
| with SID aligned in the least significant 3 bits of the 8-bit LID field and zeros | with SID aligned in the least significant 3 bits of the 8-bit LID field and zeros | |||
| in the most significant 5 bits.</t> | in the most significant 5 bits.</t> | |||
| </section> | ||||
| </section> | <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | |||
| .3.2"> | ||||
| <section title="H265 LID Mapping"> | <name slugifiedName="name-h265-lid-mapping">H265 LID Mapping</name> | |||
| <t> The H265 <xref target="RFC7798" /> LayerID (6 bits) and TID (3 bits) | <t indent="0" pn="section-3.3.2-1"> The H265 <xref target="RFC7798" fo | |||
| from the NAL unit header are mapped to the generic LID and TID fiel | rmat="default" sectionFormat="of" derivedContent="RFC7798"/> layer ID (6 bits) | |||
| ds | and TID (3 bits) | |||
| from the Network Abstraction Layer (NAL) unit header are mapped to | ||||
| the generic LID and TID fields | ||||
| in the header extension as shown in the following figure.</t> | in the header extension as shown in the following figure.</t> | |||
| <artwork align="left" pn="section-3.3.2-2"> | ||||
| <figure> | ||||
| <artwork><![CDATA[ | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID |0|0| LayerID | TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID |0|0| layer ID | TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| <t indent="0" pn="section-3.3.2-3">The S and E bits <bcp14>MUST</bcp14 | ||||
| <t>The S and E bits MUST match the correspondingly named bits in PACI:PHE | > match the correspondingly named bits in PACI:PHES:TSCI payload structures.</t> | |||
| S:TSCI payload structures.</t> | <t indent="0" pn="section-3.3.2-4">The I bit <bcp14>MUST</bcp14> be 1 | |||
| <t>The I bit MUST be 1 when the NAL unit type is 16-23 (inclusive) or 32- | when the NAL unit type is 16-23 (inclusive) or 32-34 (inclusive), or an aggregat | |||
| 34 (inclusive), or an aggregation packet or fragmentation unit encapsulating any | ion packet or fragmentation unit encapsulating any of these types; otherwise, it | |||
| of these types, otherwise it MUST be 0. These ranges cover intra (IRAP) frames | <bcp14>MUST</bcp14> be 0. These ranges cover intra (IRAP) frames as well as | |||
| as well as | critical parameter sets (Video Parameter Set (VPS), Sequence Parameter | |||
| critical parameter sets (VPS, SPS, PPS).</t> | Set (SPS), Picture Parameter Set (PPS)).</t> | |||
| <t>The D bit MUST be 1 when the NAL unit type is 0, 2, 4, 6, 8, 10, 12, 1 | <t indent="0" pn="section-3.3.2-5">The D bit <bcp14>MUST</bcp14> be 1 | |||
| 4, or 38, or an aggregation packet or fragmentation unit encapsulating only thes | if either:</t> | |||
| e types, otherwise it MUST be 0. These ranges cover non-reference frames as well | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section | |||
| as filler data.</t> | -3.3.2-6"> | |||
| <t>The B bit can not be determined reliably from simple inspection of pay | <li pn="section-3.3.2-6.1">the payload's NAL unit header's NRI field | |||
| load headers, and therefore is determined by implementation-specific means. For | is 0, or</li> | |||
| example, internal codec interfaces may provide information to set this reliably. | <li pn="section-3.3.2-6.2">the payload is an aggregation packet or f | |||
| </t> | ragmentation unit encapsulating only NAL units with NRI = 0.</li> | |||
| <t> TID and LayerID MUST match the correspondingly named fields in the H2 | </ul> | |||
| 65 NAL unit header, | <t indent="0" pn="section-3.3.2-7">Otherwise, it <bcp14>MUST</bcp14> b | |||
| with LayerID aligned in the least significant 6 bits of the 8-bit LID | e 0.</t> | |||
| field and zeros | <t indent="0" pn="section-3.3.2-8">The NRI = 0 condition signals non-r | |||
| eference frames.</t> | ||||
| <t indent="0" pn="section-3.3.2-9">The B bit cannot be determined reli | ||||
| ably from simple inspection of payload headers; therefore, it is determined by i | ||||
| mplementation-specific means. For example, internal codec interfaces may provide | ||||
| information to set this reliably.</t> | ||||
| <t indent="0" pn="section-3.3.2-10">The TID and layer ID <bcp14>MUST</ | ||||
| bcp14> match the correspondingly named fields in the H265 NAL unit header, | ||||
| with layer ID aligned in the least significant 6 bits of the 8-bit LI | ||||
| D field and zeros | ||||
| in the most significant 2 bits.</t> | in the most significant 2 bits.</t> | |||
| </section> | ||||
| </section> | <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | |||
| .3.3"> | ||||
| <section title="H264-SVC LID Mapping"> | <name slugifiedName="name-h264-scalable-video-coding-">H264 Scalable V | |||
| <t> The following shows H264-SVC <xref target="RFC6190" /> Layer encoding inf | ideo Coding (SVC) LID Mapping</name> | |||
| ormation (3 bits for | <t indent="0" pn="section-3.3.3-1"> The following shows H264-SVC <xref | |||
| spatial/dependency layer, 4 bits for quality layer and 3 bits for temporal la | target="RFC6190" format="default" sectionFormat="of" derivedContent="RFC6190"/> | |||
| yer) mapped to the generic LID and TID fields.</t> | Layer encoding information (3 bits for | |||
| <t>The S, E, I and D bits MUST match the correspondingly named bits in PACSI | spatial/dependency layer (DID), 4 bits for quality layer (QID), and 3 bits fo | |||
| payload structures.</t> | r temporal layer) mapped to the generic LID and TID fields.</t> | |||
| <t>The I bit MUST be 1 when the NAL unit type is 5, 7, 8, 13, or 15, | <t indent="0" pn="section-3.3.3-2">The S, E, I, and D bits <bcp14>MUST | |||
| or an aggregation packet or fragmentation unit encapsulating any of these t | </bcp14> match the correspondingly named bits in Payload Content Scalability Inf | |||
| ypes, otherwise it MUST be 0. These ranges cover intra (IDR) frames as well as | ormation (PACSI) payload structures.</t> | |||
| <t indent="0" pn="section-3.3.3-3">The I bit <bcp14>MUST</bcp14> be 1 | ||||
| when the NAL unit type is 5, 7, 8, 13, 15, | ||||
| or an aggregation packet or fragmentation unit encapsulating any of these t | ||||
| ypes; otherwise, it <bcp14>MUST</bcp14> be 0. These ranges cover intra (IDR) fra | ||||
| mes as well as | ||||
| critical parameter sets (SPS/PPS variants).</t> | critical parameter sets (SPS/PPS variants).</t> | |||
| <t>The D bit MUST be 1 when the NAL unit header NRI field is 0, or an aggrega | <t indent="0" pn="section-3.3.3-4">The D bit <bcp14>MUST</bcp14> be 1 | |||
| tion packet or fragmentation unit encapsulating only NAL units with NRI=0, other | if either:</t> | |||
| wise it MUST be 0. | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section | |||
| The NRI=0 condition signals non-reference frames.</t> | -3.3.3-5"> | |||
| <t>The B bit can not be determined reliably from simple inspection of payload | <li pn="section-3.3.3-5.1">the payload's NAL unit header's NRI field | |||
| headers, and therefore is determined by implementation-specific means. For exam | is 0, or</li> | |||
| ple, internal codec interfaces may provide information to set this reliably.</t> | <li pn="section-3.3.3-5.2">the payload is an aggregation packet or f | |||
| ragmentation unit encapsulating only NAL units with NRI = 0.</li> | ||||
| <figure> | </ul> | |||
| <artwork><![CDATA[ | <t indent="0" pn="section-3.3.3-6">Otherwise, it <bcp14>MUST</bcp14> b | |||
| e 0.</t> | ||||
| <t indent="0" pn="section-3.3.3-7">The NRI = 0 condition signals non-r | ||||
| eference frames.</t> | ||||
| <t indent="0" pn="section-3.3.3-8">The B bit cannot be determined reli | ||||
| ably from simple inspection of payload headers; therefore, it is determined by i | ||||
| mplementation-specific means. For example, internal codec interfaces may provide | ||||
| information to set this reliably.</t> | ||||
| <artwork align="left" pn="section-3.3.3-9"> | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID |0| DID | QID | TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID |0| DID | QID | TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | ||||
| <section title="H264 (AVC) LID Mapping"> | .3.4"> | |||
| <t> The following shows the header extension for H264 (AVC) <xref target="RF | <name slugifiedName="name-h264-advanced-video-coding-">H264 Advanced V | |||
| C6184" /> that contains | ideo Coding (AVC) LID Mapping</name> | |||
| <t indent="0" pn="section-3.3.4-1"> The following shows the header ex | ||||
| tension for H264 (AVC) <xref target="RFC6184" format="default" sectionFormat="of | ||||
| " derivedContent="RFC6184"/> that contains | ||||
| only temporal layer information.</t> | only temporal layer information.</t> | |||
| <t> The S bit MUST be 1 when the timestamp in the RTP header differs from the | <t indent="0" pn="section-3.3.4-2"> The S bit <bcp14>MUST</bcp14> be 1 | |||
| timestamp | when the timestamp in the RTP header differs from the timestamp | |||
| in the prior RTP sequence number from the same SSRC, otherwise it MUST be 0 | in the prior RTP sequence number from the same SSRC; otherwise, it <bcp14>M | |||
| .</t> | UST</bcp14> be 0.</t> | |||
| <t> The E bit MUST match the M bit in the RTP header.</t> | <t indent="0" pn="section-3.3.4-3"> The E bit <bcp14>MUST</bcp14> matc | |||
| <t>The I bit MUST be 1 when the NAL unit type is 5, 7, or 8, | h the M bit in the RTP header.</t> | |||
| or an aggregation packet or fragmentation unit encapsulating any of these t | <t indent="0" pn="section-3.3.4-4">The I bit <bcp14>MUST</bcp14> be 1 | |||
| ypes, | when the NAL unit type is 5, 7, or 8, | |||
| otherwise it MUST be 0. These ranges cover intra (IDR) frames as well as | or an aggregation packet or fragmentation unit encapsulating any of these t | |||
| ypes; | ||||
| otherwise, it <bcp14>MUST</bcp14> be 0. These ranges cover intra (IDR) fram | ||||
| es as well as | ||||
| critical parameter sets (SPS/PPS).</t> | critical parameter sets (SPS/PPS).</t> | |||
| <t>The D bit MUST be 1 when the NAL unit header NRI field is 0, | <t indent="0" pn="section-3.3.4-5">The D bit <bcp14>MUST</bcp14> be 1 | |||
| or an aggregation packet or fragmentation unit encapsulating only | if either:</t> | |||
| NAL units with NRI=0, otherwise it MUST be 0. | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section | |||
| The NRI=0 condition signals non-reference frames.</t> | -3.3.4-6"> | |||
| <t>The B bit can not be determined reliably from simple inspection of payload | <li pn="section-3.3.4-6.1">the payload's NAL unit header's NRI field | |||
| headers, and therefore is determined by implementation-specific means. For exam | is 0, | |||
| ple, internal codec interfaces may provide information to set this reliably.</t> | or</li> | |||
| <figure> | <li pn="section-3.3.4-6.2">the payload is an aggregation packet or f | |||
| <artwork><![CDATA[ | ragmentation unit encapsulating only | |||
| NAL units with NRI = 0.</li> | ||||
| </ul> | ||||
| <t indent="0" pn="section-3.3.4-7">Otherwise, it <bcp14>MUST</bcp14> b | ||||
| e 0.</t> | ||||
| <t indent="0" pn="section-3.3.4-8">The NRI = 0 condition signals non-r | ||||
| eference frames.</t> | ||||
| <t indent="0" pn="section-3.3.4-9">The B bit cannot be determined reli | ||||
| ably from simple inspection of payload headers; therefore, it is determined by i | ||||
| mplementation-specific means. For example, internal codec interfaces may provide | ||||
| information to set this reliably.</t> | ||||
| <artwork align="left" pn="section-3.3.4-10"> | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0|0|0|0| TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0|0|0|0| TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | ||||
| <section title="VP8 LID Mapping"> | .3.5"> | |||
| <t> The following shows the header extension for VP8 <xref target="RFC7741" | <name slugifiedName="name-vp8-lid-mapping">VP8 LID Mapping</name> | |||
| /> that contains | <t indent="0" pn="section-3.3.5-1"> The following shows the header ex | |||
| tension for VP8 <xref target="RFC7741" format="default" sectionFormat="of" deriv | ||||
| edContent="RFC7741"/> that contains | ||||
| only temporal layer information.</t> | only temporal layer information.</t> | |||
| <t> The S bit MUST match the correspondingly named bit in the VP8 payload des | <t indent="0" pn="section-3.3.5-2"> The S bit <bcp14>MUST</bcp14> matc | |||
| criptor when PID=0, otherwise it MUST be 0.</t> | h the correspondingly named bit in the VP8 payload descriptor when PID=0; otherw | |||
| <t> The E bit MUST match the M bit in the RTP header. </t> | ise, it <bcp14>MUST</bcp14> be 0.</t> | |||
| <t> The I bit MUST match the inverse of the P bit in the VP8 payload header.< | <t indent="0" pn="section-3.3.5-3"> The E bit <bcp14>MUST</bcp14> matc | |||
| /t> | h the M bit in the RTP header. </t> | |||
| <t> The D bit MUST match the N bit in the VP8 payload descriptor.</t> | <t indent="0" pn="section-3.3.5-4"> The I bit <bcp14>MUST</bcp14> matc | |||
| <t> The B bit MUST match the Y bit in the VP8 payload descriptor. Note: When | h the inverse of the P bit in the VP8 payload header.</t> | |||
| using temporally nested scalability structures as recommended in <xref target="s | <t indent="0" pn="section-3.3.5-5"> The D bit <bcp14>MUST</bcp14> matc | |||
| calable-structures" />, the B bit and VP8 Y bit will always be 1 if TID is not 0 | h the N bit in the VP8 payload descriptor.</t> | |||
| , since it is always | <t indent="0" pn="section-3.3.5-6"> The B bit <bcp14>MUST</bcp14> matc | |||
| h the Y bit in the VP8 payload descriptor.</t> | ||||
| <aside pn="section-3.3.5-7"> | ||||
| <t indent="0" pn="section-3.3.5-7.1">Note: when using temporally nes | ||||
| ted scalability structures as recommended in <xref target="scalable-structures" | ||||
| format="default" sectionFormat="of" derivedContent="Section 3.5.2"/>, the B bit | ||||
| and VP8 Y bit will always be 1 if the TID is not 0 since it is always | ||||
| possible to switch up to a higher temporal layer in such nested structures.</t> | possible to switch up to a higher temporal layer in such nested structures.</t> | |||
| <t> TID and TL0PICIDX MUST match the correspondingly named fields in the VP8 | </aside> | |||
| payload descriptor. </t> | <t indent="0" pn="section-3.3.5-8">The TID and TL0PICIDX <bcp14>MUST</ | |||
| <figure> | bcp14> match the correspondingly named fields in the VP8 payload descriptor. </t | |||
| <artwork><![CDATA[ | > | |||
| <artwork align="left" pn="section-3.3.5-9"> | ||||
| 0 1 2 3 | 0 1 2 3 | |||
| 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0|0|0|0| TL0PICIDX | | | ID=? | L=2 |S|E|I|D|B| TID |0|0|0|0|0|0|0|0| TL0PICIDX | | |||
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |||
| ]]></artwork></figure> | </artwork> | |||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | ||||
| <section title="Future Codec LID Mapping"> | .3.6"> | |||
| <t>The RTP payload format specification for future video codecs SHOULD inc | <name slugifiedName="name-future-codec-lid-mapping">Future Codec LID M | |||
| lude a section describing | apping</name> | |||
| <t indent="0" pn="section-3.3.6-1">The RTP payload format specificatio | ||||
| n for future video codecs <bcp14>SHOULD</bcp14> include a section describing | ||||
| the LID mapping and TID mapping for the codec.</t> | the LID mapping and TID mapping for the codec.</t> | |||
| </section> | </section> | |||
| </section> | ||||
| </section> | <section numbered="true" removeInRFC="false" toc="include" pn="section-3.4 | |||
| "> | ||||
| <section title="Signaling Information"> | <name slugifiedName="name-signaling-information">Signaling Information</ | |||
| <t>The URI for declaring this header extension in an extmap attribute is | name> | |||
| <t indent="0" pn="section-3.4-1">The URI for declaring this header exten | ||||
| sion in an extmap attribute is | ||||
| "urn:ietf:params:rtp-hdrext:framemarking". It does not contain any | "urn:ietf:params:rtp-hdrext:framemarking". It does not contain any | |||
| extension attributes. </t> | extension attributes. </t> | |||
| <t>An example attribute line in SDP:</t> | <t indent="0" pn="section-3.4-2">An example attribute line in SDP:</t> | |||
| <figure> | <artwork align="left" pn="section-3.4-3"> | |||
| <artwork><![CDATA[ | ||||
| a=extmap:3 urn:ietf:params:rtp-hdrext:framemarking | a=extmap:3 urn:ietf:params:rtp-hdrext:framemarking | |||
| ]]></artwork></figure> | </artwork> | |||
| </section> | ||||
| </section> | <section numbered="true" removeInRFC="false" toc="include" pn="section-3.5 | |||
| "> | ||||
| <section title="Usage Considerations"> | <name slugifiedName="name-usage-considerations">Usage Considerations</na | |||
| <t>The header extension values MUST represent what is already in the RTP p | me> | |||
| ayload.</t> | <t indent="0" pn="section-3.5-1">The header extension values <bcp14>MUST | |||
| <t> When an RTP switch needs to discard a received video frame due to cong | </bcp14> represent what is already in the RTP payload.</t> | |||
| estion control considerations, | <t indent="0" pn="section-3.5-2">When an RTP switch needs to discard rec | |||
| it is RECOMMENDED that it preferably drop frames marked with the D (Discar | eived video frames due to congestion control considerations, | |||
| dable) bit set, | it is <bcp14>RECOMMENDED</bcp14> that it drop:</t> | |||
| or the highest values of TID and LID, which indicate the highest tempora | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section-3 | |||
| l and spatial/quality enhancement layers, since those typically have fewer depen | .5-3"> | |||
| denices on them than lower layers.</t> | <li pn="section-3.5-3.1">frames marked with the D bit set, | |||
| <t> When an RTP switch wants to forward a new video stream to a receiver, | or</li> | |||
| it is RECOMMENDED to | <li pn="section-3.5-3.2">frames with the highest values of TID and LID | |||
| select the new video stream from the first switching point with the I (Ind | (which indicate the highest temporal and spatial/quality enhancement layers) si | |||
| ependent) bit set in all spatial layers and forward the same. | nce those typically have fewer dependencies on them than lower layers.</li> | |||
| An RTP switch can request a media source to generate a switching point by | </ul> | |||
| sending | <t indent="0" pn="section-3.5-4"> When an RTP switch wants to forward a | |||
| Full Intra Request (RTCP FIR) as defined in <xref target="RFC5104" />, for | new video stream to a receiver, it is <bcp14>RECOMMENDED</bcp14> to | |||
| example. </t> | select the new video stream from the first switching point with the I bit | |||
| set in all spatial layers and forward the video stream from that point on. An R | ||||
| <section title="Relation to Layer Refresh Request (LRR)"> | TP switch can request that a media source generate a switching point by sending | |||
| <t>Receivers can use the Layer Refresh Request (LRR) <xref target="I-D.i | an RTCP | |||
| etf-avtext-lrr" /> | Full Intra Request (FIR) as defined in <xref target="RFC5104" format="defa | |||
| ult" sectionFormat="of" derivedContent="RFC5104"/>, for example. </t> | ||||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-3 | ||||
| .5.1"> | ||||
| <name slugifiedName="name-relation-to-layer-refresh-r">Relation to Lay | ||||
| er Refresh Request (LRR)</name> | ||||
| <t indent="0" pn="section-3.5.1-1">Receivers can use the Layer Refresh | ||||
| Request (LRR) <xref target="RFC9627" format="default" sectionFormat="of" derive | ||||
| dContent="RFC9627"/> | ||||
| RTCP feedback message | RTCP feedback message | |||
| to upgrade to a higher layer in scalable encodings. The TID/LID values | to upgrade to a higher layer in scalable encodings. The TID/LID values | |||
| and formats used in LRR messages MUST correspond to the same values an | and formats used in LRR messages <bcp14>MUST</bcp14> correspond to the | |||
| d formats | same values and formats | |||
| specified in <xref target="mandatory-scalable" />. | specified in <xref target="mandatory-scalable" format="default" sectio | |||
| nFormat="of" derivedContent="Section 3.1"/>. | ||||
| </t> | </t> | |||
| <t>Because frame marking can only be used with temporally-nested strea | <t indent="0" pn="section-3.5.1-2">Because frame marking can only be u | |||
| ms, | sed with temporally nested streams, | |||
| temporal-layer LRR refreshes are unnecessary for frame-marked stream | temporal-layer refreshes requested with an LRR message are unnecessa | |||
| s. | ry for frame-marked streams. | |||
| Other refreshes can be detected based on the I bit being set for the specific spatial layers. | Other refreshes can be detected based on the I bit being set for the specific spatial layers. | |||
| </t> | </t> | |||
| </section> | </section> | |||
| <section title="Scalability Structures" anchor="scalable-structures"> | <section anchor="scalable-structures" numbered="true" removeInRFC="false | |||
| <t>The LID and TID information is most useful for fixed scalability st | " toc="include" pn="section-3.5.2"> | |||
| ructures, | <name slugifiedName="name-scalability-structures">Scalability Structur | |||
| es</name> | ||||
| <t indent="0" pn="section-3.5.2-1">The LID and TID information is most | ||||
| useful for fixed scalability structures, | ||||
| such as nested hierarchical temporal layering structures, where each temporal | such as nested hierarchical temporal layering structures, where each temporal | |||
| layer only references lower temporal layers or the base temporal layer. | layer only references lower temporal layers or the base temporal layer. | |||
| The LID and TID information is less useful, or even not useful at all, | The LID and TID information is less useful, or even not useful at all, | |||
| for complex, irregular scalability structures that do not conform to common, | for complex, irregular scalability structures that do not conform to common, | |||
| fixed patterns of inter-layer dependencies and referencing structures. | fixed patterns of inter-layer dependencies and referencing structures. | |||
| Therefore it is RECOMMENDED to use LID and TID information for | Therefore, it is <bcp14>RECOMMENDED</bcp14> to use LID and TID information for | |||
| RTP switch forwarding decisions only in the case of temporally nested | RTP switch forwarding decisions only in the case of temporally nested | |||
| scalability structures, and it is NOT RECOMMENDED for other | scalability structures, and it is <bcp14>NOT RECOMMENDED</bcp14> for other | |||
| (more complex or irregular) scalability structures.</t> | (more complex or irregular) scalability structures.</t> | |||
| </section> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| </section> | <section numbered="true" removeInRFC="false" toc="include" pn="section-4"> | |||
| <name slugifiedName="name-security-and-privacy-consid">Security and Privac | ||||
| <section title="Security Considerations and Privacy Considerations" > | y Considerations</name> | |||
| <t>In the Secure Real-Time Transport Protocol (SRTP) <xref target="RFC3711 | <t indent="0" pn="section-4-1">In "<xref target="RFC3711" format="title" s | |||
| " />, RTP header extensions are | ectionFormat="of" derivedContent="The Secure Real-time Transport Protocol (SRTP) | |||
| authenticated and optionally encrypted <xref target="RFC9335" />. | "/>" <xref target="RFC3711" format="default" sectionFormat="of" derivedContent=" | |||
| RFC3711"/>, RTP header extensions are | ||||
| authenticated and optionally encrypted <xref target="RFC9335" format="defa | ||||
| ult" sectionFormat="of" derivedContent="RFC9335"/>. | ||||
| When unencrypted header extensions are used, some metadata is | When unencrypted header extensions are used, some metadata is | |||
| exposed and visible to middle boxes on the network path, | exposed and visible to middleboxes on the network path, | |||
| while encrypted media data and metadata in encrypted header extensions are not exposed.</t> | while encrypted media data and metadata in encrypted header extensions are not exposed.</t> | |||
| <t indent="0" pn="section-4-2">The primary utility of this specification i | ||||
| <t>The primary utility of this specification is for RTP switches to make p | s for RTP switches to make proper media forwarding decisions. | |||
| roper media forwarding decisions. | ||||
| RTP switches are the SRTP peers of endpoints, so they can access encrypted header extensions, | RTP switches are the SRTP peers of endpoints, so they can access encrypted header extensions, | |||
| but not end-to-end encrypted private media payloads. Other middle boxes on | but not end-to-end encrypted private media payloads. Other middleboxes on | |||
| the network path can only access | the network path can only access | |||
| unencrypted header extensions, since they are not SRTP peers.</t> | unencrypted header extensions since they are not SRTP peers.</t> | |||
| <t indent="0" pn="section-4-3">RTP endpoints that negotiate this extension | ||||
| <t>RTP endpoints which negotiate this extension should consider whether th | should consider whether: | |||
| is video frame marking metadata | </t> | |||
| needs to be exposed to the SRTP peer only, in which case the header extens | <ul bare="false" empty="false" indent="3" spacing="normal" pn="section-4-4 | |||
| ion can be encrypted; or whether | "> | |||
| other middle boxes on the network path also need this metadata, for exampl | <li pn="section-4-4.1">this video frame marking metadata | |||
| e, to optimize packet drop decisions | needs to be exposed to the SRTP peer only, in which case the header extens | |||
| ion can be encrypted; or</li> | ||||
| <li pn="section-4-4.2">other middleboxes on the network path also need t | ||||
| his metadata, for example, to optimize packet drop decisions | ||||
| that minimize media quality impacts, in which case the header extension can be unencrypted, if the endpoint | that minimize media quality impacts, in which case the header extension can be unencrypted, if the endpoint | |||
| accepts the potential privacy leakage of this metadata. For example, it wo | accepts the potential privacy leakage of this metadata.</li> | |||
| uld be possible to determine | </ul> | |||
| <t indent="0" pn="section-4-5"> | ||||
| For example, it would be possible to determine | ||||
| keyframes and their frequency in unencrypted header extensions. This information can often be obtained via | keyframes and their frequency in unencrypted header extensions. This information can often be obtained via | |||
| statistical analysis of encrypted data. For example, keyframes are usually much larger than other frames, | statistical analysis of encrypted data. For example, keyframes are usually much larger than other frames, | |||
| so frame size alone can leak this in the absence of any unencrypted metadata. However, unencrypted metadata | so frame size alone can leak this in the absence of any unencrypted metadata. However, unencrypted metadata | |||
| provides a reliable signal rather than a statistical probability; so endpoints should take that into consideration | provides a reliable signal rather than a statistical probability; so endpoints should take that into consideration | |||
| to balance the privacy leakage risk against the potential benefit of optimized media delivery when deciding | to balance the privacy leakage risk against the potential benefit of optimized media delivery when deciding | |||
| whether to negotiate and encrypt this header extension.</t> | whether to negotiate and encrypt this header extension.</t> | |||
| </section> | ||||
| <section title="Acknowledgements"> | ||||
| <t>Many thanks to Bernard Aboba, Jonathan Lennox, Stephan Wenger, Dale Wor | ||||
| ley, and Magnus Westerlund for their inputs.</t> | ||||
| </section> | </section> | |||
| <section numbered="true" removeInRFC="false" toc="include" pn="section-5"> | ||||
| <section title="IANA Considerations"> | <name slugifiedName="name-iana-considerations">IANA Considerations</name> | |||
| <t>This document defines a new extension URI to the RTP Compact HeaderExte | <t indent="0" pn="section-5-1">This document defines a new extension URI l | |||
| nsions sub-registry of the | isted in the "RTP Compact Header Extensions" registry of the | |||
| Real-Time Transport Protocol (RTP) Parameters registry, according to the | "Real-Time Transport Protocol (RTP) Parameters" registry group, according | |||
| following data:</t> | to the following data:</t> | |||
| <t>Extension URI: urn:ietf:params:rtp-hdrext:framemarkinginfo </t> | <t indent="0" pn="section-5-2">Extension URI: urn:ietf:params:rtp-hdrext: | |||
| <t>Description: Frame marking information for video streams </t> | framemarking </t> | |||
| <t>Contact: mzanaty@cisco.com </t> | <t indent="0" pn="section-5-3">Description: Frame marking information for | |||
| <t>Reference: RFC XXXX</t> | video streams </t> | |||
| <t indent="0" pn="section-5-4">Contact: mzanaty@cisco.com </t> | ||||
| <t>Note to RFC Editor: please replace RFC XXXX with the number of this RFC | <t indent="0" pn="section-5-5">Reference: RFC 9626</t> | |||
| .</t> | ||||
| </section> | </section> | |||
| </middle> | </middle> | |||
| <back> | <back> | |||
| <references pn="section-6"> | ||||
| <references title="Normative References"> | <name slugifiedName="name-references">References</name> | |||
| <?rfc include="reference.RFC.2119"?> | <references pn="section-6.1"> | |||
| <?rfc include="reference.RFC.8174"?> | <name slugifiedName="name-normative-references">Normative References</na | |||
| <?rfc include="reference.RFC.8285"?> | me> | |||
| <?rfc include="reference.RFC.6184"?> | <reference anchor="RFC2119" target="https://www.rfc-editor.org/info/rfc2 | |||
| <?rfc include="reference.RFC.6190"?> | 119" quoteTitle="true" derivedAnchor="RFC2119"> | |||
| <?rfc include="reference.RFC.7741"?> | <front> | |||
| <?rfc include="reference.RFC.7798"?> | <title>Key words for use in RFCs to Indicate Requirement Levels</tit | |||
| </references> | le> | |||
| <references title="Informative References"> | <author fullname="S. Bradner" initials="S." surname="Bradner"/> | |||
| <?rfc include="reference.RFC.7656"?> | <date month="March" year="1997"/> | |||
| <?rfc include="reference.RFC.7667"?> | <abstract> | |||
| <?rfc include="reference.RFC.6464"?> | <t indent="0">In many standards track documents several words are | |||
| <?rfc include="reference.RFC.3550"?> | used to signify the requirements in the specification. These words are often cap | |||
| <?rfc include="reference.RFC.3711"?> | italized. This document defines these words as they should be interpreted in IET | |||
| <?rfc include="reference.RFC.5104"?> | F documents. This document specifies an Internet Best Current Practices for the | |||
| <?rfc include="reference.RFC.8871"?> | Internet Community, and requests discussion and suggestions for improvements.</t | |||
| <?rfc include="reference.RFC.9335"?> | > | |||
| <?rfc include="reference.I-D.ietf-avtext-lrr"?> | </abstract> | |||
| <?rfc include="reference.I-D.ietf-payload-vp9"?> | </front> | |||
| <seriesInfo name="BCP" value="14"/> | ||||
| <seriesInfo name="RFC" value="2119"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC2119"/> | ||||
| </reference> | ||||
| <reference anchor="RFC6184" target="https://www.rfc-editor.org/info/rfc6 | ||||
| 184" quoteTitle="true" derivedAnchor="RFC6184"> | ||||
| <front> | ||||
| <title>RTP Payload Format for H.264 Video</title> | ||||
| <author fullname="Y.-K. Wang" initials="Y.-K." surname="Wang"/> | ||||
| <author fullname="R. Even" initials="R." surname="Even"/> | ||||
| <author fullname="T. Kristensen" initials="T." surname="Kristensen"/ | ||||
| > | ||||
| <author fullname="R. Jesup" initials="R." surname="Jesup"/> | ||||
| <date month="May" year="2011"/> | ||||
| <abstract> | ||||
| <t indent="0">This memo describes an RTP Payload format for the IT | ||||
| U-T Recommendation H.264 video codec and the technically identical ISO/IEC Inter | ||||
| national Standard 14496-10 video codec, excluding the Scalable Video Coding (SVC | ||||
| ) extension and the Multiview Video Coding extension, for which the RTP payload | ||||
| formats are defined elsewhere. The RTP payload format allows for packetization o | ||||
| f one or more Network Abstraction Layer Units (NALUs), produced by an H.264 vide | ||||
| o encoder, in each RTP payload. The payload format has wide applicability, as it | ||||
| supports applications from simple low bitrate conversational usage, to Internet | ||||
| video streaming with interleaved transmission, to high bitrate video-on-demand. | ||||
| </t> | ||||
| <t indent="0">This memo obsoletes RFC 3984. Changes from RFC 3984 | ||||
| are summarized in Section 14. Issues on backward compatibility to RFC 3984 are d | ||||
| iscussed in Section 15. [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="6184"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC6184"/> | ||||
| </reference> | ||||
| <reference anchor="RFC6190" target="https://www.rfc-editor.org/info/rfc6 | ||||
| 190" quoteTitle="true" derivedAnchor="RFC6190"> | ||||
| <front> | ||||
| <title>RTP Payload Format for Scalable Video Coding</title> | ||||
| <author fullname="S. Wenger" initials="S." surname="Wenger"/> | ||||
| <author fullname="Y.-K. Wang" initials="Y.-K." surname="Wang"/> | ||||
| <author fullname="T. Schierl" initials="T." surname="Schierl"/> | ||||
| <author fullname="A. Eleftheriadis" initials="A." surname="Eleftheri | ||||
| adis"/> | ||||
| <date month="May" year="2011"/> | ||||
| <abstract> | ||||
| <t indent="0">This memo describes an RTP payload format for Scalab | ||||
| le Video Coding (SVC) as defined in Annex G of ITU-T Recommendation H.264, which | ||||
| is technically identical to Amendment 3 of ISO/IEC International Standard 14496 | ||||
| -10. The RTP payload format allows for packetization of one or more Network Abst | ||||
| raction Layer (NAL) units in each RTP packet payload, as well as fragmentation o | ||||
| f a NAL unit in multiple RTP packets. Furthermore, it supports transmission of a | ||||
| n SVC stream over a single as well as multiple RTP sessions. The payload format | ||||
| defines a new media subtype name "H264-SVC", but is still backward compatible to | ||||
| RFC 6184 since the base layer, when encapsulated in its own RTP stream, must us | ||||
| e the H.264 media subtype name ("H264") and the packetization method specified i | ||||
| n RFC 6184. The payload format has wide applicability in videoconferencing, Inte | ||||
| rnet video streaming, and high-bitrate entertainment-quality video, among others | ||||
| . [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="6190"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC6190"/> | ||||
| </reference> | ||||
| <reference anchor="RFC7741" target="https://www.rfc-editor.org/info/rfc7 | ||||
| 741" quoteTitle="true" derivedAnchor="RFC7741"> | ||||
| <front> | ||||
| <title>RTP Payload Format for VP8 Video</title> | ||||
| <author fullname="P. Westin" initials="P." surname="Westin"/> | ||||
| <author fullname="H. Lundin" initials="H." surname="Lundin"/> | ||||
| <author fullname="M. Glover" initials="M." surname="Glover"/> | ||||
| <author fullname="J. Uberti" initials="J." surname="Uberti"/> | ||||
| <author fullname="F. Galligan" initials="F." surname="Galligan"/> | ||||
| <date month="March" year="2016"/> | ||||
| <abstract> | ||||
| <t indent="0">This memo describes an RTP payload format for the VP | ||||
| 8 video codec. The payload format has wide applicability, as it supports applica | ||||
| tions from low-bitrate peer-to-peer usage to high-bitrate video conferences.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="7741"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC7741"/> | ||||
| </reference> | ||||
| <reference anchor="RFC7798" target="https://www.rfc-editor.org/info/rfc7 | ||||
| 798" quoteTitle="true" derivedAnchor="RFC7798"> | ||||
| <front> | ||||
| <title>RTP Payload Format for High Efficiency Video Coding (HEVC)</t | ||||
| itle> | ||||
| <author fullname="Y.-K. Wang" initials="Y.-K." surname="Wang"/> | ||||
| <author fullname="Y. Sanchez" initials="Y." surname="Sanchez"/> | ||||
| <author fullname="T. Schierl" initials="T." surname="Schierl"/> | ||||
| <author fullname="S. Wenger" initials="S." surname="Wenger"/> | ||||
| <author fullname="M. M. Hannuksela" initials="M. M." surname="Hannuk | ||||
| sela"/> | ||||
| <date month="March" year="2016"/> | ||||
| <abstract> | ||||
| <t indent="0">This memo describes an RTP payload format for the vi | ||||
| deo coding standard ITU-T Recommendation H.265 and ISO/IEC International Standar | ||||
| d 23008-2, both also known as High Efficiency Video Coding (HEVC) and developed | ||||
| by the Joint Collaborative Team on Video Coding (JCT-VC). The RTP payload format | ||||
| allows for packetization of one or more Network Abstraction Layer (NAL) units i | ||||
| n each RTP packet payload as well as fragmentation of a NAL unit into multiple R | ||||
| TP packets. Furthermore, it supports transmission of an HEVC bitstream over a si | ||||
| ngle stream as well as multiple RTP streams. When multiple RTP streams are used, | ||||
| a single transport or multiple transports may be utilized. The payload format h | ||||
| as wide applicability in videoconferencing, Internet video streaming, and high-b | ||||
| itrate entertainment-quality video, among others.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="7798"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC7798"/> | ||||
| </reference> | ||||
| <reference anchor="RFC8174" target="https://www.rfc-editor.org/info/rfc8 | ||||
| 174" quoteTitle="true" derivedAnchor="RFC8174"> | ||||
| <front> | ||||
| <title>Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words</ti | ||||
| tle> | ||||
| <author fullname="B. Leiba" initials="B." surname="Leiba"/> | ||||
| <date month="May" year="2017"/> | ||||
| <abstract> | ||||
| <t indent="0">RFC 2119 specifies common key words that may be used | ||||
| in protocol specifications. This document aims to reduce the ambiguity by clari | ||||
| fying that only UPPERCASE usage of the key words have the defined special meanin | ||||
| gs.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="BCP" value="14"/> | ||||
| <seriesInfo name="RFC" value="8174"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC8174"/> | ||||
| </reference> | ||||
| <reference anchor="RFC8285" target="https://www.rfc-editor.org/info/rfc8 | ||||
| 285" quoteTitle="true" derivedAnchor="RFC8285"> | ||||
| <front> | ||||
| <title>A General Mechanism for RTP Header Extensions</title> | ||||
| <author fullname="D. Singer" initials="D." surname="Singer"/> | ||||
| <author fullname="H. Desineni" initials="H." surname="Desineni"/> | ||||
| <author fullname="R. Even" initials="R." role="editor" surname="Even | ||||
| "/> | ||||
| <date month="October" year="2017"/> | ||||
| <abstract> | ||||
| <t indent="0">This document provides a general mechanism to use th | ||||
| e header extension feature of RTP (the Real-time Transport Protocol). It provide | ||||
| s the option to use a small number of small extensions in each RTP packet, where | ||||
| the universe of possible extensions is large and registration is decentralized. | ||||
| The actual extensions in use in a session are signaled in the setup information | ||||
| for that session. This document obsoletes RFC 5285.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="8285"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC8285"/> | ||||
| </reference> | ||||
| </references> | ||||
| <references pn="section-6.2"> | ||||
| <name slugifiedName="name-informative-references">Informative References | ||||
| </name> | ||||
| <reference anchor="RFC3550" target="https://www.rfc-editor.org/info/rfc3 | ||||
| 550" quoteTitle="true" derivedAnchor="RFC3550"> | ||||
| <front> | ||||
| <title>RTP: A Transport Protocol for Real-Time Applications</title> | ||||
| <author fullname="H. Schulzrinne" initials="H." surname="Schulzrinne | ||||
| "/> | ||||
| <author fullname="S. Casner" initials="S." surname="Casner"/> | ||||
| <author fullname="R. Frederick" initials="R." surname="Frederick"/> | ||||
| <author fullname="V. Jacobson" initials="V." surname="Jacobson"/> | ||||
| <date month="July" year="2003"/> | ||||
| <abstract> | ||||
| <t indent="0">This memorandum describes RTP, the real-time transpo | ||||
| rt protocol. RTP provides end-to-end network transport functions suitable for ap | ||||
| plications transmitting real-time data, such as audio, video or simulation data, | ||||
| over multicast or unicast network services. RTP does not address resource reser | ||||
| vation and does not guarantee quality-of-service for real-time services. The da | ||||
| ta transport is augmented by a control protocol (RTCP) to allow monitoring of th | ||||
| e data delivery in a manner scalable to large multicast networks, and to provide | ||||
| minimal control and identification functionality. RTP and RTCP are designed to | ||||
| be independent of the underlying transport and network layers. The protocol supp | ||||
| orts the use of RTP-level translators and mixers. Most of the text in this memor | ||||
| andum is identical to RFC 1889 which it obsoletes. There are no changes in the p | ||||
| acket formats on the wire, only changes to the rules and algorithms governing ho | ||||
| w the protocol is used. The biggest change is an enhancement to the scalable tim | ||||
| er algorithm for calculating when to send RTCP packets in order to minimize tran | ||||
| smission in excess of the intended rate when many participants join a session si | ||||
| multaneously. [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="STD" value="64"/> | ||||
| <seriesInfo name="RFC" value="3550"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC3550"/> | ||||
| </reference> | ||||
| <reference anchor="RFC3711" target="https://www.rfc-editor.org/info/rfc3 | ||||
| 711" quoteTitle="true" derivedAnchor="RFC3711"> | ||||
| <front> | ||||
| <title>The Secure Real-time Transport Protocol (SRTP)</title> | ||||
| <author fullname="M. Baugher" initials="M." surname="Baugher"/> | ||||
| <author fullname="D. McGrew" initials="D." surname="McGrew"/> | ||||
| <author fullname="M. Naslund" initials="M." surname="Naslund"/> | ||||
| <author fullname="E. Carrara" initials="E." surname="Carrara"/> | ||||
| <author fullname="K. Norrman" initials="K." surname="Norrman"/> | ||||
| <date month="March" year="2004"/> | ||||
| <abstract> | ||||
| <t indent="0">This document describes the Secure Real-time Transpo | ||||
| rt Protocol (SRTP), a profile of the Real-time Transport Protocol (RTP), which c | ||||
| an provide confidentiality, message authentication, and replay protection to the | ||||
| RTP traffic and to the control traffic for RTP, the Real-time Transport Control | ||||
| Protocol (RTCP). [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="3711"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC3711"/> | ||||
| </reference> | ||||
| <reference anchor="RFC5104" target="https://www.rfc-editor.org/info/rfc5 | ||||
| 104" quoteTitle="true" derivedAnchor="RFC5104"> | ||||
| <front> | ||||
| <title>Codec Control Messages in the RTP Audio-Visual Profile with F | ||||
| eedback (AVPF)</title> | ||||
| <author fullname="S. Wenger" initials="S." surname="Wenger"/> | ||||
| <author fullname="U. Chandra" initials="U." surname="Chandra"/> | ||||
| <author fullname="M. Westerlund" initials="M." surname="Westerlund"/ | ||||
| > | ||||
| <author fullname="B. Burman" initials="B." surname="Burman"/> | ||||
| <date month="February" year="2008"/> | ||||
| <abstract> | ||||
| <t indent="0">This document specifies a few extensions to the mess | ||||
| ages defined in the Audio-Visual Profile with Feedback (AVPF). They are helpful | ||||
| primarily in conversational multimedia scenarios where centralized multipoint fu | ||||
| nctionalities are in use. However, some are also usable in smaller multicast env | ||||
| ironments and point-to-point calls.</t> | ||||
| <t indent="0">The extensions discussed are messages related to the | ||||
| ITU-T Rec. H.271 Video Back Channel, Full Intra Request, Temporary Maximum Medi | ||||
| a Stream Bit Rate, and Temporal-Spatial Trade-off. [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="5104"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC5104"/> | ||||
| </reference> | ||||
| <reference anchor="RFC6464" target="https://www.rfc-editor.org/info/rfc6 | ||||
| 464" quoteTitle="true" derivedAnchor="RFC6464"> | ||||
| <front> | ||||
| <title>A Real-time Transport Protocol (RTP) Header Extension for Cli | ||||
| ent-to-Mixer Audio Level Indication</title> | ||||
| <author fullname="J. Lennox" initials="J." role="editor" surname="Le | ||||
| nnox"/> | ||||
| <author fullname="E. Ivov" initials="E." surname="Ivov"/> | ||||
| <author fullname="E. Marocco" initials="E." surname="Marocco"/> | ||||
| <date month="December" year="2011"/> | ||||
| <abstract> | ||||
| <t indent="0">This document defines a mechanism by which packets o | ||||
| f Real-time Transport Protocol (RTP) audio streams can indicate, in an RTP heade | ||||
| r extension, the audio level of the audio sample carried in the RTP packet. In l | ||||
| arge conferences, this can reduce the load on an audio mixer or other middlebox | ||||
| that wants to forward only a few of the loudest audio streams, without requiring | ||||
| it to decode and measure every stream that is received. [STANDARDS-TRACK]</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="6464"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC6464"/> | ||||
| </reference> | ||||
| <reference anchor="RFC7656" target="https://www.rfc-editor.org/info/rfc7 | ||||
| 656" quoteTitle="true" derivedAnchor="RFC7656"> | ||||
| <front> | ||||
| <title>A Taxonomy of Semantics and Mechanisms for Real-Time Transpor | ||||
| t Protocol (RTP) Sources</title> | ||||
| <author fullname="J. Lennox" initials="J." surname="Lennox"/> | ||||
| <author fullname="K. Gross" initials="K." surname="Gross"/> | ||||
| <author fullname="S. Nandakumar" initials="S." surname="Nandakumar"/ | ||||
| > | ||||
| <author fullname="G. Salgueiro" initials="G." surname="Salgueiro"/> | ||||
| <author fullname="B. Burman" initials="B." role="editor" surname="Bu | ||||
| rman"/> | ||||
| <date month="November" year="2015"/> | ||||
| <abstract> | ||||
| <t indent="0">The terminology about, and associations among, Real- | ||||
| time Transport Protocol (RTP) sources can be complex and somewhat opaque. This d | ||||
| ocument describes a number of existing and proposed properties and relationships | ||||
| among RTP sources and defines common terminology for discussing protocol entiti | ||||
| es and their relationships.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="7656"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC7656"/> | ||||
| </reference> | ||||
| <reference anchor="RFC7667" target="https://www.rfc-editor.org/info/rfc7 | ||||
| 667" quoteTitle="true" derivedAnchor="RFC7667"> | ||||
| <front> | ||||
| <title>RTP Topologies</title> | ||||
| <author fullname="M. Westerlund" initials="M." surname="Westerlund"/ | ||||
| > | ||||
| <author fullname="S. Wenger" initials="S." surname="Wenger"/> | ||||
| <date month="November" year="2015"/> | ||||
| <abstract> | ||||
| <t indent="0">This document discusses point-to-point and multi-end | ||||
| point topologies used in environments based on the Real-time Transport Protocol | ||||
| (RTP). In particular, centralized topologies commonly employed in the video conf | ||||
| erencing industry are mapped to the RTP terminology.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="7667"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC7667"/> | ||||
| </reference> | ||||
| <reference anchor="RFC8871" target="https://www.rfc-editor.org/info/rfc8 | ||||
| 871" quoteTitle="true" derivedAnchor="RFC8871"> | ||||
| <front> | ||||
| <title>A Solution Framework for Private Media in Privacy-Enhanced RT | ||||
| P Conferencing (PERC)</title> | ||||
| <author fullname="P. Jones" initials="P." surname="Jones"/> | ||||
| <author fullname="D. Benham" initials="D." surname="Benham"/> | ||||
| <author fullname="C. Groves" initials="C." surname="Groves"/> | ||||
| <date month="January" year="2021"/> | ||||
| <abstract> | ||||
| <t indent="0">This document describes a solution framework for ens | ||||
| uring that media confidentiality and integrity are maintained end to end within | ||||
| the context of a switched conferencing environment where Media Distributors are | ||||
| not trusted with the end-to-end media encryption keys. The solution builds upon | ||||
| existing security mechanisms defined for the Real-time Transport Protocol (RTP). | ||||
| </t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="8871"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC8871"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9335" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 335" quoteTitle="true" derivedAnchor="RFC9335"> | ||||
| <front> | ||||
| <title>Completely Encrypting RTP Header Extensions and Contributing | ||||
| Sources</title> | ||||
| <author fullname="J. Uberti" initials="J." surname="Uberti"/> | ||||
| <author fullname="C. Jennings" initials="C." surname="Jennings"/> | ||||
| <author fullname="S. Murillo" initials="S." surname="Murillo"/> | ||||
| <date month="January" year="2023"/> | ||||
| <abstract> | ||||
| <t indent="0">While the Secure Real-time Transport Protocol (SRTP) | ||||
| provides confidentiality for the contents of a media packet, a significant amou | ||||
| nt of metadata is left unprotected, including RTP header extensions and contribu | ||||
| ting sources (CSRCs). However, this data can be moderately sensitive in many app | ||||
| lications. While there have been previous attempts to protect this data, they ha | ||||
| ve had limited deployment, due to complexity as well as technical limitations.</ | ||||
| t> | ||||
| <t indent="0">This document updates RFC 3711, the SRTP specificati | ||||
| on, and defines Cryptex as a new mechanism that completely encrypts header exten | ||||
| sions and CSRCs and uses simpler Session Description Protocol (SDP) signaling wi | ||||
| th the goal of facilitating deployment.</t> | ||||
| </abstract> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9335"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9335"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9627" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 627" quoteTitle="true" derivedAnchor="RFC9627"> | ||||
| <front> | ||||
| <title>The Layer Refresh Request (LRR) RTCP Feedback Message</title> | ||||
| <author initials="J." surname="Lennox" fullname="Jonathan Lennox"> | ||||
| <organization showOnFrontPage="true">8x8, Inc. / Jitsi</organizati | ||||
| on> | ||||
| </author> | ||||
| <author initials="D." surname="Hong" fullname="Danny Hong"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <author initials="J." surname="Uberti" fullname="Justin Uberti"> | ||||
| <organization showOnFrontPage="true">OpenAI</organization> | ||||
| </author> | ||||
| <author initials="S." surname="Holmer" fullname="Stefan Holmer"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <author initials="M." surname="Flodman" fullname="Magnus Flodman"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <date month="March" year="2025"/> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9627"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9627"/> | ||||
| </reference> | ||||
| <reference anchor="RFC9628" target="https://www.rfc-editor.org/info/rfc9 | ||||
| 628" quoteTitle="true" derivedAnchor="RFC9628"> | ||||
| <front> | ||||
| <title>RTP Payload Format for VP9 Video</title> | ||||
| <author initials="J." surname="Uberti" fullname="Justin Uberti"> | ||||
| <organization showOnFrontPage="true">OpenAI</organization> | ||||
| </author> | ||||
| <author initials="S." surname="Holmer" fullname="Stefan Holmer"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <author initials="M." surname="Flodman" fullname="Magnus Flodman"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <author initials="D." surname="Hong" fullname="Danny Hong"> | ||||
| <organization showOnFrontPage="true">Google, Inc.</organization> | ||||
| </author> | ||||
| <author initials="J." surname="Lennox" fullname="Jonathan Lennox"> | ||||
| <organization showOnFrontPage="true">8x8, Inc. / Jitsi</organizati | ||||
| on> | ||||
| </author> | ||||
| <date month="March" year="2025"/> | ||||
| </front> | ||||
| <seriesInfo name="RFC" value="9628"/> | ||||
| <seriesInfo name="DOI" value="10.17487/RFC9628"/> | ||||
| </reference> | ||||
| </references> | ||||
| </references> | </references> | |||
| <section numbered="false" removeInRFC="false" toc="include" pn="section-appe | ||||
| ndix.a"> | ||||
| <name slugifiedName="name-acknowledgements">Acknowledgements</name> | ||||
| <t indent="0" pn="section-appendix.a-1">Many thanks to <contact fullname= | ||||
| "Bernard Aboba"/>, <contact fullname="Jonathan Lennox"/>, <contact fullname="S | ||||
| tephan Wenger"/>, <contact fullname="Dale Worley"/>, and <contact fullname="Ma | ||||
| gnus Westerlund"/> for their inputs.</t> | ||||
| </section> | ||||
| <section anchor="authors-addresses" numbered="false" removeInRFC="false" toc | ||||
| ="include" pn="section-appendix.b"> | ||||
| <name slugifiedName="name-authors-addresses">Authors' Addresses</name> | ||||
| <author fullname="Mo Zanaty" initials="M" surname="Zanaty"> | ||||
| <organization showOnFrontPage="true">Cisco Systems</organization> | ||||
| <address> | ||||
| <postal> | ||||
| <street>170 West Tasman Drive</street> | ||||
| <city>San Jose</city> | ||||
| <region>CA</region> | ||||
| <code>95134</code> | ||||
| <country>United States of America</country> | ||||
| </postal> | ||||
| <email>mzanaty@cisco.com</email> | ||||
| </address> | ||||
| </author> | ||||
| <author initials="E." surname="Berger" fullname="Espen Berger"> | ||||
| <organization showOnFrontPage="true">Cisco Systems</organization> | ||||
| <address> | ||||
| <email>espeberg@cisco.com</email> | ||||
| </address> | ||||
| </author> | ||||
| <author fullname="Suhas Nandakumar" initials="S" surname="Nandakumar"> | ||||
| <organization showOnFrontPage="true">Cisco Systems</organization> | ||||
| <address> | ||||
| <postal> | ||||
| <street>170 West Tasman Drive</street> | ||||
| <city>San Jose</city> | ||||
| <region>CA</region> | ||||
| <code>95134</code> | ||||
| <country>United States of America</country> | ||||
| </postal> | ||||
| <email>snandaku@cisco.com</email> | ||||
| </address> | ||||
| </author> | ||||
| </section> | ||||
| </back> | </back> | |||
| </rfc> | </rfc> | |||
| End of changes. 84 change blocks. | ||||
| 490 lines changed or deleted | 1253 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. | ||||