| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|
| <?xml version="1.0" encoding="US-ASCII"?> | <?xml version="1.0" encoding="UTF-8"?> | |||
| <!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ | <!DOCTYPE rfc [ | |||
| <!ENTITY RFC1945 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.1945.xml"> | <!ENTITY nbsp "&#160;"> | |||
| <!ENTITY RFC2046 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2046.xml"> | <!ENTITY zwsp "&#8203;"> | |||
| <!ENTITY RFC2119 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"> | <!ENTITY nbhy "&#8209;"> | |||
| <!ENTITY RFC3629 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3629.xml"> | <!ENTITY wj "&#8288;"> | |||
| <!ENTITY RFC3986 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3986.xml"> | | |||
| <!ENTITY RFC5234 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5234.xml"> | | |||
| <!ENTITY RFC8174 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"> | | |||
| <!ENTITY RFC8288 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.8288.xml"> | | |||
| <!ENTITY RFC9110 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.9110.xml"> | | |||
| <!ENTITY RFC9111 PUBLIC "" "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.9111.xml"> | | |||
| ]> | ]> | |||
| <rfc ipr="trust200902" category="std" docName="draft-koster-rep-12" > | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" ipr="trust200902" docName="draft | |||
| -koster-rep-12" number="9309" obsoletes="" updates="" submissionType="IETF" cate | ||||
| <?xml-stylesheet type="text/xsl" href="rfc2629.xslt" ?> | gory="std" consensus="true" xml:lang="en" tocInclude="true" tocDepth="4" symRefs | |||
| ="true" sortRefs="true" version="3"> | ||||
| <?rfc toc="yes" ?> | ||||
| <?rfc tocdepth="4" ?> | ||||
| <?rfc symrefs="yes" ?> | ||||
| <?rfc sortrefs="yes"?> | ||||
| <?rfc compact="yes" ?> | ||||
| <?rfc subcompact="no"?> | ||||
| <front> | <!-- xml2rfc v2v3 conversion 3.13.0 --> | |||
| <title abbrev="REP">Robots Exclusion Protocol</title> | ||||
| <author initials="M." surname="Koster" fullname="Martijn Koster" role="edito | <front> | |||
| r"> | <title abbrev="Robots Exclusion Protocol (REP)">Robots Exclusion Protocol</t | |||
| <organization>Stalworthy Computing, Ltd.</organization> | itle> | |||
| <seriesInfo name="RFC" value="9309"/> | ||||
| <author initials="M." surname="Koster" fullname="Martijn Koster"> | ||||
| <address> | <address> | |||
| <postal> | <postal> | |||
| | <extaddr>Stalworthy Manor Farm</extaddr> | |||
| <street>Suton Lane</street> | <street>Suton Lane</street> | |||
| <city>Wymondham, Norfolk</city> | <city>Wymondham, Norfolk</city> | |||
| <code>NR18 9JG</code> | <code>NR18 9JG</code> | |||
| <country>United Kingdom</country> | <country>United Kingdom</country> | |||
| </postal> | </postal> | |||
| <email>m.koster@greenhills.co.uk</email> | <email>m.koster@greenhills.co.uk</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="G." surname="Illyes" fullname="Gary Illyes" role="editor"> | <author initials="G." surname="Illyes" fullname="Gary Illyes"> | |||
| <organization>Google LLC.</organization> | <organization>Google LLC</organization> | |||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>Brandschenkestrasse 110</street> | <street>Brandschenkestrasse 110</street> | |||
| <city>Zurich</city> | <city>Zürich</city> | |||
| <code>8002</code> | <code>8002</code> | |||
| <country>Switzerland</country> | <country>Switzerland</country> | |||
| </postal> | </postal> | |||
| <email>garyillyes@google.com</email> | <email>garyillyes@google.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="H." surname="Zeller" fullname="Henner Zeller" role="editor | <author initials="H." surname="Zeller" fullname="Henner Zeller"> | |||
| "> | <organization>Google LLC</organization> | |||
| <organization>Google LLC.</organization> | ||||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>1600 Amphitheatre Pkwy</street> | <street>1600 Amphitheatre Pkwy</street> | |||
| <city>Mountain View, CA</city> | <city>Mountain View</city> | |||
| | <region>CA</region> | |||
| <code>94043</code> | <code>94043</code> | |||
| <country>USA</country> | <country>United States of America</country> | |||
| </postal> | </postal> | |||
| <email>henner@google.com</email> | <email>henner@google.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <author initials="L." surname="Sassman" fullname="Lizzi Sassman" role="edito | <author initials="L." surname="Sassman" fullname="Lizzi Sassman"> | |||
| r"> | <organization>Google LLC</organization> | |||
| <organization>Google LLC.</organization> | ||||
| <address> | <address> | |||
| <postal> | <postal> | |||
| <street>Brandschenkestrasse 110</street> | <street>Brandschenkestrasse 110</street> | |||
| <city>Zurich</city> | <city>Zürich</city> | |||
| <code>8002</code> | <code>8002</code> | |||
| <country>Switzerland</country> | <country>Switzerland</country> | |||
| </postal> | </postal> | |||
| <email>lizzi@google.com</email> | <email>lizzi@google.com</email> | |||
| </address> | </address> | |||
| </author> | </author> | |||
| <date year="2022" month="September"/> | ||||
| <date year="2022" month="July" day="06"/> | <keyword>robot</keyword> | |||
| <keyword>crawler</keyword> | ||||
| <area>General</area> | <keyword>robots.txt</keyword> | |||
| <keyword>internet-drafts</keyword> | ||||
| <abstract> | <abstract> | |||
| <t> This document specifies and extends the "Robots Exclusion Protocol" | <t> This document specifies and extends the "Robots Exclusion Protocol" | |||
| method originally defined by Martijn Koster in 1996 for service owners | method originally defined by Martijn Koster in 1994 for service owners | |||
| to control how content served by their services may be accessed, if at | to control how content served by their services may be accessed, if at | |||
| all, by automatic clients known as crawlers. Specifically, it adds | all, by automatic clients known as crawlers. Specifically, it adds | |||
| definition language for the protocol and instructions for handling | definition language for the protocol, instructions for handling | |||
| errors and caching. </t> | errors, and instructions for caching. </t> | |||
| </abstract> | </abstract> | |||
| </front> | </front> | |||
| <middle> | <middle> | |||
| <section anchor="introduction" title="Introduction"> | <section anchor="introduction" numbered="true" toc="default"> | |||
| | <name>Introduction</name> | |||
| <t> This document applies to services that provide resources that clients | <t> This document applies to services that provide resources that clients | |||
| can access through URIs as defined in <xref target="RFC3986"/>. For example, | can access through URIs as defined in <xref target="RFC3986" format="default"/>. For example, | |||
| in the context of HTTP, a browser is a client that displays the conten t of a | in the context of HTTP, a browser is a client that displays the conten t of a | |||
| web page. </t> | web page. </t> | |||
| <t> Crawlers are automated clients. Search engines for instance have crawlers to | <t> Crawlers are automated clients. Search engines, for instance, have crawlers to | |||
| recursively traverse links for indexing as defined in | recursively traverse links for indexing as defined in | |||
| <xref target="RFC8288"/>. </t> | <xref target="RFC8288" format="default"/>. </t> | |||
| <t> It may be inconvenient for service owners if crawlers visit the entirety of | <t> It may be inconvenient for service owners if crawlers visit the entirety of | |||
| their URI space. This document specifies the rules originally defined by | their URI space. This document specifies the rules originally defined by | |||
| the "Robots Exclusion Protocol" <xref target="ROBOTSTXT"/> t hat crawlers | the "Robots Exclusion Protocol" <xref target="ROBOTSTXT" format="defau lt"/> that crawlers | |||
| are requested to honor when accessing URIs. </t> | are requested to honor when accessing URIs. </t> | |||
| <t> These rules are not a form of access authorization. </t> | <t> These rules are not a form of access authorization. </t> | |||
| <section anchor="requirements-language" numbered="true" toc="default"> | ||||
| <section anchor="requirements-language" title="Requirements Language"> | <name>Requirements Language</name> | |||
| <t> The key words "<bcp14>MUST</bcp14>", | <t>The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", | |||
| "<bcp14>MUST NOT</bcp14>", "<bcp14>REQUIRED</bcp14>&q | "<bcp14>REQUIRED</bcp14>", "<bcp14>SHALL</bcp14>", | |||
| uot;, | "<bcp14>SHALL NOT</bcp14>", "<bcp14>SHOULD</bcp14>", | |||
| "<bcp14>SHALL</bcp14>", "<bcp14>SHALL NOT</bcp14>&quo | "<bcp14>SHOULD NOT</bcp14>", | |||
| t;, | "<bcp14>RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | |||
| "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>&q | "<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document | |||
| uot;, | are to be interpreted as described in BCP 14 | |||
| "<bcp14>RECOMMENDED</bcp14>", | <xref target="RFC2119"/> <xref target="RFC8174"/> when, and only | |||
| "<bcp14>NOT RECOMMENDED</bcp14>", "<bcp14>MAY</bcp14> | when, they appear in all capitals, as shown here.</t> | |||
| ", | ||||
| and "<bcp14>OPTIONAL</bcp14>" in this document are to be | ||||
| interpreted as described in | ||||
| BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> when, and o | ||||
| nly | ||||
| when, they appear in all capitals, as shown here. </t> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="specification" title="Specification"> | <section anchor="specification" numbered="true" toc="default"> | |||
| <section anchor="protocol-definition" title="Protocol Definition"> | <name>Specification</name> | |||
| <section anchor="protocol-definition" numbered="true" toc="default"> | ||||
| <name>Protocol Definition</name> | ||||
| <t> The protocol language consists of rule(s) and group(s) that the service | <t> The protocol language consists of rule(s) and group(s) that the service | |||
| makes available in a file named 'robots.txt' as described in | makes available in a file named "robots.txt" as described in | |||
| <xref target="access-method" />: </t> | <xref target="access-method" format="default"/>: </t> | |||
| <t> | <dl spacing="normal"> | |||
| <list style="symbols"> | <dt> Rule:</dt><dd> A line with a key-value pair that defines how a | |||
| <t> Rule: A line with a key-value pair that defines how a | | |||
| crawler may access URIs. See | crawler may access URIs. See | |||
| <xref target="the-allow-and-disallow-lines" />. </t> | <xref target="the-allow-and-disallow-lines" format="default"/>. | |||
| <t> Group: One or more user-agent lines that is followed by | </dd> | |||
| | <dt> Group:</dt><dd> One or more user-agent lines that are followed by | |||
| one or more rules. The group is terminated by a user-agent line | one or more rules. The group is terminated by a user-agent line | |||
| or end of file. See <xref target="the-user-agent-line" />. | or end of file. See <xref target="the-user-agent-line" format="d efault"/>. | |||
| The last group may have no rules, which means it implicitly | The last group may have no rules, which means it implicitly | |||
| allows everything. </t> | allows everything. </dd> | |||
| </list> </t> | </dl> | |||
| </section> | </section> | |||
| <section anchor="formal-syntax" title="Formal Syntax"> | <section anchor="formal-syntax" numbered="true" toc="default"> | |||
| | <name>Formal Syntax</name> | |||
| <t> Below is an Augmented Backus-Naur Form (ABNF) description, as described | <t> Below is an Augmented Backus-Naur Form (ABNF) description, as described | |||
| in <xref target="RFC5234"/>. </t> | in <xref target="RFC5234" format="default"/>. </t> | |||
| <sourcecode name="" type="abnf"><![CDATA[ | ||||
| <figure><artwork> | robotstxt = *(group / emptyline) | |||
| <![CDATA[ | group = startgroupline ; We start with a user-agent | |||
| robotstxt = *(group / emptyline) | ; line | |||
| group = startgroupline ; We start with a user-agent | *(startgroupline / emptyline) ; ... and possibly more | |||
| *(startgroupline / emptyline) ; ... and possibly more | ; user-agent lines | |||
| ; user-agents | *(rule / emptyline) ; followed by rules relevant | |||
| *(rule / emptyline) ; followed by rules relevant | ; for the preceding | |||
| ; for UAs | ; user-agent lines | |||
| startgroupline = *WS "user-agent" *WS ":" *WS product-token EOL | startgroupline = *WS "user-agent" *WS ":" *WS product-token EOL | |||
| rule = *WS ("allow" / "disallow") *WS ":" | rule = *WS ("allow" / "disallow") *WS ":" | |||
| *WS (path-pattern / empty-pattern) EOL | *WS (path-pattern / empty-pattern) EOL | |||
| ; parser implementors: define additional lines you need (for | ; parser implementors: define additional lines you need (for | |||
| ; example, sitemaps). | ; example, Sitemaps). | |||
| product-token = identifier / "*" | product-token = identifier / "*" | |||
| path-pattern = "/" *UTF8-char-noctl ; valid URI path pattern | path-pattern = "/" *UTF8-char-noctl ; valid URI path pattern | |||
| empty-pattern = *WS | empty-pattern = *WS | |||
| identifier = 1*(%x2D / %x41-5A / %x5F / %x61-7A) | identifier = 1*(%x2D / %x41-5A / %x5F / %x61-7A) | |||
| comment = "#" *(UTF8-char-noctl / WS / "#") | comment = "#" *(UTF8-char-noctl / WS / "#") | |||
| emptyline = EOL | emptyline = EOL | |||
| EOL = *WS [comment] NL ; end-of-line may have | EOL = *WS [comment] NL ; end-of-line may have | |||
| ; optional trailing comment | ; optional trailing comment | |||
| NL = %x0D / %x0A / %x0D.0A | NL = %x0D / %x0A / %x0D.0A | |||
| WS = %x20 / %x09 | WS = %x20 / %x09 | |||
| ; UTF8 derived from RFC3629, but excluding control characters | ; UTF8 derived from RFC 3629, but excluding control characters | |||
| UTF8-char-noctl = UTF8-1-noctl / UTF8-2 / UTF8-3 / UTF8-4 | UTF8-char-noctl = UTF8-1-noctl / UTF8-2 / UTF8-3 / UTF8-4 | |||
| UTF8-1-noctl = %x21 / %x22 / %x24-7F ; excluding control, space, '#' | UTF8-1-noctl = %x21 / %x22 / %x24-7F ; excluding control, space, "#" | |||
| UTF8-2 = %xC2-DF UTF8-tail | UTF8-2 = %xC2-DF UTF8-tail | |||
| UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2UTF8-tail / | UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2UTF8-tail / | |||
| %xED %x80-9F UTF8-tail / %xEE-EF 2UTF8-tail | %xED %x80-9F UTF8-tail / %xEE-EF 2UTF8-tail | |||
| UTF8-4 = %xF0 %x90-BF 2UTF8-tail / %xF1-F3 3UTF8-tail / | UTF8-4 = %xF0 %x90-BF 2UTF8-tail / %xF1-F3 3UTF8-tail / | |||
| %xF4 %x80-8F 2UTF8-tail | %xF4 %x80-8F 2UTF8-tail | |||
| UTF8-tail = %x80-BF | UTF8-tail = %x80-BF | |||
| ]]> | ]]></sourcecode> | |||
| </artwork></figure> | <section anchor="the-user-agent-line" numbered="true" toc="default"> | |||
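
The grammar above maps almost line-for-line onto a small scanner. Below is a minimal Python sketch of that idea (an illustration added for this review, not code from either XML file; the regex and the `parse_robots_txt` helper are my own): field names match case-insensitively, `#` opens a trailing comment, and lines that do not parse are skipped rather than treated as fatal, as the "Parsing Errors" section later requires.

```python
import re

# key, optional whitespace, ":", then a value that stops at a "#" comment
LINE_RE = re.compile(r"^\s*([A-Za-z_-]+)\s*:\s*([^#]*)")

def parse_robots_txt(body: str):
    """Yield (key, value) pairs for parseable lines; skip everything else."""
    for line in body.splitlines():
        match = LINE_RE.match(line)
        if match:
            key = match.group(1).lower()    # field names are case-insensitive
            value = match.group(2).strip()  # "#" starts a trailing comment
            yield key, value

example = "User-Agent: ExampleBot # group start\nDisallow: /foo\nnot a rule\n"
print(list(parse_robots_txt(example)))
# [('user-agent', 'ExampleBot'), ('disallow', '/foo')]
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|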
| <section anchor="the-user-agent-line" title="The User-Agent Line"> | <name>The User-Agent Line</name> | |||
| <t> Crawlers set their own name, which is called a product token, to find | <t> Crawlers set their own name, which is called a product token, to find | |||
| relevant groups. The product token <bcp14>MUST</bcp14> contain only | relevant groups. The product token <bcp14>MUST</bcp14> contain only | |||
| upper and lowercase letters ("a-z" and "A-Z"), | uppercase and lowercase letters ("a-z" and "A-Z"), | |||
| underscores ("_"), and hyphens ("-"). | underscores ("_"), and hyphens ("-"). | |||
| The product token <bcp14>SHOULD</bcp14> | The product token <bcp14>SHOULD</bcp14> | |||
| be a substring of the identification string that the crawler sends to | be a substring of the identification string that the crawler sends to | |||
| the service (for example, in the case of HTTP, the product token | the service. For example, in the case of HTTP | |||
| <bcp14>SHOULD</bcp14> be a substring in the user-agent header). | <xref target="RFC9110" format="default"/>, the product token | |||
| | <bcp14>SHOULD</bcp14> be a substring in the User-Agent header. | |||
| The identification string <bcp14>SHOULD</bcp14> describe the purpose of | The identification string <bcp14>SHOULD</bcp14> describe the purpose of | |||
| the crawler. Here's an example of a user-agent HTTP request header | the crawler. Here's an example of a User-Agent HTTP request header | |||
| with a link pointing to a page describing the purpose of the | with a link pointing to a page describing the purpose of the | |||
| ExampleBot crawler, which appears as a substring in the user-agent HTTP | ExampleBot crawler, which appears as a substring in the User-Agent HTTP | |||
| header and as a product token in the robots.txt user-agent line: < /t> | header and as a product token in the robots.txt user-agent line: < /t> | |||
| <texttable title="Example of a user-agent HTTP header and | <figure anchor="fig-1"> | |||
| robots.txt user-agent line for the ExampleBot produc | <name>Example of a User-Agent HTTP header and | |||
| t token. | robots.txt user-agent line for the ExampleBot product token</n | |||
| Note that the product token (ExampleBot) is a substr | ame> | |||
| ing of the | <artwork name="" type="" align="center" alt=""><![CDATA[ | |||
| user-agent HTTP header"> | +==========================================+========================+ | |||
| <ttcol align="left">user-agent HTTP header</ttcol> | | User-Agent HTTP header | robots.txt user-agent | | |||
| <ttcol align="left">robots.txt user-agent line</ttcol> | | | line | | |||
| <c>user-agent: Mozilla/5.0 (compatible; ExampleBot/0.1; https://www. | +==========================================+========================+ | |||
| example.com/bot.html)</c> | | User-Agent: Mozilla/5.0 (compatible; | user-agent: ExampleBot | | |||
| <c>user-agent: ExampleBot</c> | | ExampleBot/0.1; | | | |||
| </texttable> | | https://www.example.com/bot.html) | | | |||
| +------------------------------------------+------------------------+ | ||||
| ]]></artwork> | ||||
| </figure> | ||||
| <t> Note that the product token (ExampleBot) is a substring of | ||||
| the User-Agent HTTP header.</t> | ||||
| <t> Crawlers <bcp14>MUST</bcp14> use case-insensitive matching | <t> Crawlers <bcp14>MUST</bcp14> use case-insensitive matching | |||
| to find the group that matches the product token, and then | to find the group that matches the product token and then | |||
| obey the rules of the group. If there is more than one | obey the rules of the group. If there is more than one | |||
| group matching the user-agent, the matching groups' rules | group matching the user-agent, the matching groups' rules | |||
| <bcp14>MUST</bcp14> be combined into one group and parsed | <bcp14>MUST</bcp14> be combined into one group and parsed | |||
| according to | according to | |||
| <xref target="the-allow-and-disallow-lines" />. </t> | <xref target="the-allow-and-disallow-lines" format="default"/>.</t | |||
| > | ||||
| <texttable title="Example of how to merge two robots.txt | ||||
| groups that match the same product token"> | ||||
| <ttcol align="left">Two groups that match the same product token exa | ||||
| ctly</ttcol> | ||||
| <ttcol align="left">Merged group</ttcol> | ||||
| <c>user-agent: ExampleBot<br /> | ||||
| disallow: /foo<br /> | ||||
| disallow: /bar<br /> | ||||
| <br /> | ||||
| user-agent: ExampleBot<br /> | ||||
| disallow: /baz | ||||
| </c> | ||||
| <c>user-agent: ExampleBot<br /> | ||||
| disallow: /foo<br /> | ||||
| disallow: /bar<br /> | ||||
| disallow: /baz</c> | ||||
| </texttable> | ||||
| <figure anchor="fig-2"> | ||||
| <name>Example of how to merge two robots.txt | ||||
| groups that match the same product token</name> | ||||
| <artwork name="" type="" align="center" alt=""><![CDATA[ | ||||
| +========================================+========================+ | ||||
| | Two groups that match the same product | Merged group | | ||||
| | token exactly | | | ||||
| +========================================+========================+ | ||||
| | user-agent: ExampleBot | user-agent: ExampleBot | | ||||
| | disallow: /foo | disallow: /foo | | ||||
| | disallow: /bar | disallow: /bar | | ||||
| | | disallow: /baz | | ||||
| | user-agent: ExampleBot | | | ||||
| | disallow: /baz | | | ||||
| +----------------------------------------+------------------------+ | ||||
| ]]></artwork> | ||||
| </figure> | ||||
| <t> If no matching group exists, crawlers <bcp14>MUST</bcp14> obey the group | <t> If no matching group exists, crawlers <bcp14>MUST</bcp14> obey the group | |||
| with a user-agent line with the "*" value, if present. </t> | with a user-agent line with the "*" value, if present. </t> | |||
| <figure anchor="fig-3"> | ||||
| <texttable title="Example of no matching groups other than the '*' | <name>Example of no matching groups other than the "*" | |||
| for the ExampleBot product token"> | for the ExampleBot product token</name> | |||
| <ttcol align="left">Two groups that don't explicitly match ExampleBo | <artwork name="" type="" align="center" alt=""><![CDATA[ | |||
| t</ttcol> | +==================================+======================+ | |||
| <ttcol align="left">Applicable group for ExampleBot</ttcol> | | Two groups that don't explicitly | Applicable group for | | |||
| <c>user-agent: *<br /> | | match ExampleBot | ExampleBot | | |||
| disallow: /foo<br /> | +==================================+======================+ | |||
| disallow: /bar<br /> | | user-agent: * | user-agent: * | | |||
| <br /> | | disallow: /foo | disallow: /foo | | |||
| user-agent: BazBot<br /> | | disallow: /bar | disallow: /bar | | |||
| disallow: /baz | | | | | |||
| </c> | | user-agent: BazBot | | | |||
| <c>user-agent: *<br /> | | disallow: /baz | | | |||
| disallow: /foo<br /> | +----------------------------------+----------------------+ | |||
| disallow: /bar</c> | ]]></artwork> | |||
| </texttable> | </figure> | |||
| <t> If no group matches the product token and there is no group with a user-agent | <t> If no group matches the product token and there is no group with a user-agent | |||
| line with the "*" value, or no groups are present at all, no | line with the "*" value, or no groups are present at all, no | |||
| rules apply. </t> | rules apply. </t> | |||
| </section> | </section> | |||
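
To make the group-selection rules above concrete, here is a small sketch in Python (the `rules_for` helper and its data layout are assumptions of mine, not part of the protocol text): product tokens compare case-insensitively, all groups naming the same token are merged, and the "*" group is used only when nothing else matched.

```python
def rules_for(product_token, groups):
    """groups is a list of (product-token, rules) pairs, in file order."""
    token = product_token.lower()
    merged = [rule for name, rules in groups   # case-insensitive matching;
              if name.lower() == token         # all matching groups merge
              for rule in rules]
    if merged:
        return merged
    # No explicit match: fall back to the "*" group, if one exists.
    return next((rules for name, rules in groups if name == "*"), [])

groups = [
    ("ExampleBot", [("disallow", "/foo"), ("disallow", "/bar")]),
    ("ExampleBot", [("disallow", "/baz")]),  # merged with the group above
    ("*", [("disallow", "/quz")]),
]
print(rules_for("examplebot", groups))  # all three ExampleBot rules
print(rules_for("OtherBot", groups))    # [('disallow', '/quz')]
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|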
| <section anchor="the-allow-and-disallow-lines" title="The Allow and Disa | <section anchor="the-allow-and-disallow-lines" numbered="true" toc="defa | |||
| llow Lines"> | ult"> | |||
| <name>The "Allow" and "Disallow" Lines</name> | ||||
| <t> These lines indicate whether accessing a URI that matches the | <t> These lines indicate whether accessing a URI that matches the | |||
| corresponding path is allowed or disallowed. </t> | corresponding path is allowed or disallowed. </t> | |||
| <t> To evaluate if access to a URI is allowed, a crawler <bcp14>MUST</bcp14> | <t> To evaluate if access to a URI is allowed, a crawler <bcp14>MUST</bcp14> | |||
| match the paths in allow and disallow rules against the URI. | match the paths in "allow" and "disallow" rules against the URI. | |||
| The matching <bcp14>SHOULD</bcp14> be case sensitive. The matching | The matching <bcp14>SHOULD</bcp14> be case sensitive. The matching | |||
| <bcp14>MUST</bcp14> start with the first octet of the path. The most | <bcp14>MUST</bcp14> start with the first octet of the path. The most | |||
| specific match found <bcp14>MUST</bcp14> be used. The most specific | specific match found <bcp14>MUST</bcp14> be used. The most specific | |||
| match is the match that has the most octets. Duplicate rules in a | match is the match that has the most octets. Duplicate rules in a | |||
| group <bcp14>MAY</bcp14> be deduplicated. If an allow and disallow | group <bcp14>MAY</bcp14> be deduplicated. If an "allow" rule and a | |||
| rule are equivalent, then the allow rule <bcp14>SHOULD</bcp14> be | "disallow" | |||
| used. If no | rule are equivalent, then the "allow" rule <bcp14>SHOULD</bcp14> be used. If no | |||
| match is found amongst the rules in a group for a matching user-agent, | match is found amongst the rules in a group for a matching user-agent | |||
| or there are no rules in the group, the URI is allowed. The | or there are no rules in the group, the URI is allowed. The | |||
| /robots.txt URI is implicitly allowed. </t> | /robots.txt URI is implicitly allowed. </t> | |||
| <t> Octets in the URI and robots.txt paths outside the range of the | <t> Octets in the URI and robots.txt paths outside the range of the | |||
| US-ASCII coded character set, and those in the reserved range defined | ASCII coded character set, and those in the reserved range defined | |||
| by <xref target="RFC3986"/>, <bcp14>MUST</bcp14> be percent-encoded as | by <xref target="RFC3986" format="default"/>, <bcp14>MUST</bcp14> be percent-encoded as | |||
| defined by <xref target="RFC3986"></xref> prior to comparison. </t> | defined by <xref target="RFC3986" format="default"/> prior to comparison. </t> | |||
| <t> If a percent-encoded US-ASCII octet is encountered in the URI, it | <t> If a percent-encoded ASCII octet is encountered in the URI, it | |||
| <bcp14>MUST</bcp14> be unencoded prior to comparison, unless it is a | <bcp14>MUST</bcp14> be unencoded prior to comparison, unless it is a | |||
| reserved character in the URI as defined by <xref target="RFC3986" /> | reserved character in the URI as defined by <xref target="RFC3986" format="default"/> | |||
| or the character is outside the unreserved character range. The match | or the character is outside the unreserved character range. The match | |||
| evaluates positively if and only if the end of the path from the rule | evaluates positively if and only if the end of the path from the rule | |||
| is reached before a difference in octets is encountered. </t> | is reached before a difference in octets is encountered. </t> | |||
| <t> For example: </t> | <t> For example: </t> | |||
| <texttable title="Examples of matching percent-encoded URI components" | <figure anchor="fig-4"> | |||
| > | <name>Examples of matching percent-encoded URI components</name> | |||
| <ttcol align='left'>Path</ttcol> | <artwork name="" type="" align="center" alt=""><![CDATA[ | |||
| <ttcol align='left'>Encoded Path</ttcol> | +==================+=======================+=======================+ | |||
| <ttcol align='left'>Path to Match</ttcol> | | Path | Encoded Path | Path to Match | | |||
| <c>/foo/bar?baz=quz</c> | +==================+=======================+=======================+ | |||
| <c>/foo/bar?baz=quz</c> | | /foo/bar?baz=quz | /foo/bar?baz=quz | /foo/bar?baz=quz | | |||
| <c>/foo/bar?baz=quz</c> | +------------------+-----------------------+-----------------------+ | |||
| <c>/foo/bar?baz=http<br />://foo.bar</c> | | /foo/bar?baz= | /foo/bar?baz= | /foo/bar?baz= | | |||
| <c>/foo/bar?baz=http%3A<br />%2F%2Ffoo.bar</c> | | https://foo.bar | https%3A%2F%2Ffoo.bar | https%3A%2F%2Ffoo.bar | | |||
| <c>/foo/bar?baz=http%3A<br />%2F%2Ffoo.bar</c> | +------------------+-----------------------+-----------------------+ | |||
| <c>/foo/bar/U+E38384</c> | | /foo/bar/ | /foo/bar/%E3%83%84 | /foo/bar/%E3%83%84 | | |||
| <c>/foo/bar/%E3%83%84</c> | | U+E38384 | | | | |||
| <c>/foo/bar/%E3%83%84</c> | +------------------+-----------------------+-----------------------+ | |||
| <c>/foo/bar/%E3%83%84</c> | | /foo/ | /foo/bar/%E3%83%84 | /foo/bar/%E3%83%84 | | |||
| <c>/foo/bar/%E3%83%84</c> | | bar/%E3%83%84 | | | | |||
| <c>/foo/bar/%E3%83%84</c> | +------------------+-----------------------+-----------------------+ | |||
| <c>/foo/bar/%62%61%7A</c> | | /foo/ | /foo/bar/%62%61%7A | /foo/bar/baz | | |||
| <c>/foo/bar/%62%61%7A</c> | | bar/%62%61%7A | | | | |||
| <c>/foo/bar/baz</c> | +------------------+-----------------------+-----------------------+ | |||
| </texttable> | ]]></artwork> | |||
| </figure> | ||||
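
A sketch of the normalization the preceding paragraphs and the table above imply (my own reading, in Python; the helper names are hypothetical): raw non-ASCII characters are percent-encoded, and only unreserved percent-escapes such as %62 are decoded before the octet-by-octet comparison.

```python
import re

UNRESERVED = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                 "abcdefghijklmnopqrstuvwxyz0123456789-._~")

def _decode_unreserved(match):
    char = chr(int(match.group(1), 16))
    # Only unreserved octets may be unencoded; %2F, %3A, %E3... stay escaped.
    return char if char in UNRESERVED else match.group(0).upper()

def normalize_path(path: str) -> str:
    """Normalize a rule path or URI path before comparing octets."""
    # Percent-encode raw characters outside the ASCII range (e.g. U+30C4).
    path = "".join(c if ord(c) < 0x80 else
                   "".join(f"%{b:02X}" for b in c.encode("utf-8"))
                   for c in path)
    # Then decode only the unreserved percent-escapes.
    return re.sub(r"%([0-9A-Fa-f]{2})", _decode_unreserved, path)

print(normalize_path("/foo/bar/%62%61%7A"))  # /foo/bar/baz
print(normalize_path("/foo/bar/\u30c4"))     # /foo/bar/%E3%83%84
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|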
| <t> The crawler <bcp14>SHOULD</bcp14> ignore "disallow" and | <t> The crawler <bcp14>SHOULD</bcp14> ignore "disallow" and | |||
| "allow" rules that are not in any group (for example, an | "allow" rules that are not in any group (for example, any | |||
| y | ||||
| rule that precedes the first user-agent line). </t> | rule that precedes the first user-agent line). </t> | |||
| <t> Implementers <bcp14>MAY</bcp14> bridge encoding mismatches if they | <t> Implementors <bcp14>MAY</bcp14> bridge encoding mismatches if they | |||
| detect that the robots.txt file is not UTF8 encoded. </t> | detect that the robots.txt file is not UTF-8 encoded. </t> | |||
| </section> | </section> | |||
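
Putting this section's matching rules together, a minimal evaluator might look like the following Python sketch (illustrative only; wildcard handling is deliberately omitted, and the function name is mine): the most specific, i.e. longest, matching rule wins; an equally specific "allow" beats "disallow"; and no match at all means the URI is allowed.

```python
def is_allowed(uri_path: str, rules) -> bool:
    """rules is a list of ('allow' | 'disallow', path) pairs."""
    if uri_path == "/robots.txt":
        return True                   # the robots.txt URI itself is allowed
    best_len, verdict = -1, True      # no match at all means "allowed"
    for kind, rule_path in rules:
        if uri_path.startswith(rule_path):
            allowed = kind == "allow"
            if len(rule_path) > best_len or (
                    len(rule_path) == best_len and allowed):
                best_len, verdict = len(rule_path), allowed  # allow wins ties
    return verdict

rules = [("allow", "/example/page/"),
         ("disallow", "/example/page/disallowed.gif")]
print(is_allowed("/example/page/other.gif", rules))       # True
print(is_allowed("/example/page/disallowed.gif", rules))  # False
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|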
| <section anchor="special-characters" title="Special Characters"> | <section anchor="special-characters" numbered="true" toc="default"> | |||
| <t> Crawlers <bcp14>MUST</bcp14> allow the following special characters: </t> | <name>Special Characters</name> | |||
| | <t> Crawlers <bcp14>MUST</bcp14> support the following special characters: </t> | |||
| <texttable title="List of special characters in robots.txt files"> | <figure anchor="fig-5"> | |||
| <ttcol align='left'>Character</ttcol> | <name>List of special characters in robots.txt files</name> | |||
| <ttcol align='left'>Description</ttcol> | <artwork name="" type="" align="center" alt=""><![CDATA[ | |||
| <ttcol align='left'>Example</ttcol> | +===========+===================+==============================+ | |||
| <c>"#"</c> | | Character | Description | Example | | |||
| <c>Designates an end of line comment.</c> | +===========+===================+==============================+ | |||
| <c>"allow: / # comment in line"<br /><br />"# comment | | # | Designates a line | allow: / # comment in line | | |||
| on its own line"</c> | | | comment. | | | |||
| <c>"$"</c> | | | | # comment on its own line | | |||
| <c>Designates the end of the match pattern.</c> | +-----------+-------------------+------------------------------+ | |||
| <c>"allow: /this/path/exactly$"</c> | | $ | Designates the | allow: /this/path/exactly$ | | |||
| <c>"*"</c> | | | end of the match | | | |||
| <c>Designates 0 or more instances of any character.</c> | | | pattern. | | | |||
| <c>"allow: /this/*/exactly"</c> | +-----------+-------------------+------------------------------+ | |||
| </texttable> | | * | Designates 0 or | allow: /this/*/exactly | | |||
| | | more instances of | | | ||||
| | | any character. | | | ||||
| +-----------+-------------------+------------------------------+ | ||||
| ]]></artwork> | ||||
| </figure> | ||||
| <t> If crawlers match special characters verbatim in the URI, crawlers | <t> If crawlers match special characters verbatim in the URI, crawlers | |||
| <bcp14>SHOULD</bcp14> use "%" encoding. For example: </t | <bcp14>SHOULD</bcp14> use "%" encoding. For example: </t> | |||
| > | <figure anchor="fig-6"> | |||
| <name>Example of percent-encoding</name> | ||||
| <texttable title="Example of percent-encoding"> | <artwork name="" type="" align="center" alt=""><![CDATA[ | |||
| <ttcol align='left'>Percent-encoded Pattern</ttcol> | +============================+====================================+ | |||
| <ttcol align='left'>URI</ttcol> | | Percent-encoded Pattern | URI | | |||
| <c>/path/file-with-a-%2A.html</c> | +============================+====================================+ | |||
| <c>https://www.example.com/path/file-with-a-*.html</c> | | /path/file-with-a-%2A.html | https://www.example.com/path/ | | |||
| <c>/path/foo-%24</c> | | | file-with-a-*.html | | |||
| <c>https://www.example.com/path/foo-$</c> | +----------------------------+------------------------------------+ | |||
| </texttable> | | /path/foo-%24 | https://www.example.com/path/foo-$ | | |||
| +----------------------------+------------------------------------+ | ||||
| ]]></artwork> | ||||
| </figure> | ||||
| </section> | </section> | |||
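
One common way to honor "*" and "$" (an implementation choice, not something the RFC mandates) is to translate each rule path into a regular expression, matching every other character verbatim. A hedged Python sketch:

```python
import re

def pattern_to_regex(rule_path: str):
    anchored = rule_path.endswith("$")  # "$" ends the match pattern
    core = rule_path[:-1] if anchored else rule_path
    # Escape everything, then turn the escaped "*" wildcard into ".*".
    regex = re.escape(core).replace(r"\*", ".*")
    return re.compile("^" + regex + ("$" if anchored else ""))

print(bool(pattern_to_regex("/this/*/exactly")
           .match("/this/a/b/exactly")))          # True
print(bool(pattern_to_regex("/this/path/exactly$")
           .match("/this/path/exactly?page=1")))  # False
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|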
| <section anchor="other-records" title="Other Records"> | <section anchor="other-records" numbered="true" toc="default"> | |||
| | <name>Other Records</name> | |||
| <t> Crawlers <bcp14>MAY</bcp14> interpret other records that are not | <t> Crawlers <bcp14>MAY</bcp14> interpret other records that are not | |||
| part of the robots.txt protocol. For example, 'sitemap' | part of the robots.txt protocol -- for example, "Sitemaps" | |||
| <xref target="SITEMAPS"/>. Crawlers MAY be lenient when | <xref target="SITEMAPS" format="default"/>. Crawlers <bcp14>MAY</b | |||
| cp14> be lenient when | ||||
| interpreting other records. For example, crawlers may accept | interpreting other records. For example, crawlers may accept | |||
| common typos of the record. </t> | common misspellings of the record. </t> | |||
| <t> Parsing of other records | <t> Parsing of other records | |||
| <bcp14>MUST NOT</bcp14> interfere with the parsing of explicitly | <bcp14>MUST NOT</bcp14> interfere with the parsing of explicitly | |||
| defined records in <xref target="specification" />. </t> | defined records in <xref target="specification" format="default"/> | |||
| . | ||||
| For example, a "Sitemaps" record <bcp14>MUST NOT</bcp14> terminate | ||||
| a | ||||
| group. </t> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="access-method" title="Access Method"> | <section anchor="access-method" numbered="true" toc="default"> | |||
| <t> The rules <bcp14>MUST</bcp14> be accessible in a file named | <name>Access Method</name> | |||
| "/robots.txt" (all lower case) in the top level path of | <t> The rules <bcp14>MUST</bcp14> be accessible in a file named | |||
| "/robots.txt" (all lowercase) in the top-level path of | ||||
| the service. The file <bcp14>MUST</bcp14> be UTF-8 encoded (as | the service. The file <bcp14>MUST</bcp14> be UTF-8 encoded (as | |||
| defined in <xref target="RFC3629"/>) and Internet Media Type | defined in <xref target="RFC3629" format="default"/>) and Internet Med | |||
| "text/plain" | ia Type | |||
| (as defined in <xref target="RFC2046"/>). </t> | "text/plain" | |||
| <t> As per <xref target="RFC3986"/>, the URI of the robots.txt is: </t> | (as defined in <xref target="RFC2046" format="default"/>). </t> | |||
| <t> "scheme:[//authority]/robots.txt" </t> | <t> As per <xref target="RFC3986" format="default"/>, the URI of the rob | |||
| <t> For example, in the context of HTTP or FTP, the URI is: </t> | ots.txt file is: </t> | |||
| <t> "scheme:[//authority]/robots.txt" </t> | ||||
| <figure> | <t> For example, in the context of HTTP or FTP, the URI is: </t> | |||
| <artwork><![CDATA[ | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| https://www.example.com/robots.txt | https://www.example.com/robots.txt | |||
| ftp://ftp.example.com/robots.txt | ftp://ftp.example.com/robots.txt | |||
| ]]></artwork> | ]]></artwork> | |||
| </figure> | <section anchor="access-results" numbered="true" toc="default"> | |||
| | <name>Access Results</name> | |||
| <section anchor="access-results" title="Access Results"> | <section anchor="successful-access" numbered="true" toc="default"> | |||
| <section anchor="successful-access" title="Successful Access"> | <name>Successful Access</name> | |||
| <t> If the crawler successfully downloads the robots.txt, the | <t> If the crawler successfully downloads the robots.txt file, the | |||
| crawler <bcp14>MUST</bcp14> follow the parseable rules. </t> | crawler <bcp14>MUST</bcp14> follow the parseable rules. </t> | |||
| </section> | </section> | |||
| <section anchor="redirects" title="Redirects"> | <section anchor="redirects" numbered="true" toc="default"> | |||
| <t> It's possible that a server responds to a robots.txt fetch | <name>Redirects</name> | |||
| request with a redirect, such as HTTP 301 and HTTP 302 in | <t> It's possible that a server responds to a robots.txt fetch | |||
| | request with a redirect, such as HTTP 301 or HTTP 302 in the | |||
| case of HTTP. The crawlers <bcp14>SHOULD</bcp14> follow at | case of HTTP. The crawlers <bcp14>SHOULD</bcp14> follow at | |||
| least five consecutive redirects, even across authorities | least five consecutive redirects, even across authorities | |||
| (for example, hosts in case of HTTP), as defined in | (for example, hosts in the case of HTTP). </t> | |||
| <xref target="RFC1945"/>. </t> | <t> If a robots.txt file is reached within five consecutive | |||
| <t> If a robots.txt file is reached within five consecutive | | |||
| redirects, the robots.txt file <bcp14>MUST</bcp14> be fetched, | redirects, the robots.txt file <bcp14>MUST</bcp14> be fetched, | |||
| parsed, and its rules followed in the context of the initial | parsed, and its rules followed in the context of the initial | |||
| authority. </t> | authority. </t> | |||
| <t> If there are more than five consecutive redirects, crawlers | <t> If there are more than five consecutive redirects, crawlers | |||
| <bcp14>MAY</bcp14> assume that the robots.txt is | <bcp14>MAY</bcp14> assume that the robots.txt file is | |||
| unavailable. </t> | unavailable. </t> | |||
| </section> | </section> | |||
| <section anchor="unavailable-status" title="Unavailable Status"> | <section anchor="unavailable-status" numbered="true" toc="default"> | |||
| <t> Unavailable means the crawler tries to fetch the robots.txt, | <name>"Unavailable" Status</name> | |||
| and the server responds with unavailable status codes. For | <t> "Unavailable" means the crawler tries to fetch the robots.txt file | |||
| example, in the context of HTTP, unavailable status codes are | and the server responds with status codes indicating that the resource in question is unavailable. For | |||
| | example, in the context of HTTP, such status codes are | |||
| in the 400-499 range. </t> | in the 400-499 range. </t> | |||
| <t> If a server status code indicates that the robots.txt file is | <t> If a server status code indicates that the robots.txt file is | |||
| unavailable to the crawler, then the crawler MAY access any | unavailable to the crawler, then the crawler <bcp14>MAY</bcp14> access any | |||
| resources on the server. </t> | resources on the server. </t> | |||
| </section> | </section> | |||
| <section anchor="unreachable-status" title="Unreachable Status"> | <section anchor="unreachable-status" numbered="true" toc="default"> | |||
| <t> If the robots.txt is unreachable due to server or network | <name>"Unreachable" Status</name> | |||
| errors, this means the robots.txt is undefined and the crawler | <t> If the robots.txt file is unreachable due to server or network | |||
| | errors, this means the robots.txt file is undefined and the crawler | |||
| <bcp14>MUST</bcp14> assume complete disallow. For example, in | <bcp14>MUST</bcp14> assume complete disallow. For example, in | |||
| the context of HTTP, an unreachable robots.txt has a response | the context of HTTP, server errors are identified by status codes | |||
| code in the 500-599 range. </t> | in the 500-599 range. </t> | |||
| <t> If the robots.txt is undefined for a reasonably long period of | <t> If the robots.txt file is undefined for a reasonably long period of | |||
| time (for example, 30 days), crawlers <bcp14>MAY</bcp14> assume | time (for example, 30 days), crawlers <bcp14>MAY</bcp14> assume that | |||
| the robots.txt is unavailable as defined in | the robots.txt file is unavailable as defined in | |||
| <xref target="unavailable-status"/> or continue to use a cached | <xref target="unavailable-status" format="default"/> or continue to use a cached | |||
| copy. </t> | copy. </t> | |||
| </section> | </section> | |||
| <section anchor="parsing-errors" title="Parsing Errors"> | <section anchor="parsing-errors" numbered="true" toc="default"> | |||
| <t> Crawlers <bcp14>MUST</bcp14> try to parse each line of the | <name>Parsing Errors</name> | |||
| | <t> Crawlers <bcp14>MUST</bcp14> try to parse each line of the | |||
| robots.txt file. Crawlers <bcp14>MUST</bcp14> use the parseable | robots.txt file. Crawlers <bcp14>MUST</bcp14> use the parseable | |||
| rules. </t> | rules. </t> | |||
| | </section> | |||
| </section> | </section> | |||
| </section> | </section> | |||
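
The access results above collapse into a small policy table: a successful fetch means parse, 4xx means nothing is disallowed, and 5xx or a network failure means everything is. A sketch under stated assumptions (HTTP, Python's standard urllib, and the 500-kibibyte cap from the later "Limits" section folded into the read):

```python
import urllib.error
import urllib.request

PARSE, ALLOW_ALL, DISALLOW_ALL = "parse", "allow-all", "disallow-all"

def fetch_robots_txt(authority: str):
    """Map the outcome of a robots.txt fetch to a crawling policy."""
    try:
        # urllib follows redirects on its own; crawlers SHOULD follow at
        # least five before treating the file as unavailable.
        with urllib.request.urlopen(authority + "/robots.txt",
                                    timeout=10) as resp:
            return PARSE, resp.read(500 * 1024)  # cap the bytes we parse
    except urllib.error.HTTPError as err:
        if 400 <= err.code <= 499:
            return ALLOW_ALL, b""    # "unavailable": access is unrestricted
        return DISALLOW_ALL, b""     # 5xx "unreachable": complete disallow
    except urllib.error.URLError:
        return DISALLOW_ALL, b""     # network error: also "unreachable"

print(fetch_robots_txt("https://www.example.com")[0])
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|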
| </section> | <section anchor="caching" numbered="true" toc="default"> | |||
| <section anchor="caching" title="Caching"> | <name>Caching</name> | |||
| <t> Crawlers <bcp14>MAY</bcp14> cache the fetched robots.txt file's | <t> Crawlers <bcp14>MAY</bcp14> cache the fetched robots.txt file's | |||
| contents. Crawlers <bcp14>MAY</bcp14> use standard cache control as | contents. Crawlers <bcp14>MAY</bcp14> use standard cache control as | |||
| defined in <xref target="RFC9111"/>. Crawlers | defined in <xref target="RFC9111" format="default"/>. Crawlers | |||
| <bcp14>SHOULD NOT</bcp14> use the cached version for more than 24 | <bcp14>SHOULD NOT</bcp14> use the cached version for more than 24 | |||
| hours, unless the robots.txt is unreachable. </t> | hours, unless the robots.txt file is unreachable. </t> | |||
| </section> | </section> | |||
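
The 24-hour guidance reduces to a one-line freshness check. A sketch under the stated assumptions (wall-clock age only; a real crawler would also honor Cache-Control headers per RFC 9111):

```python
import time

MAX_AGE = 24 * 60 * 60  # SHOULD NOT serve from cache beyond 24 hours

def cache_is_fresh(fetched_at: float, robots_unreachable: bool) -> bool:
    """A cached copy may outlive 24h only while refetching keeps failing."""
    return robots_unreachable or (time.time() - fetched_at) < MAX_AGE
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|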
| <section anchor="limits" title="Limits"> | <section anchor="limits" numbered="true" toc="default"> | |||
| <t> Crawlers SHOULD impose a parsing limit to protect their systems; | <name>Limits</name> | |||
| see <xref target="security"/>. The parsing limit MUST be at least | <t> Crawlers <bcp14>SHOULD</bcp14> impose a parsing limit to protect the | |||
| 500 kibibytes <xref target="KiB"/>. </t> | ir systems; | |||
| see <xref target="security" format="default"/>. The parsing limit <bcp | ||||
| 14>MUST</bcp14> be at least | ||||
| 500 kibibytes <xref target="KiB" format="default"/>. </t> | ||||
| </section> | </section> | |||
| </section> | </section> | |||
| <section anchor="security" title="Security Considerations"> | <section anchor="security" numbered="true" toc="default"> | |||
| <t> The Robots Exclusion Protocol is not a substitute for more valid | <name>Security Considerations</name> | |||
| | <t> The Robots Exclusion Protocol is not a substitute for valid | |||
| content security measures. Listing paths in the robots.txt file | content security measures. Listing paths in the robots.txt file | |||
| exposes them publicly and thus makes the paths discoverable. To | exposes them publicly and thus makes the paths discoverable. To | |||
| control access to the URI paths in a robots.txt file, users of | control access to the URI paths in a robots.txt file, users of | |||
| the protocol should employ a valid security measure relevant to | the protocol should employ a valid security measure relevant to | |||
| the application layer on which the robots.txt file is served. | the application layer on which the robots.txt file is served -- | |||
| For example, in case of HTTP, HTTP Authentication defined in | for example, in the case of HTTP, HTTP Authentication as defined in | |||
| <xref target="RFC9110"/>. </t> | <xref target="RFC9110" format="default"/>. </t> | |||
| <t> To protect against attacks against their system, implementors | <t> To protect against attacks against their system, implementors | |||
| of robots.txt parsing and matching logic should take the | of robots.txt parsing and matching logic should take the | |||
| following considerations into account: </t> | following considerations into account: </t> | |||
| <t> | <dl spacing="normal"> | |||
| <list style="symbols"> | <dt> Memory management:</dt><dd> <xref target="limits" format="default"/ | |||
| <t> Memory management: <xref target="limits" /> defines the lower | > defines the lower | |||
| limit of bytes that must be processed, which inherently also | limit of bytes that must be processed, which inherently also | |||
| protects the parser from out of memory scenarios. </t> | protects the parser from out-of-memory scenarios. </dd> | |||
| <t> Invalid characters: <xref target="formal-syntax" /> defines | <dt> Invalid characters:</dt><dd> <xref target="formal-syntax" format="default"/> defines | |||
| a set of characters that parsers and matchers can expect in | a set of characters that parsers and matchers can expect in | |||
| robots.txt files. Out of bound characters should be rejected | robots.txt files. Out-of-bound characters should be rejected | |||
| as invalid, which limits the available attack vectors that | as invalid, which limits the available attack vectors that | |||
| attempt to compromise the system. </t> | attempt to compromise the system. </dd> | |||
| <t> Untrusted content: Implementors should treat the content of | <dt> Untrusted content:</dt><dd> Implementors should treat the content of | |||
| a robots.txt file as untrusted content, as defined by the | a robots.txt file as untrusted content, as defined by the | |||
| specification of the application layer used. For example, | specification of the application layer used. For example, | |||
| in the context of HTTP, implementors should follow the | in the context of HTTP, implementors should follow the | |||
| security considerations section of | Security Considerations section of | |||
| <xref target="RFC9110"/>. </t> | <xref target="RFC9110" format="default"/>. </dd> | |||
| </list> | </dl> | |||
| </t> | | |||
| </section> | </section> | |||
| <section anchor="IANA" title="IANA Considerations"> | <section anchor="IANA" numbered="true" toc="default"> | |||
| <t> This document has no actions for IANA. </t> | <name>IANA Considerations</name> | |||
| | <t> This document has no IANA actions. </t> | |||
| </section> | </section> | |||
| <section anchor="examples" title="Examples"> | <section anchor="examples" numbered="true" toc="default"> | |||
| <section anchor="simple-example" title="Simple Example"> | <name>Examples</name> | |||
| <section anchor="simple-example" numbered="true" toc="default"> | ||||
| <name>Simple Example</name> | ||||
| <t> The following example shows: </t> | <t> The following example shows: </t> | |||
| <t> | <dl spacing="normal"> | |||
| <list style="symbols"> | <dt> *:</dt><dd> A group that's relevant to all user agents that | |||
| <t> *: A group that's relevant to all user-agents that | | |||
| don't have an explicitly defined matching group. It allows | don't have an explicitly defined matching group. It allows | |||
| access to the URLs with the /publications/ path prefix, and | access to the URLs with the /publications/ path prefix, and it | |||
| restricts access to the URLs with the /example/ path prefix | restricts access to the URLs with the /example/ path prefix | |||
| and to all URLs with .gif suffix. The * character designates | and to all URLs with a .gif suffix. The "*" character designates | |||
| any character, including the otherwise required forward | any character, including the otherwise-required forward | |||
| slash; see <xref target="formal-syntax" />. </t> | slash; see <xref target="formal-syntax" format="default"/>. </dd | |||
| <t> foobot: A regular case. A single user-agent followed | > | |||
| <dt> foobot:</dt><dd> A regular case. A single user agent followed | ||||
| by rules. The crawler only has access to two URL path | by rules. The crawler only has access to two URL path | |||
| prefixes on the site, /example/page.html and | prefixes on the site -- /example/page.html and | |||
| /example/allowed.gif. The rules of the group are missing | /example/allowed.gif. The rules of the group are missing | |||
| the optional whitespace character, which is acceptable as | the optional space character, which is acceptable as | |||
| defined in <xref target="formal-syntax" />. </t> | defined in <xref target="formal-syntax" format="default"/>. </dd | |||
| <t> barbot and bazbot: A group that's relevant for more | > | |||
| than one user-agent. The crawlers are not allowed to access | <dt> barbot and bazbot:</dt><dd> A group that's relevant for more | |||
| the URLs with the /example/page.html path prefix, but | than one user agent. The crawlers are not allowed to access | |||
| the URLs with the /example/page.html path prefix but | ||||
| otherwise have unrestricted access to the rest of the URLs | otherwise have unrestricted access to the rest of the URLs | |||
| on the site. </t> | on the site. </dd> | |||
| <t> quxbot: An empty group at end of the file. The crawler has | <dt> quxbot:</dt><dd> An empty group at the end of the file. The crawl | |||
| unrestricted access to the URLs on the site. </t> | er has | |||
| </list> | unrestricted access to the URLs on the site. </dd> | |||
| </t> | </dl> | |||
| <figure> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| <artwork><![CDATA[ | User-Agent: * | |||
| User-agent: * | | |||
| Disallow: *.gif$ | Disallow: *.gif$ | |||
| Disallow: /example/ | Disallow: /example/ | |||
| Allow: /publications/ | Allow: /publications/ | |||
| User-Agent: foobot | User-Agent: foobot | |||
| Disallow:/ | Disallow:/ | |||
| Allow:/example/page.html | Allow:/example/page.html | |||
| Allow:/example/allowed.gif | Allow:/example/allowed.gif | |||
| User-Agent: barbot | User-Agent: barbot | |||
| User-Agent: bazbot | User-Agent: bazbot | |||
| Disallow: /example/page.html | Disallow: /example/page.html | |||
| User-Agent: quxbot | User-Agent: quxbot | |||
| EOF | EOF | |||
| ]]></artwork> | ]]></artwork> | |||
| </figure> | | |||
| </section> | </section> | |||
| <section anchor="longest-match" title="Longest Match"> | <section anchor="longest-match" numbered="true" toc="default"> | |||
| | <name>Longest Match</name> | |||
| <t> The following example shows that in the case of two rules, the | <t> The following example shows that in the case of two rules, the | |||
| longest one is used for matching. In the following case, | longest one is used for matching. In the following case, | |||
| /example/page/disallowed.gif <bcp14>MUST</bcp14> be used for | /example/page/disallowed.gif <bcp14>MUST</bcp14> be used for | |||
| the URI example.com/example/page/disallow.gif. </t> | the URI example.com/example/page/disallow.gif. </t> | |||
| <figure> | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
| <artwork><![CDATA[ | | |||
| User-Agent: foobot | User-Agent: foobot | |||
| Allow: /example/page/ | Allow: /example/page/ | |||
| Disallow: /example/page/disallowed.gif | Disallow: /example/page/disallowed.gif | |||
| ]]></artwork> | ]]></artwork> | |||
| </figure> | | |||
| </section> | </section> | |||
| </section> | </section> | |||
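
To tie the examples back to the matching rules, here is a compact, self-contained Python demonstration (my own composition, not from either file) that parses the foobot group from the Simple Example and evaluates two URIs; group selection and the allow-on-tie rule are omitted for brevity:

```python
ROBOTS = """\
User-Agent: foobot
Disallow:/
Allow:/example/page.html
Allow:/example/allowed.gif
"""

def evaluate(body: str, path: str) -> bool:
    rules = []
    for line in body.splitlines():
        key, _, value = line.partition(":")
        if key.strip().lower() in ("allow", "disallow"):
            rules.append((key.strip().lower(), value.strip()))
    verdict, best = True, -1            # no match at all means "allowed"
    for kind, rule in rules:
        if rule and path.startswith(rule) and len(rule) > best:
            verdict, best = (kind == "allow"), len(rule)
    return verdict

print(evaluate(ROBOTS, "/example/page.html"))   # True: longest match is Allow
print(evaluate(ROBOTS, "/example/other.html"))  # False: only "Disallow:/" hits
```

| rfc9309xml2.original.xml | rfc9309.xml | |||
|---|---|---|---|---|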
| </middle> | </middle> | |||
| <back> | <back> | |||
| <references title='Normative References'> | <references> | |||
| &RFC1945; | <name>References</name> | |||
| &RFC2046; | <references> | |||
| &RFC2119; | <name>Normative References</name> | |||
| &RFC3629; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2046.xml"/> | |||
| &RFC3986; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml"/> | |||
| &RFC5234; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3629.xml"/> | |||
| &RFC8174; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.3986.xml"/> | |||
| &RFC8288; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5234.xml"/> | |||
| &RFC9110; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8174.xml"/> | |||
| &RFC9111; | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8288.xml"/> | |||
| </references> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9110.xml"/> | |||
| | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9111.xml"/> | |||
| | </references> | |||
| | <references> | |||
| | <name>Informative References</name> | |||
| <references title='Informative References'> | <reference anchor="ROBOTSTXT" target="https://www.robotstxt.org/"> | |||
| <reference anchor="ROBOTSTXT" target="http://www.robotstxt.org/"> | <front> | |||
| <front> | <title>The Web Robots Pages (including /robots.txt)</title> | |||
| <title>Robots Exclusion Protocol</title> | <author> | |||
| <author> | <organization/> | |||
| <organization></organization> | </author> | |||
| </author> | <date>2007</date> | |||
| <date year="n.d."/> | </front> | |||
| </front> | </reference> | |||
| </reference> | <reference anchor="SITEMAPS" target="https://www.sitemaps.org/index.html | |||
| <reference anchor="SITEMAPS" target="https://www.sitemaps.org/index.html"> | "> | |||
| <front> | <front> | |||
| <title>Sitemaps Protocol</title> | <title>What are Sitemaps? (Sitemap protocol)</title> | |||
| <author> | <author> | |||
| <organization></organization> | <organization/> | |||
| </author> | </author> | |||
| <date year="n.d."/> | <date>April 2020</date> | |||
| </front> | </front> | |||
| </reference> | </reference> | |||
| <reference anchor="KiB" target="https://simple.wikipedia.org/wiki/Kibibyte | <reference anchor="KiB" target="https://simple.wikipedia.org/wiki/Kibiby | |||
| "> | te"> | |||
| <front> | <front> | |||
| <title>Kibibyte - Simple English Wikipedia, the free encyclopedia</title> | <title>Kibibyte</title> | |||
| | <author> | |||
| <author> | <organization/> | |||
| <organization></organization> | </author> | |||
| </author> | <date day="17" month="September" year="2020"/> | |||
| <date year="n.d."/> | </front> | |||
| </front> | <refcontent>Simple English Wikipedia, the free encyclopedia</refcontent> | |||
| </reference> | </reference> | |||
| | </references> | |||
| </references> | </references> | |||
| </back> | </back> | |||
| </rfc> | </rfc> | |||

End of changes: 104 change blocks; 425 lines changed or deleted; 438 lines changed or added.