source: draft-ietf-iri-3987bis/draft-ietf-iri-bidi-guidelines.xml @ 168

Last change on this file since 168 was 168, checked in by duerst@…, 7 years ago

replaced entity reference to RFC 3490 with internationalized inline version

File size: 32.4 KB
Line 
1<?xml version="1.0"?>
2<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
3<!ENTITY rfc2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
4<!ENTITY DRAFT "draft-ietf-iri-bidi-guidelines-03">
5<!ENTITY YEAR "2012">
6]>
7<?rfc strict='yes'?>
8
9<?xml-stylesheet type='text/css' href='rfc2629.css' ?>
10<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
11<?rfc symrefs='yes'?>
12<?rfc sortrefs='yes'?>
13<?rfc iprnotified="no" ?>
14<?rfc toc='yes'?>
15<?rfc compact='yes'?>
16<?rfc subcompact='no'?>
17<rfc ipr="pre5378Trust200902" docName="&DRAFT;"
18  category="bcp" xml:lang="en">
19  <front>
20    <title abbrev="Bidi IRI Guidelines">Guidelines for Internationalized
21      Resource Identifiers with Bi-directional Characters (Bidi IRIs)</title>
22    <author initials="M.J." isurname="Dürst" surname="Duerst" ifullname="Martin J. Dürst"
23      fullname="Martin J. Duerst (Note: Please write &quot;Duerst&quot; with u-umlaut wherever possible, for example as &quot;D&amp;#252;rst&quot; in XML and HTML.)">
24      <organization>Aoyama Gakuin University<ionly> (青山学院大学)</ionly> </organization>
25      <address>
26        <postal>
27          <street>5-10-1 Fuchinobe</street>
28          <street>Chuo-ku</street>
29          <city>Sagamihara</city>
30          <region>Kanagawa</region>
31          <code>252-5258</code>
32          <country>Japan</country>
33        </postal>
34        <phone>+81 42 759 6329</phone>
35        <facsimile>+81 42 759 6495</facsimile>
36        <email>duerst@it.aoyama.ac.jp</email>
37        <uri><aonly>http://www.sw.it.aoyama.ac.jp/D%C3%BCrst/ (Note: This is the percent-encoded form of an IRI)</aonly><ionly>http://www.sw.it.aoyama.ac.jp/Dürst/</ionly></uri>
38      </address>
39    </author>
40    <author initials="L." surname="Masinter" fullname="Larry Masinter">
41      <organization>Adobe</organization>
42      <address>
43        <postal>
44          <street>345 Park Ave</street>
45          <city>San Jose</city>
46          <region>CA</region>
47          <code>95110</code>
48          <country>U.S.A.</country>
49        </postal>
50        <phone>+1-408-536-3024</phone>
51        <email>masinter@adobe.com</email>
52        <uri>http://larry.masinter.net</uri>
53      </address>
54    </author>
55    <author initials="A." isurname="Allawi (عادل علاوي)" surname="Allawi"
56      ifullname="Adil Allawi (عادل علاوي)" fullname="Adil Allawi">
57    <organization>Diwan Software Limited</organization>
58      <address>
59        <postal>
60          <street>37-39 Peckham Road</street>
61          <city>London</city>
62          <code>SE5 8UH</code>
63          <country>United Kingdom</country>
64        </postal>
65        <phone>+44 7718 785850</phone>
66        <facsimile>+44 20 72525444</facsimile>
67        <email>adil@diwan.com</email>
68        <uri>http://ironymark.diwan.com/</uri>
69      </address>
70    </author>
71    <date year="&YEAR;" month="October" />
72    <area>Applications</area>
73    <workgroup>Internationalized Resource Identifiers (iri)</workgroup>
74    <keyword>IRI</keyword>
75    <keyword>Internationalized Resource Identifier</keyword>
76    <keyword>BIDI</keyword>
77    <keyword>URI</keyword>
78    <keyword>URL</keyword>
79    <keyword>IDN</keyword>
80    <abstract>
81      <t>This specification gives guidelines for selection, use, and
82        presentation of Internationalized Resource Identifiers (IRIs) which include
83        characters with inherent right-to-left (rtl) writing direction. </t>
84    </abstract>
85  </front>
86  <middle>
87    <section title="Introduction">
88      <section title='Overview'>
89      <t>Some UCS characters, such as those used in the Arabic and Hebrew
90        scripts, have an inherent right-to-left (rtl) writing direction as
91        opposed to characters, such as those in the Latin script, that have an
92        inherent left-to-right (ltr) direction. IRIs containing rtl characters
93        (called bidirectional IRIs or Bidi IRIs) require additional attention
94        because of the non-trivial relation between their logical and visual
95        ordering. The logical order represents the order in which characters are
96        stored on computers and read by people. The visual order is the order in
97        which the characters appear (or are expected to appear) on a computer
98        display or printout.</t>
99      <t>Generally, alphabetic characters in scripts like Arabic and Hebrew are
100        drawn rtl while numbers are drawn ltr. Symbols such as slash ('/') and
101        period ('.') take their visual direction from the surrounding characters.
102        A list of all ASCII symbols with their bidirectional character type
103        and their function in URIs and IRIs is given in <xref target="ASCIISymbols"/>.</t>
104      <t>Because of this complex interaction between the logical representation,
105        the visual representation, and the syntax of a Bidi IRI, a balance is
106        needed between various requirements. The main requirements are: <list
107        style="hanging">
108        <t hangText="1.">user-predictable conversion between visual and logical
109          representation;</t>
110        <t hangText="2.">the ability to include a wide range of characters in
111          various parts of the IRI; and</t>
112        <t hangText="3.">minor or no changes or restrictions for
113          implementations.</t>
114        </list></t>
115        </section>
116      <section title='Availability'>
117        <t>This document is available in (line-printer ready) plaintext ASCII and in PDF.
118          It is also available in HTML from
119          <vspace/><eref target="http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.html"
120            >http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.html</eref>,
121          and in UTF-8 plaintext from
122          <vspace/><eref target="http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.utf8.txt"
123            >http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.utf8.txt</eref>.
124          While all these versions are identical in their technical content,
125          the HTML, PDF, and UTF-8 plaintext versions show non-ASCII characters directly.
126          This often makes it easier to understand examples, and readers are therefore strongly advised
127          to consult one of these versions in preference to or as a supplement to the ASCII version.</t>
128        <t><ionly>This version of this document contains bidirectional examples.
129          In order to correctly understand the examples, it is important to view this document
130          with a viewer that correctly implements the Unicode Bidirectional Algorithm <xref target="UNI9"/>.
131           Many text viewers and text editors, and all major browsers, currently implement
132           the Unicode Bidirectional Algorithm.
133           Also, all users who are reading RTL text on a regular basis have viewers
134           that implement this algorithm, because otherwise, they would be unable
135           to read even the simplest texts.
136           In order to check whether a viewer implements the Unicode Bidirectional Algorithm,
137            please observe the following three lines:
138           <vspace/>FEDCBA ,EDCBA ,DCBA ,CBA ,BA ,A
139          <vspace/><span dir='ltr'>ب, بت, بتث, بتثج, بتثجح, بتثجحخ</span>
140          <vspace/><span dir='ltr'>א, אב, אבג, אבגד, אבגדה, אבגדהו</span>
141           <vspace/>The first line contains upper-case Latin letters,
142           the second line contains Arabic letters,
143           and the third line contains Hebrew letters.
144           Your viewer will be okay if in all three lines, the shortest word (one character)
145           is on the right, and the longest word (six characters) on the left,
146           the words are getting longer and longer from right to left,
147           and the commas are between the words, but on the right of the spaces.
148           Otherwise, please use another viewer.
149           In the second line, the characters in each word should all be connected,
150           and change shape slightly depending on context. In the first and third lines,
151           no characters should be connected.</ionly></t>
152      </section>
153      <section title="Notation">
154        <t>In this document, "Bidi Notation", abbreviated "BN", is used for the given Bidi IRI
155          examples as follows: Lower case letters a-z stand for characters that
156          are written with a left to right ordering (such as Latin characters),
157          whereas upper case letters A-Z represent characters that are written
158          right to left (such as Arabic or Hebrew characters). Numbers and
159          symbols are the same.</t>
160        <t> In this document, the key words "MUST", "MUST NOT", "REQUIRED",
161          "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY",
162          and "OPTIONAL" are to be interpreted as described in <xref
163            target="RFC2119"/>.</t>
164      </section>
165      <!-- Notation -->
166    </section>
167    <!-- Introduction -->
168    <section title="Logical Storage and Visual Presentation" anchor="visual">
169      <t>When stored or transmitted in digital representation, Bidi IRIs MUST be
170        in full logical order and MUST conform to the IRI syntax rules (which
171        include the rules relevant to their scheme). This ensures that
172        Bidi IRIs can be processed in the same way as other IRIs.</t>
173      <t>Bidi IRIs MUST be visually ordered by the Unicode Bidirectional
174        Algorithm <xref target="UNIV6"/>, <xref target="UNI9"/>. Bidi IRIs MUST
175        be rendered in the same way as they would be if they were in a
176        left-to-right embedding. </t>
177      <t>In conformance with the Unicode Bidirectional Algorithm, embedding MAY
178        be done in one of two ways: <list style="hanging">
179        <t hangText="1.">precede the IRI with U+202A, LEFT-TO-RIGHT EMBEDDING
180          (LRE), and follow with U+202C, POP DIRECTIONAL FORMATTING (PDF);
181          or</t>
182        <t hangText="2.">use a higher-level protocol (e.g., the dir='ltr'
183          attribute in HTML).</t>
184        </list></t>
185      <t>Preceding and following the Bidi IRI with U+200E, LEFT-TO-RIGHT MARK
186        (LRM) is NOT RECOMMENDED, as there are cases where this may not be
187        sufficient to match full left to right embedding.</t>
188      <t>There is no requirement to use embedding if the display is still the
189        same without the embedding. For example, a Bidi IRI in a text
190        with left-to-right base directionality (such as used for English or
191        Cyrillic) that is preceded and followed by whitespace and strong
192        left-to-right characters does not need an embedding. Also, a
193        bidirectional relative IRI reference that only contains strong
194        right-to-left characters and weak characters (such as symbols) and that
195        starts and ends with a strong right-to-left character and appears in a
196        text with right-to-left base directionality (such as used for Arabic or
197        Hebrew) and is preceded and followed by whitespace and strong characters
198        does not need an embedding.</t>
199      <t>However, implementers are RECOMMENDED to use embedding in all cases
200        where they are not completely sure that the display behavior is
201        unaffected without the embedding.</t>
202      <t>The Unicode Bidirectional Algorithm (<xref target="UNI9"/>, section
203        4.3) permits higher-level protocols to influence bidirectional
204        rendering. Such changes by higher-level protocols MUST NOT be used if
205        they change the rendering of IRIs.</t>
206      <t>The bidirectional formatting characters that may be used before or
207        after the IRI to ensure correct display are not themselves part of the
208        IRI. IRIs MUST NOT contain bidirectional formatting characters (LRM,
209        RLM, LRE, RLE, LRO, RLO, and PDF). They affect the visual rendering of
210        the IRI but do not appear themselves. It would therefore not be possible
211        to input an IRI with such characters correctly.</t>
212    </section>
213    <!-- visual -->
214    <section title="Bidi IRI Structure" anchor="bidi-structure">
215      <t>The Unicode Bidirectional Algorithm is designed for general purpose
216        text. To make sure that it does not affect the rendering of Bidi IRIs
217        outside of the requirements of this document, some restrictions on Bidi
218        IRIs are necessary. These restrictions are given in terms of delimiters
219        (structural characters, mostly punctuation such as "@", ".", ":", and
220        "/") and components (usually consisting mostly of letters and
221        digits).</t>
222      <t>The following syntax rules from the ABNF of <xref target="RFC3987bis"/>
223        correspond to components for the purpose of Bidi behavior: iuserinfo,
224        ireg-name, isegment, isegment-nz, isegment-nz-nc, iquery, and
225        ifragment.</t>
226      <t>Specifications that define the syntax of any of the above components
227        MAY divide them further and define smaller parts to be components
228        according to this document. As an example, the restrictions of <xref
229          target="RFC3490"/> on bidirectional domain names correspond to treating
230        each label of a domain name as a component for schemes with ireg-name as
231        a domain name. Even where the components are not defined formally, it
232        may be helpful to think about some syntax in terms of components and to
233        apply the relevant restrictions. For example, for the usual name/value
234        syntax in query parts, it is convenient to treat each name and each
235        value as a component. As another example, the extensions in a resource
236        name can be treated as separate components.</t>
237      <t>For each component, the following restrictions apply:</t>
238      <t> <list style="hanging">
239        <t hangText="1.">A component SHOULD NOT use both right-to-left and
240          left-to-right characters.</t>
241        <t hangText="2.">A component using right-to-left characters SHOULD start
242          with a right-to-left character, and end with a right-to-left character
243          potentially followed by one or more nonspacing marks (bidi class NSM).</t>
244      </list></t>
245      <t>The above restrictions are given as "SHOULD"s, rather than as "MUST"s.
246        For IRIs that are never presented visually, they are not relevant.
247        However, for IRIs in general, they are very important to ensure
248        consistent conversion between visual presentation and logical
249        representation, in both directions.</t>
250      <t><list style="hanging">
251        <t hangText="Note:">In some components, the above restrictions may
252          actually be strictly enforced. For example, <xref target="RFC3490"/>
253          requires that these restrictions apply to the labels of a host name
254          for those schemes where ireg-name is a host name. In some other
255          components (for example, path components) following these restrictions
256          may not be too difficult. For other components, such as parts of the
257          query part, it may be very difficult to enforce the restrictions
258          because the values of query parameters may be arbitrary character
259          sequences.</t>
260      </list></t>
261      <t>If the above restrictions cannot be satisfied otherwise, the affected
262        component can always be mapped to URI notation using the general
263        percent-encoding of IRI components, as described in <xref
264          target="RFC3987bis"/>. Please note that the whole component has to be
265        mapped (see also Example 9 below).</t>
266    </section>
267    <!-- bidi-structure -->
268    <section title="Input of Bidi IRIs" anchor="bidiInput">
269      <t>Bidi input methods MUST generate Bidi IRIs in logical order while
270        rendering them according to <xref target="visual"/>. During input,
271        rendering SHOULD be updated after every new character is input to avoid
272        end-user confusion.</t>
273    </section>
274    <!-- bidiInput -->
275    <section title="Examples">
276      <t>This section gives examples of Bidi IRIs in Bidi Notation. It shows
277        legal IRIs with the relationship between their logical and visual
278        representation and explains how certain phenomena in this relationship
279        may look strange to somebody not familiar with bidirectional behavior,
280        but familiar to users of Arabic and Hebrew. It also shows what happens
281        if the restrictions given in <xref target="bidi-structure"/> are not
282        followed. <aonly>Please see the "Availability" section for versions
283        of the examples in Arabic and Hebrew script.</aonly></t>
284      <t>To read the bidi text in the examples, read the visual representation
285        from left to right until you encounter a block of rtl text. Read the rtl
286        block (including slashes and other special characters) from right to
287        left, then continue at the next unread ltr character.</t>
288      <t>Please note that "BN" stands for "Bidi Notation"; see the "Notation" section.
289        AR stands for Arabic, HE for Hebrew.</t>
290
291      <t>Example 1: A single component with rtl characters is inverted:
292
293        <vspace/>Logical representation (BN): "http://ab.CDEFGH.ij/kl/mn/op.html"
294        <vspace/>Visual representation (BN): "http://ab.HGFEDC.ij/kl/mn/op.html"
295        <ionly>
296        <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.ij/kl/mn/op.html</span>"
297        <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.ij/kl/mn/op.html</span>"
298        </ionly>
299        <vspace/>Components can be read one
300        by one, and each component can be read in its natural direction.</t>
301
302      <t>Example 2: More than one consecutive component with rtl characters is
303        inverted as a whole:
304
305        <vspace/>Logical representation (BN): "http://ab.CDE.FGH/ij/kl/mn/op.html"
306        <vspace/>Visual representation (BN): "http://ab.HGF.EDC/ij/kl/mn/op.html"
307        <ionly>
308          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثج.حخد/ij/kl/mn/op.html</span>"
309          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדה.וזח/ij/kl/mn/op.html</span>"
310        </ionly>
311       
312        <vspace/> A sequence of rtl
313        components is read rtl, in the same way as a sequence of rtl words is
314        read rtl in a bidi text.</t>
315
316      <t>Example 3: All components of an IRI (except for the scheme) are rtl.
317        All rtl components are inverted overall:
318
319        <vspace/>Logical representation (BN): "http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV"
320        <vspace/>Visual representation (BN): "http://VU#TS=RQ;PO=NM?LK/JI/HG/FE.DC.BA"
321        <ionly>
322          <vspace/>Visual representation (AR): "<span dir='ltr'>http://اب.تث.جح/خد/ذر/زس?شص=ضط;ظع=غف#قك</span>"
323          <vspace/>Visual representation (HE): "<span dir='ltr'>http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת</span>"
324        </ionly>
325       
326        <vspace/> The
327        whole IRI (except the scheme) is read rtl. Delimiters between rtl
328        components stay between the respective components; delimiters between
329        ltr and rtl components don't move.</t>
330
331      <t>Example 4: Each of several sequences of rtl components is inverted on
332        its own:
333
334        <vspace/>Logical representation (BN): "http://AB.CD.ef/gh/IJ/KL.html"
335        <vspace/>Visual representation (BN): "http://DC.BA.ef/gh/LK/JI.html"
336        <ionly>
337          <vspace/>Visual representation (AR): "<span dir='ltr'>http://اب.تث.ef/gh/ذر/زس.html</span>"
338          <vspace/>Visual representation (HE): "<span dir='ltr'>http://אב.גד.ef/gh/טי/כל.html</span>"
339        </ionly>
340       
341        <vspace/> Each sequence of rtl components
342        is read rtl, in the same way as each sequence of rtl words in an ltr
343        text is read rtl.</t>
344
345      <t>Example 5: Example 2, applied to components of different kinds:
346
347        <vspace/>Logical representation (BN): "http://ab.cd.EF/GH/ij/kl.html"
348        <vspace/>Visual representation (BN): "http://ab.cd.HG/FE/ij/kl.html"
349        <ionly>
350          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.جح/خد/ij/kl.html</span>"
351          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.הו/זח/ij/kl.html</span>"
352        </ionly>
353       
354        <vspace/>
355        The inversion of the domain name label and the path component may be
356        unexpected, but it is consistent with other bidi behavior. For
357        reassurance that the domain component really is "ab.cd.EF", it may be
358        helpful to read aloud the visual representation following the Unicode
359        Bidirectional Algorithm. After "http://ab.cd." one reads the RTL block
360        "E-F-slash-G-H", which corresponds to the logical representation. </t>
361
362      <t>Example 6: Same as Example 5, with more rtl components:
363       
364        <vspace/>Logical representation (BN): "http://ab.CD.EF/GH/IJ/kl.html"
365        <vspace/>Visual representation (BN): "http://ab.JI/HG/FE.DC/kl.html"
366        <ionly>
367          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تث.جح/خد/ذر/kl.html</span>"
368          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גד.הו/זח/טי/kl.html</span>"
369        </ionly>
370       
371        <vspace/> The inversion of the domain
372        name labels and the path components may be easier to identify because
373        the delimiters also move.</t>
374     
375      <t>Example 7: A single rtl component includes digits:
376
377        <vspace/>Logical representation (BN): "http://ab.CDE123FGH.ij/kl/mn/op.html"
378        <vspace/>Visual representation (BN): "http://ab.HGF123EDC.ij/kl/mn/op.html"
379        <ionly>
380          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثج123حخد.ij/kl/mn/op.html</span>"
381          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדה123וזח.ij/kl/mn/op.html</span>"
382        </ionly>
383       
384        <vspace/> Numbers
385        are written ltr in all cases but are treated as an additional embedding
386        inside a run of rtl characters. This is completely consistent with usual
387        bidirectional text.</t>
388
389      <t>Example 8 (not allowed): Numbers are at the start or end of an rtl
390        component:
391       
392        <vspace/>Logical representation (BN): "http://ab.cd.ef/GH1/2IJ/KL.html"
393        <vspace/>Visual representation (BN): "http://ab.cd.ef/LK/JI1/2HG.html"
394        <ionly>
395          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.ef/خد1/2ذر/زس.html</span>"
396          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.ef/זח1/2טי/כל.html</span>"
397        </ionly>
398       
399        <vspace/> The sequence "1/2" is
400        interpreted by the Bidirectional Algorithm as a fraction, fragmenting the
401        components and leading to confusion. There are other characters that are
402        interpreted in a special way close to numbers; in particular, "+", "-",
403        "#", "$", "%", ",", ".", and ":".</t>
404
405      <t>Example 9 (not allowed): The numbers in the previous example are
406        percent-encoded:
407       
408        <vspace/>Logical representation (BN): "http://ab.cd.ef/GH%31/%32IJ/KL.html"
409        <vspace/>Visual representation (BN): "http://ab.cd.ef/LK/JI%32/%31HG.html"
410        <ionly>
411          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.ef/خد%31/%32ذر/زس.html</span>"
412          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.ef/זח%31/%32טי/כל.html</span>"
413        </ionly>
414       
415      </t>
416     
417      <t>Example 10 (allowed but not recommended):
418       
419        <vspace/>Logical representation (BN): "http://ab.CDEFGH.123/kl/mn/op.html"
420        <vspace/>Visual representation (BN): "http://ab.123.HGFEDC/kl/mn/op.html"
421        <ionly>
422          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.123/kl/mn/op.html</span>"
423          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.123/kl/mn/op.html</span>"
424        </ionly>
425       
426        <vspace/> Components
427        consisting of only numbers are allowed (it would be rather difficult to
428        prohibit them), but these may interact with adjacent RTL components in
429        ways that are not easy to predict.</t>
430
431      <t>Example 11 (allowed but not recommended):
432       
433        <vspace/>Logical representation (BN): "http://ab.CDEFGH.123ij/kl/mn/op.html"
434        <vspace/>Visual representation (BN): "http://ab.123.HGFEDCij/kl/mn/op.html"
435        <ionly>
436          <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.123ij/kl/mn/op.html</span>"
437          <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.123ij/kl/mn/op.html</span>"
438        </ionly>
439       
440        <vspace/>
441        Components consisting of numbers and left-to-right characters are
442        allowed, but these may interact with adjacent RTL components in ways
443        that are not easy to predict.</t>
444    </section>
445    <!-- examples -->
446    <section title="IANA Considerations" anchor="iana">
447      <t>This document makes no changes to IANA registries.</t>
448    </section>
449    <!-- IANA -->
450    <section title="Security Considerations" anchor="security">
451      <t>Confusion can occur with bidirectional IRIs, if the restrictions in
452        <xref target="bidi-structure"/> are not followed. The same visual
453        representation may be interpreted as different logical representations,
454        and vice versa. It is also very important that a correct Unicode
455        bidirectional implementation be used.</t>
456    </section>
457    <!-- security -->
458    <section title="Acknowledgements">
459      <t>This document was derived from <xref target="RFC3987"/> and <xref
460        target="RFC3987bis"/> and the acknowledgments of those documents
461        apply. Shunsuke Oshima (大嶋 俊介) provided the data for <xref  target="ASCIISymbols"/>.</t>
462    </section>
463    <!-- acknowledgements -->
464    <section title="Main Changes Since RFC 3987">
465      <t>This section describes the main changes since <xref target="RFC3987"></xref>.</t>       
466       
467      <t><list style="symbols">
468        <t>Separated out the section on bidi in <xref target="RFC3987"/> to this document.</t>
469        <t>Added examples in Arabic and Hebrew, which can be seen in html/pdf/utf8.txt versions.</t>
470        <t>Allowed NSMs at the end of components, for Dhivehi, Yiddish,...</t>
471        <t>TODO: check for major changes between RFC3987 and draft -02.</t>
472      </list>
473      </t>
474        <t>Note to RFC Editor: Please remove this paragraph before publication.
475          Detailed change logs are available in the IETF tools subversion repository at
476          http://trac.tools.ietf.org/wg/iri/trac/log/draft-ietf-iri-3987bis/draft-ietf-iri-bidi-guidelines.xml.</t>
477     </section>
478  </middle>
479  <back>
480    <references title="Normative References">
481      <reference anchor="RFC3987bis"
482        target="http://tools.ietf.org/id/draft-ietf-iri-3987bis">
483        <front>
484          <title>Internationalized Resource Identifiers (IRIs)</title>
485          <author initials="M.J." isurname="Dürst" surname="Duerst" ifullname="Martin J. Dürst" fullname="Martin J. Duerst"/>
486          <author initials="L." surname="Masinter" fullname="Larry Masinter"/>
487          <author initials="M." surname="Suignard"/>
488          <date year="2012" month="October"/>
489        </front>
490      </reference>
491      &rfc2119;
492      <reference anchor="RFC3490">
493        <front>
494          <title>
495            Internationalizing Domain Names in Applications (IDNA)
496          </title>
497          <author initials="P." surname="Faltstrom" isurname="Fältström"
498                  fullname="P. Faltstrom" ifullname="P. Fältström"/>
499          <author initials="P." surname="Hoffman" fullname="P. Hoffman"/>
500          <author initials="A." surname="Costello" fullname="A. Costello"/>
501          <date year="2003" month="March"/>
502        </front>
503        <seriesInfo name="RFC" value="3490"/>
504        <format type="TXT" octets="51943" target="http://www.rfc-editor.org/rfc/rfc3490.txt"/>
505      </reference>      <reference anchor="UNIV6">
506        <front>
507          <title>The Unicode Standard, Version 6.2.0 (Mountain View, CA, The
508            Unicode Consortium, 2012, ISBN 978-1-936213-07-8)</title>
509          <author>
510            <organization>The Unicode Consortium</organization>
511          </author>
512          <date year="2012" month="October"/>
513        </front>
514      </reference>
515      <reference anchor="UNI9"
516        target="http://www.unicode.org/reports/tr9/tr9-27.html">
517        <front>
518          <title>The Unicode Bidirectional Algorithm</title>
519          <author initials="M." surname="Davis" fullname="Mark Davis">
520            <organization/>
521          </author>
522          <date year="2012" month="September"/>
523        </front>
524        <seriesInfo name="Unicode Standard Annex" value="#9"/>
525      </reference>
526    </references>
527
528    <references title="Informative References">
529      <reference anchor="RFC3987">
530        <front>
531          <title>Internationalized Resource Identifiers (IRIs)</title>
532          <author  initials="M.J." isurname="Dürst" surname="Duerst" ifullname="Martin J. Dürst" fullname="Martin J. Duerst"/>
533          <author initials="M." surname="Suignard" fullname="M. Suignard">
534            <organization/>
535          </author>
536          <date year="2005" month="January"/>
537        </front>
538        <seriesInfo name="RFC" value="3987"/>
539        <format type="TXT" octets="111190" target="http://www.rfc-editor.org/rfc/rfc3987.txt"/>
540      </reference>
541     
542    </references>
543    <section title='List of ASCII Symbols and their Bidirectional Character Types'  anchor="ASCIISymbols">
544      <t>To help understand the influence of various symbols on IRI display,
545        this appendix lists all of them, giving the character itself,
546        the Unicode codepoint, the character name, the bidirectional character type (BCT)
547        and the rule and relevance in the IRI syntax.</t>
548      <t>The most important ones in practice are
549        ":", delimiting scheme and port (CS, Common Number Separator),
550        "/" to indicate generic (hierarchical) schemes and as a path separator (CS, Common Number Separator),
551        "?" to introduce a query part (ON, Other Neutral),
552        "#" to introduce a fragment identifier (ET, European Number Terminator),
553        "." to separate labels in a domain name (CS, Common Number Separator),
554        "&amp;" to separate form parameters (ON, Other Neutral), and
555        "@" to separate user information (ON, Other Neutral).
556      </t>
557      <figure>
558        <artwork>
559Char Codepoint  Character Name       BCT  IRI syntax
560-------------------------------------------------------------
561"#"  U+0023     NUMBER SIGN          ET   gen-delims, fragments
562"/"  U+002F     SOLIDUS              CS   gen-delims, paths
563":"  U+003A     COLON                CS   gen-delims, scheme, port
564"?"  U+003F     QUESTION MARK        ON   gen-delims, query part
565"@"  U+0040     COMMERCIAL AT        ON   gen-delims, user
566"["  U+005B     LEFT SQUARE BRACKET  ON   gen-delims
567"]"  U+005D     RIGHT SQUARE BRACKET ON   gen-delims
568"%"  U+0025     PERCENT SIGN         ET   pct-encoded
569"!"  U+0021     EXCLAMATION MARK     ON   sub-delims
570","  U+002C     COMMA                CS   sub-delims
571"+"  U+002B     PLUS SIGN            ES   sub-delims
572"$"  U+0024     DOLLAR SIGN          ET   sub-delims
573"("  U+0028     LEFT PARENTHESIS     ON   sub-delims
574"'"  U+0027     APOSTROPHE           ON   sub-delims
575")"  U+0029     RIGHT PARENTHESIS    ON   sub-delims
576"*"  U+002A     ASTERISK             ON   sub-delims
577";"  U+003B     SEMICOLON            ON   sub-delims
578"="  U+003D     EQUALS SIGN          ON   sub-delims, forms
579"&amp;"  U+0026     AMPERSAND            ON   sub-delims, forms
580"."  U+002E     FULL STOP            CS   unreserved, domain names
581"-"  U+002D     HYPHEN-MINUS         ES   unreserved
582"_"  U+005F     LOW LINE             ON   unreserved
583"~"  U+007E     TILDE                ON   unreserved
584" "  U+0020     SPACE                WS   excluded, delim
585'"'  U+0022     QUOTATION MARK       ON   excluded, delim
586"\"  U+005C     REVERSE SOLIDUS      ON   excluded, unwise
587"^"  U+005E     CIRCUMFLEX ACCENT    ON   excluded, unwise
588"&lt;"  U+003C     LESS-THAN SIGN       ON   excluded, delim
589">"  U+003E     GREATER-THAN SIGN    ON   excluded, delim
590"`"  U+0060     GRAVE ACCENT         ON   excluded, unwise
591"|"  U+007C     VERTICAL LINE        ON   excluded, unwise
592"{"  U+007B     LEFT CURLY BRACKET   ON   excluded, delim
593"}"  U+007D     RIGHT CURLY BRACKET  ON   excluded, delim
594        </artwork>
595      </figure>
596    </section>
597  </back>
598</rfc>
Note: See TracBrowser for help on using the repository browser.