1 | <?xml version="1.0"?> |
---|
2 | <!DOCTYPE rfc SYSTEM "rfc2629.dtd" [ |
---|
3 | <!ENTITY rfc2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"> |
---|
4 | <!ENTITY rfc3490 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3490.xml"> |
---|
5 | <!ENTITY rfc3987 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3987.xml"> |
---|
6 | <!ENTITY DRAFT "draft-ietf-iri-bidi-guidelines-03"> |
---|
7 | <!ENTITY YEAR "2012"> |
---|
8 | ]> |
---|
9 | <?rfc strict='yes'?> |
---|
10 | |
---|
11 | <?xml-stylesheet type='text/css' href='rfc2629.css' ?> |
---|
12 | <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> |
---|
13 | <?rfc symrefs='yes'?> |
---|
14 | <?rfc sortrefs='yes'?> |
---|
15 | <?rfc iprnotified="no" ?> |
---|
16 | <?rfc toc='yes'?> |
---|
17 | <?rfc compact='yes'?> |
---|
18 | <?rfc subcompact='no'?> |
---|
19 | <rfc ipr="pre5378Trust200902" docName="&DRAFT;" |
---|
20 | category="bcp" xml:lang="en"> |
---|
21 | <front> |
---|
22 | <title abbrev="Bidi IRI Guidelines">Guidelines for Internationalized |
---|
23 | Resource Identifiers with Bi-directional Characters (Bidi IRIs)</title> |
---|
24 | <author initials="M.J." isurname="Dürst" surname="Duerst" ifullname="Martin J. Dürst" |
---|
25 | fullname="Martin J. Duerst (Note: Please write &quot;Duerst&quot; with u-umlaut wherever possible, for example as &quot;D&amp;#252;rst&quot; in XML and HTML.)"> |
---|
26 | <organization>Aoyama Gakuin University<ionly> (青山学院大学)</ionly> </organization> |
---|
27 | <address> |
---|
28 | <postal> |
---|
29 | <street>5-10-1 Fuchinobe</street> |
---|
30 | <street>Chuo-ku</street> |
---|
31 | <city>Sagamihara</city> |
---|
32 | <region>Kanagawa</region> |
---|
33 | <code>252-5258</code> |
---|
34 | <country>Japan</country> |
---|
35 | </postal> |
---|
36 | <phone>+81 42 759 6329</phone> |
---|
37 | <facsimile>+81 42 759 6495</facsimile> |
---|
38 | <email>duerst@it.aoyama.ac.jp</email> |
---|
39 | <uri><aonly>http://www.sw.it.aoyama.ac.jp/D%C3%BCrst/ (Note: This is the percent-encoded form of an IRI)</aonly><ionly>http://www.sw.it.aoyama.ac.jp/Dürst/</ionly></uri> |
---|
40 | </address> |
---|
41 | </author> |
---|
42 | <author initials="L." surname="Masinter" fullname="Larry Masinter"> |
---|
43 | <organization>Adobe</organization> |
---|
44 | <address> |
---|
45 | <postal> |
---|
46 | <street>345 Park Ave</street> |
---|
47 | <city>San Jose</city> |
---|
48 | <region>CA</region> |
---|
49 | <code>95110</code> |
---|
50 | <country>U.S.A.</country> |
---|
51 | </postal> |
---|
52 | <phone>+1-408-536-3024</phone> |
---|
53 | <email>masinter@adobe.com</email> |
---|
54 | <uri>http://larry.masinter.net</uri> |
---|
55 | </address> |
---|
56 | </author> |
---|
57 | <author initials="A." isurname="Allawi (عادل علاوي)" surname="Allawi" |
---|
58 | ifullname="Adil Allawi (عادل علاوي)" fullname="Adil Allawi"> |
---|
59 | <organization>Diwan Software Limited</organization> |
---|
60 | <address> |
---|
61 | <postal> |
---|
62 | <street>37-39 Peckham Road</street> |
---|
63 | <city>London</city> |
---|
64 | <code>SE5 8UH</code> |
---|
65 | <country>United Kingdom</country> |
---|
66 | </postal> |
---|
67 | <phone>+44 7718 785850</phone> |
---|
68 | <facsimile>+44 20 72525444</facsimile> |
---|
69 | <email>adil@diwan.com</email> |
---|
70 | <uri>http://ironymark.diwan.com/</uri> |
---|
71 | </address> |
---|
72 | </author> |
---|
73 | <date year="&YEAR;" month="October" /> |
---|
74 | <area>Applications</area> |
---|
75 | <workgroup>Internationalized Resource Identifiers (iri)</workgroup> |
---|
76 | <keyword>IRI</keyword> |
---|
77 | <keyword>Internationalized Resource Identifier</keyword> |
---|
78 | <keyword>BIDI</keyword> |
---|
79 | <keyword>URI</keyword> |
---|
80 | <keyword>URL</keyword> |
---|
81 | <keyword>IDN</keyword> |
---|
82 | <abstract> |
---|
83 | <t>This specification gives guidelines for selection, use, and |
---|
84 | presentation of Internationalized Resource Identifiers (IRIs) which include |
---|
85 | characters with inherent right-to-left (rtl) writing direction. </t> |
---|
86 | </abstract> |
---|
87 | </front> |
---|
88 | <middle> |
---|
89 | <section title="Introduction"> |
---|
90 | <section title='Overview'> |
---|
91 | <t>Some UCS characters, such as those used in the Arabic and Hebrew |
---|
92 | scripts, have an inherent right-to-left (rtl) writing direction as |
---|
93 | opposed to characters, such as those in the Latin script, that have an |
---|
94 | inherent left-to-right (ltr) direction. IRIs containing rtl characters |
---|
95 | (called bidirectional IRIs or Bidi IRIs) require additional attention |
---|
96 | because of the non-trivial relation between their logical and visual |
---|
97 | ordering. The logical order represents the order in which characters are |
---|
98 | stored on computers and read by people. The visual order is the order in |
---|
99 | which the characters appear (or are expected to appear) on a computer |
---|
100 | display or printout.</t> |
---|
101 | <t>Generally, alphabetic characters in scripts like Arabic and Hebrew are |
---|
102 | drawn rtl while numbers are drawn ltr. Symbols such as slash ('/') and |
---|
103 | period ('.') take their visual direction from the surrounding characters.</t> |
---|
104 | <t>Because of this complex interaction between the logical representation, |
---|
105 | the visual representation, and the syntax of a Bidi IRI, a balance is |
---|
106 | needed between various requirements. The main requirements are: <list |
---|
107 | style="hanging"> |
---|
108 | <t hangText="1.">user-predictable conversion between visual and logical |
---|
109 | representation;</t> |
---|
110 | <t hangText="2.">the ability to include a wide range of characters in |
---|
111 | various parts of the IRI; and</t> |
---|
112 | <t hangText="3.">minor or no changes or restrictions for |
---|
113 | implementations.</t> |
---|
114 | </list></t> |
---|
115 | </section> |
---|
116 | <section title='Availability'> |
---|
117 | <t>This document is available in (line-printer ready) plaintext ASCII and in PDF. |
---|
118 | It is also available in HTML from |
---|
119 | <vspace/><eref target="http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.html" |
---|
120 | >http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.html</eref>, |
---|
121 | and in UTF-8 plaintext from |
---|
122 | <vspace/><eref target="http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.utf8.txt" |
---|
123 | >http://www.sw.it.aoyama.ac.jp/&YEAR;/pub/&DRAFT;.utf8.txt</eref>. |
---|
124 | While all these versions are identical in their technical content, |
---|
125 | the HTML, PDF, and UTF-8 plaintext versions show non-ASCII characters directly. |
---|
126 | This often makes it easier to understand examples, and readers are therefore strongly advised |
---|
127 | to consult these versions in preference or as a supplement to the ASCII version.</t> |
---|
128 | <t>This document contains bidirectional examples. In order to correctly understand |
---|
129 | the examples, it is important to view this document with a tool that |
---|
130 | correctly implements the Unicode Bidirectional Algorithm <xref target="UNI9"/>. |
---|
131 | Many text viewers and text editors, and most browsers, currently implement |
---|
132 | the Unicode Bidirectional Algorithm, but there are some exceptions. |
---|
133 | In order to check whether a tool implements the Unicode Bidirectional Algorithm, |
---|
134 | please observe the line of Hebrew characters below:</t> |
---|
135 | <t><span dir='ltr'>א, אב, אבג, אבגד ,אבגדה, אבגדהו</span></t> |
---|
136 | <t>If the shortest word (one letter) is on the right, and the longest word (six characters) |
---|
137 | is on the left, and the words get longer and longer from right to left, |
---|
138 | and the commas are between the words, but on the right of the spaces, |
---|
139 | then your tool should be okay. Otherwise, please use another tool. |
---|
140 | Please note that the example is only in Hebrew because in Arabic, |
---|
141 | it is difficult to count characters for people who are not familiar |
---|
142 | with the script.</t> |
---|
143 | </section> |
---|
144 | <section title="Notation"> |
---|
145 | <t>In this document, "Bidi Notation", abbreviated "BN", is used for the given Bidi IRI |
---|
146 | examples as follows: Lower case letters a-z stand for characters that |
---|
147 | are written with a left to right ordering (such as Latin characters), |
---|
148 | whereas upper case letters A-Z represent characters that are written |
---|
149 | right to left (such as Arabic or Hebrew characters). Numbers and |
---|
150 | symbols are the same.</t> |
---|
151 | <t> In this document, the key words "MUST", "MUST NOT", "REQUIRED", |
---|
152 | "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", |
---|
153 | and "OPTIONAL" are to be interpreted as described in <xref |
---|
154 | target="RFC2119"/>.</t> |
---|
155 | </section> |
---|
156 | <!-- Notation --> |
---|
157 | </section> |
---|
158 | <!-- Introduction --> |
---|
159 | <section title="Logical Storage and Visual Presentation" anchor="visual"> |
---|
160 | <t>When stored or transmitted in digital representation, Bidi IRIs MUST be |
---|
161 | in full logical order and MUST conform to the IRI syntax rules (which |
---|
162 | include the rules relevant to their scheme). This ensures that |
---|
163 | Bidi IRIs can be processed in the same way as other IRIs.</t> |
---|
164 | <t>Bidi IRIs MUST be visually ordered by the Unicode Bidirectional |
---|
165 | Algorithm <xref target="UNIV6"/>, <xref target="UNI9"/>. Bidi IRIs MUST |
---|
166 | be rendered in the same way as they would be if they were in a |
---|
167 | left-to-right embedding. </t> |
---|
168 | <t>In conformance with the Unicode Bidirectional Algorithm, embedding MAY |
---|
169 | be done in one of two ways: <list style="hanging"> |
---|
170 | <t hangText="1.">precede the IRI with U+202A, LEFT-TO-RIGHT EMBEDDING |
---|
171 | (LRE), and follow with U+202C, POP DIRECTIONAL FORMATTING (PDF); |
---|
172 | or</t> |
---|
173 | <t hangText="2.">use a higher-level protocol (e.g., the dir='ltr' |
---|
174 | attribute in HTML).</t> |
---|
175 | </list></t> |
---|
176 | <t>Preceding and following the Bidi IRI with U+200E, LEFT-TO-RIGHT MARK |
---|
177 | (LRM), is NOT RECOMMENDED, as there are cases where this may not be |
---|
178 | sufficient to match full left to right embedding.</t> |
---|
179 | <t>There is no requirement to use embedding if the display is still the |
---|
180 | same without the embedding. For example, a Bidi IRI in a text |
---|
181 | with left-to-right base directionality (such as used for English or |
---|
182 | Cyrillic) that is preceded and followed by whitespace and strong |
---|
183 | left-to-right characters does not need an embedding. Also, a |
---|
184 | bidirectional relative IRI reference that only contains strong |
---|
185 | right-to-left characters and weak characters (such as symbols) and that |
---|
186 | starts and ends with a strong right-to-left character and appears in a |
---|
187 | text with right-to-left base directionality (such as used for Arabic or |
---|
188 | Hebrew) and is preceded and followed by whitespace and strong characters |
---|
189 | does not need an embedding.</t> |
---|
190 | <t>However, implementers are RECOMMENDED to use embedding in all cases |
---|
191 | where they are not completely sure that the display behavior is |
---|
192 | unaffected without the embedding.</t> |
---|
193 | <t>The Unicode Bidirectional Algorithm (<xref target="UNI9"/>, section |
---|
194 | 4.3) permits higher-level protocols to influence bidirectional |
---|
195 | rendering. Such changes by higher-level protocols MUST NOT be used if |
---|
196 | they change the rendering of IRIs.</t> |
---|
197 | <t>The bidirectional formatting characters that may be used before or |
---|
198 | after the IRI to ensure correct display are not themselves part of the |
---|
199 | IRI. IRIs MUST NOT contain bidirectional formatting characters (LRM, |
---|
200 | RLM, LRE, RLE, LRO, RLO, and PDF). They affect the visual rendering of |
---|
201 | the IRI but do not appear themselves. It would therefore not be possible |
---|
202 | to input an IRI with such characters correctly.</t> |
---|
203 | </section> |
---|
204 | <!-- visual --> |
---|
205 | <section title="Bidi IRI Structure" anchor="bidi-structure"> |
---|
206 | <t>The Unicode Bidirectional Algorithm is designed for general purpose |
---|
207 | text. To make sure that it does not affect the rendering of Bidi IRIs |
---|
208 | outside of the requirements of this document, some restrictions on Bidi |
---|
209 | IRIs are necessary. These restrictions are given in terms of delimiters |
---|
210 | (structural characters, mostly punctuation such as "@", ".", ":", and |
---|
211 | "/") and components (usually consisting mostly of letters and |
---|
212 | digits).</t> |
---|
213 | <t>The following syntax rules from the ABNF of <xref target="RFC3987bis"/> |
---|
214 | correspond to components for the purpose of Bidi behavior: iuserinfo, |
---|
215 | ireg-name, isegment, isegment-nz, isegment-nz-nc, iquery, and |
---|
216 | ifragment.</t> |
---|
217 | <t>Specifications that define the syntax of any of the above components |
---|
218 | MAY divide them further and define smaller parts to be components |
---|
219 | according to this document. As an example, the restrictions of <xref |
---|
220 | target="RFC3490"/> on bidirectional domain names correspond to treating |
---|
221 | each label of a domain name as a component for schemes with ireg-name as |
---|
222 | a domain name. Even where the components are not defined formally, it |
---|
223 | may be helpful to think about some syntax in terms of components and to |
---|
224 | apply the relevant restrictions. For example, for the usual name/value |
---|
225 | syntax in query parts, it is convenient to treat each name and each |
---|
226 | value as a component. As another example, the extensions in a resource |
---|
227 | name can be treated as separate components.</t> |
---|
228 | <t>For each component, the following restrictions apply:</t> |
---|
229 | <t> <list style="hanging"> |
---|
230 | <t hangText="1.">A component SHOULD NOT use both right-to-left and |
---|
231 | left-to-right characters.</t> |
---|
232 | <t hangText="2.">A component using right-to-left characters SHOULD start |
---|
233 | and end with right-to-left characters.</t> |
---|
234 | </list></t> |
---|
235 | <t>The above restrictions are given as "SHOULD"s, rather than as "MUST"s. |
---|
236 | For IRIs that are never presented visually, they are not relevant. |
---|
237 | However, for IRIs in general, they are very important to ensure |
---|
238 | consistent conversion between visual presentation and logical |
---|
239 | representation, in both directions.</t> |
---|
240 | <t><list style="hanging"> |
---|
241 | <t hangText="Note:">In some components, the above restrictions may |
---|
242 | actually be strictly enforced. For example, <xref target="RFC3490"/> |
---|
243 | requires that these restrictions apply to the labels of a host name |
---|
244 | for those schemes where ireg-name is a host name. In some other |
---|
245 | components (for example, path components) following these restrictions |
---|
246 | may not be too difficult. For other components, such as parts of the |
---|
247 | query part, it may be very difficult to enforce the restrictions |
---|
248 | because the values of query parameters may be arbitrary character |
---|
249 | sequences.</t> |
---|
250 | </list></t> |
---|
251 | <t>If the above restrictions cannot be satisfied otherwise, the affected |
---|
252 | component can always be mapped to URI notation using the general |
---|
253 | percent-encoding of IRI components, as described in <xref |
---|
254 | target="RFC3987bis"/>. Please note that the whole component has to be |
---|
255 | mapped (see also Example 9 below).</t> |
---|
256 | </section> |
---|
257 | <!-- bidi-structure --> |
---|
258 | <section title="Input of Bidi IRIs" anchor="bidiInput"> |
---|
259 | <t>Bidi input methods MUST generate Bidi IRIs in logical order while |
---|
260 | rendering them according to <xref target="visual"/>. During input, |
---|
261 | rendering SHOULD be updated after every new character is input to avoid |
---|
262 | end-user confusion.</t> |
---|
263 | </section> |
---|
264 | <!-- bidiInput --> |
---|
265 | <section title="Examples"> |
---|
266 | <t>This section gives examples of Bidi IRIs in Bidi Notation. It shows |
---|
267 | legal IRIs with the relationship between their logical and visual |
---|
268 | representation and explains how certain phenomena in this relationship |
---|
269 | may look strange to somebody not familiar with bidirectional behavior, |
---|
270 | but familiar to users of Arabic and Hebrew. It also shows what happens |
---|
271 | if the restrictions given in <xref target="bidi-structure"/> are not |
---|
272 | followed. The examples below can be seen at <xref target="BidiEx"/>, in |
---|
273 | Arabic, Hebrew, and Bidi Notation variants.</t> |
---|
274 | <t>To read the bidi text in the examples, read the visual representation |
---|
275 | from left to right until you encounter a block of rtl text. Read the rtl |
---|
276 | block (including slashes and other special characters) from right to |
---|
277 | left, then continue at the next unread ltr character.</t> |
---|
278 | <t>Please note that "BN" stands for "Bidi Notation", see the "Notation" subsection of the Introduction. |
---|
279 | AR stands for Arabic, HE for Hebrew.</t> |
---|
280 | |
---|
281 | <t>Example 1: A single component with rtl characters is inverted: |
---|
282 | |
---|
283 | <vspace/>Logical representation (BN): "http://ab.CDEFGH.ij/kl/mn/op.html" |
---|
284 | <vspace/>Visual representation (BN): "http://ab.HGFEDC.ij/kl/mn/op.html" |
---|
285 | <ionly> |
---|
286 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.ij/kl/mn/op.html</span>" |
---|
287 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.ij/kl/mn/op.html</span>" |
---|
288 | </ionly> |
---|
289 | <vspace/>Components can be read one |
---|
290 | by one, and each component can be read in its natural direction.</t> |
---|
291 | |
---|
292 | <t>Example 2: More than one consecutive component with rtl characters is |
---|
293 | inverted as a whole: |
---|
294 | |
---|
295 | <vspace/>Logical representation (BN): "http://ab.CDE.FGH/ij/kl/mn/op.html" |
---|
296 | <vspace/>Visual representation (BN): "http://ab.HGF.EDC/ij/kl/mn/op.html" |
---|
297 | <ionly> |
---|
298 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثج.حخد/ij/kl/mn/op.html</span>" |
---|
299 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדה.וזח/ij/kl/mn/op.html</span>" |
---|
300 | </ionly> |
---|
301 | |
---|
302 | <vspace/> A sequence of rtl |
---|
303 | components is read rtl, in the same way as a sequence of rtl words is |
---|
304 | read rtl in a bidi text.</t> |
---|
305 | |
---|
306 | <t>Example 3: All components of an IRI (except for the scheme) are rtl. |
---|
307 | All rtl components are inverted overall: |
---|
308 | |
---|
309 | <vspace/>Logical representation (BN): "http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV" |
---|
310 | <vspace/>Visual representation (BN): "http://VU#TS=RQ;PO=NM?LK/JI/HG/FE.DC.BA" |
---|
311 | <ionly> |
---|
312 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://اب.تث.جح/خد/ذر/زس?شص=ضط;ظع=غف#قك</span>" |
---|
313 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://אב.גד.הו/זח/טי/כל?מן=סע;פץ=קר#שת</span>" |
---|
314 | </ionly> |
---|
315 | |
---|
316 | <vspace/> The |
---|
317 | whole IRI (except the scheme) is read rtl. Delimiters between rtl |
---|
318 | components stay between the respective components; delimiters between |
---|
319 | ltr and rtl components don't move.</t> |
---|
320 | |
---|
321 | <t>Example 4: Each of several sequences of rtl components is inverted on |
---|
322 | its own: |
---|
323 | |
---|
324 | <vspace/>Logical representation (BN): "http://AB.CD.ef/gh/IJ/KL.html" |
---|
325 | <vspace/>Visual representation (BN): "http://DC.BA.ef/gh/LK/JI.html" |
---|
326 | <ionly> |
---|
327 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://اب.تث.ef/gh/ذر/زس.html</span>" |
---|
328 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://אב.גד.ef/gh/טי/כל.html</span>" |
---|
329 | </ionly> |
---|
330 | |
---|
331 | <vspace/> Each sequence of rtl components |
---|
332 | is read rtl, in the same way as each sequence of rtl words in an ltr |
---|
333 | text is read rtl.</t> |
---|
334 | |
---|
335 | <t>Example 5: Example 2, applied to components of different kinds: |
---|
336 | |
---|
337 | <vspace/>Logical representation (BN): "http://ab.cd.EF/GH/ij/kl.html" |
---|
338 | <vspace/>Visual representation (BN): "http://ab.cd.HG/FE/ij/kl.html" |
---|
339 | <ionly> |
---|
340 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.جح/خد/ij/kl.html</span>" |
---|
341 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.הו/זח/ij/kl.html</span>" |
---|
342 | </ionly> |
---|
343 | |
---|
344 | <vspace/> |
---|
345 | The inversion of the domain name label and the path component may be |
---|
346 | unexpected, but it is consistent with other bidi behavior. For |
---|
347 | reassurance that the domain component really is "ab.cd.EF", it may be |
---|
348 | helpful to read aloud the visual representation following the Unicode |
---|
349 | Bidirectional Algorithm. After "http://ab.cd." one reads the RTL block |
---|
350 | "E-F-slash-G-H", which corresponds to the logical representation. </t> |
---|
351 | |
---|
352 | <t>Example 6: Same as Example 5, with more rtl components: |
---|
353 | |
---|
354 | <vspace/>Logical representation (BN): "http://ab.CD.EF/GH/IJ/kl.html" |
---|
355 | <vspace/>Visual representation (BN): "http://ab.JI/HG/FE.DC/kl.html" |
---|
356 | <ionly> |
---|
357 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تث.جح/خد/ذر/kl.html</span>" |
---|
358 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גד.הו/זח/טי/kl.html</span>" |
---|
359 | </ionly> |
---|
360 | |
---|
361 | <vspace/> The inversion of the domain |
---|
362 | name labels and the path components may be easier to identify because |
---|
363 | the delimiters also move.</t> |
---|
364 | |
---|
365 | <t>Example 7: A single rtl component includes digits: |
---|
366 | |
---|
367 | <vspace/>Logical representation (BN): "http://ab.CDE123FGH.ij/kl/mn/op.html" |
---|
368 | <vspace/>Visual representation (BN): "http://ab.HGF123EDC.ij/kl/mn/op.html" |
---|
369 | <ionly> |
---|
370 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثج123حخد.ij/kl/mn/op.html</span>" |
---|
371 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדה123וזח.ij/kl/mn/op.html</span>" |
---|
372 | </ionly> |
---|
373 | |
---|
374 | <vspace/> Numbers |
---|
375 | are written ltr in all cases but are treated as an additional embedding |
---|
376 | inside a run of rtl characters. This is completely consistent with usual |
---|
377 | bidirectional text.</t> |
---|
378 | |
---|
379 | <t>Example 8 (not allowed): Numbers are at the start or end of an rtl |
---|
380 | component: |
---|
381 | |
---|
382 | <vspace/>Logical representation (BN): "http://ab.cd.ef/GH1/2IJ/KL.html" |
---|
383 | <vspace/>Visual representation (BN): "http://ab.cd.ef/LK/JI1/2HG.html" |
---|
384 | <ionly> |
---|
385 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.ef/خد1/2ذر/زس.html</span>" |
---|
386 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.ef/זח1/2טי/כל.html</span>" |
---|
387 | </ionly> |
---|
388 | |
---|
389 | <vspace/> The sequence "1/2" is |
---|
390 | interpreted by the Bidirectional Algorithm as a fraction, fragmenting the |
---|
391 | components and leading to confusion. There are other characters that are |
---|
392 | interpreted in a special way close to numbers; in particular, "+", "-", |
---|
393 | "#", "$", "%", ",", ".", and ":".</t> |
---|
394 | |
---|
395 | <t>Example 9 (not allowed): The numbers in the previous example are |
---|
396 | percent-encoded: |
---|
397 | |
---|
398 | <vspace/>Logical representation (BN): "http://ab.cd.ef/GH%31/%32IJ/KL.html" |
---|
399 | <vspace/>Visual representation (BN): "http://ab.cd.ef/LK/JI%32/%31HG.html" |
---|
400 | <ionly> |
---|
401 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.cd.ef/خد%31/%32ذر/زس.html</span>" |
---|
402 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.cd.ef/זח%31/%32טי/כל.html</span>" |
---|
403 | </ionly> |
---|
404 | |
---|
405 | </t> |
---|
406 | |
---|
407 | <t>Example 10 (allowed but not recommended): |
---|
408 | |
---|
409 | <vspace/>Logical representation (BN): "http://ab.CDEFGH.123/kl/mn/op.html" |
---|
410 | <vspace/>Visual representation (BN): "http://ab.123.HGFEDC/kl/mn/op.html" |
---|
411 | <ionly> |
---|
412 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.123/kl/mn/op.html</span>" |
---|
413 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.123/kl/mn/op.html</span>" |
---|
414 | </ionly> |
---|
415 | |
---|
416 | <vspace/> Components |
---|
417 | consisting of only numbers are allowed (it would be rather difficult to |
---|
418 | prohibit them), but these may interact with adjacent RTL components in |
---|
419 | ways that are not easy to predict.</t> |
---|
420 | |
---|
421 | <t>Example 11 (allowed but not recommended): |
---|
422 | |
---|
423 | <vspace/>Logical representation (BN): "http://ab.CDEFGH.123ij/kl/mn/op.html" |
---|
424 | <vspace/>Visual representation (BN): "http://ab.123.HGFEDCij/kl/mn/op.html" |
---|
425 | <ionly> |
---|
426 | <vspace/>Visual representation (AR): "<span dir='ltr'>http://ab.تثجحخد.123ij/kl/mn/op.html</span>" |
---|
427 | <vspace/>Visual representation (HE): "<span dir='ltr'>http://ab.גדהוזח.123ij/kl/mn/op.html</span>" |
---|
428 | </ionly> |
---|
429 | |
---|
430 | <vspace/> |
---|
431 | Components consisting of numbers and left-to-right characters are |
---|
432 | allowed, but these may interact with adjacent RTL components in ways |
---|
433 | that are not easy to predict.</t> |
---|
434 | </section> |
---|
435 | <!-- examples --> |
---|
436 | <section title="IANA Considerations" anchor="iana"> |
---|
437 | <t>This document makes no changes to IANA registries.</t> |
---|
438 | </section> |
---|
439 | <!-- IANA --> |
---|
440 | <section title="Security Considerations" anchor="security"> |
---|
441 | <t>Confusion can occur with bidirectional IRIs, if the restrictions in |
---|
442 | <xref target="bidi-structure"/> are not followed. The same visual |
---|
443 | representation may be interpreted as different logical representations, |
---|
444 | and vice versa. It is also very important that a correct Unicode |
---|
445 | bidirectional implementation be used.</t> |
---|
446 | </section> |
---|
447 | <!-- security --> |
---|
448 | <section title="Acknowledgements"> |
---|
449 | <t>This document was derived from <xref target="RFC3987"/> and <xref |
---|
450 | target="RFC3987bis"/> and the acknowledgments of those documents |
---|
451 | apply.</t> |
---|
452 | </section> |
---|
453 | <!-- acknowledgements --> |
---|
454 | <section title="Main Changes Since RFC 3987"> |
---|
455 | <t>This section describes the main changes since <xref target="RFC3987"></xref>.</t> |
---|
456 | <t>Note to RFC Editor: Please remove this paragraph before publication. |
---|
457 | Detailed change logs are available in the IETF tools subversion repository at |
---|
458 | http://trac.tools.ietf.org/wg/iri/trac/log/draft-ietf-iri-3987bis/draft-ietf-iri-bidi-guidelines.xml.</t> |
---|
459 | |
---|
460 | <t><list style="symbols"> |
---|
461 | <t>Separated out the section on bidi in <xref target="RFC3987"/> to this document.</t> |
---|
462 | <t>Added examples in Arabic and Hebrew, which can be seen in html/pdf/utf8.txt versions.</t> |
---|
463 | <t>TODO: check for major changes between RFC3987 and draft -02.</t> |
---|
464 | </list> |
---|
465 | </t> |
---|
466 | </section> |
---|
467 | </middle> |
---|
468 | <back> |
---|
469 | <references title="Normative References"> |
---|
470 | <reference anchor="RFC3987bis" |
---|
471 | target="http://tools.ietf.org/id/draft-ietf-iri-3987bis"> |
---|
472 | <front> |
---|
473 | <title>Internationalized Resource Identifiers (IRIs)</title> |
---|
474 | <author initials="M." surname="Duerst"/> |
---|
475 | <author initials="L." surname="Masinter" fullname="Larry Masinter"/> |
---|
476 | <author initials="M." surname="Suignard"/> |
---|
477 | <date year="2011" month="August" day="14"/> |
---|
478 | </front> |
---|
479 | </reference> |
---|
480 | &rfc2119; |
---|
481 | &rfc3490; |
---|
482 | <reference anchor="UNIV6"> |
---|
483 | <front> |
---|
484 | <title>The Unicode Standard, Version 6.0.0 (Mountain View, CA, The |
---|
485 | Unicode Consortium, 2011, ISBN 978-1-936213-01-6)</title> |
---|
486 | <author> |
---|
487 | <organization>The Unicode Consortium</organization> |
---|
488 | </author> |
---|
489 | <date year="2010" month="October"/> |
---|
490 | </front> |
---|
491 | </reference> |
---|
492 | <reference anchor="UNI9" |
---|
493 | target="http://www.unicode.org/reports/tr9/tr9-13.html"> |
---|
494 | <front> |
---|
495 | <title>The Unicode Bidirectional Algorithm</title> |
---|
496 | <author initials="M." surname="Davis" fullname="Mark Davis"> |
---|
497 | <organization/> |
---|
498 | </author> |
---|
499 | <date year="2004" month="March"/> |
---|
500 | </front> |
---|
501 | <seriesInfo name="Unicode Standard Annex" value="#9"/> |
---|
502 | </reference> |
---|
503 | </references> |
---|
504 | <references title="Informative References"> |
---|
505 | <reference anchor="BidiEx" |
---|
506 | target="http://www.w3.org/International/iri-edit/BidiExamples"> |
---|
507 | <front> |
---|
508 | <title>Examples of Bidi IRIs</title> |
---|
509 | <author> |
---|
510 | <organization/> |
---|
511 | </author> |
---|
512 | <date year="" month=""/> |
---|
513 | </front> |
---|
514 | </reference> &rfc3987; </references> |
---|
515 | </back> |
---|
516 | </rfc> |
---|