[html5] r3191 - [ct] (0) Support dash-dash-bang-angle comment terminators.

Thu Jun 4 16:42:56 PDT 2009

Author: ianh
Date: 2009-06-04 16:42:54 -0700 (Thu, 04 Jun 2009)
New Revision: 3191

Modified:
   index
   source
Log:
[ct] (0) Support dash-dash-bang-angle comment terminators.

Modified: index
===================================================================

--- index	2009-06-04 22:53:35 UTC (rev 3190)
+++ index	2009-06-04 23:42:54 UTC (rev 3191)
@@ -966,21 +966,22 @@
        <li><a href=#comment-state><span class=secno>9.2.4.20 </span>Comment state</a></li>
        <li><a href=#comment-end-dash-state><span class=secno>9.2.4.21 </span>Comment end dash state</a></li>
        <li><a href=#comment-end-state><span class=secno>9.2.4.22 </span>Comment end state</a></li>
-       <li><a href=#doctype-state><span class=secno>9.2.4.23 </span>DOCTYPE state</a></li>
-       <li><a href=#before-doctype-name-state><span class=secno>9.2.4.24 </span>Before DOCTYPE name state</a></li>
-       <li><a href=#doctype-name-state><span class=secno>9.2.4.25 </span>DOCTYPE name state</a></li>
-       <li><a href=#after-doctype-name-state><span class=secno>9.2.4.26 </span>After DOCTYPE name state</a></li>
-       <li><a href=#before-doctype-public-identifier-state><span class=secno>9.2.4.27 </span>Before DOCTYPE public identifier state</a></li>
-       <li><a href=#doctype-public-identifier-(double-quoted)-state><span class=secno>9.2.4.28 </span>DOCTYPE public identifier (double-quoted) state</a></li>
-       <li><a href=#doctype-public-identifier-(single-quoted)-state><span class=secno>9.2.4.29 </span>DOCTYPE public identifier (single-quoted) state</a></li>
-       <li><a href=#after-doctype-public-identifier-state><span class=secno>9.2.4.30 </span>After DOCTYPE public identifier state</a></li>
-       <li><a href=#before-doctype-system-identifier-state><span class=secno>9.2.4.31 </span>Before DOCTYPE system identifier state</a></li>
-       <li><a href=#doctype-system-identifier-(double-quoted)-state><span class=secno>9.2.4.32 </span>DOCTYPE system identifier (double-quoted) state</a></li>
-       <li><a href=#doctype-system-identifier-(single-quoted)-state><span class=secno>9.2.4.33 </span>DOCTYPE system identifier (single-quoted) state</a></li>
-       <li><a href=#after-doctype-system-identifier-state><span class=secno>9.2.4.34 </span>After DOCTYPE system identifier state</a></li>
-       <li><a href=#bogus-doctype-state><span class=secno>9.2.4.35 </span>Bogus DOCTYPE state</a></li>
-       <li><a href=#cdata-section-state><span class=secno>9.2.4.36 </span>CDATA section state</a></li>
-       <li><a href=#tokenizing-character-references><span class=secno>9.2.4.37 </span>Tokenizing character references</a></ol></li>
+       <li><a href=#comment-end-bang-state><span class=secno>9.2.4.23 </span>Comment end bang state</a></li>
+       <li><a href=#doctype-state><span class=secno>9.2.4.24 </span>DOCTYPE state</a></li>
+       <li><a href=#before-doctype-name-state><span class=secno>9.2.4.25 </span>Before DOCTYPE name state</a></li>
+       <li><a href=#doctype-name-state><span class=secno>9.2.4.26 </span>DOCTYPE name state</a></li>
+       <li><a href=#after-doctype-name-state><span class=secno>9.2.4.27 </span>After DOCTYPE name state</a></li>
+       <li><a href=#before-doctype-public-identifier-state><span class=secno>9.2.4.28 </span>Before DOCTYPE public identifier state</a></li>
+       <li><a href=#doctype-public-identifier-(double-quoted)-state><span class=secno>9.2.4.29 </span>DOCTYPE public identifier (double-quoted) state</a></li>
+       <li><a href=#doctype-public-identifier-(single-quoted)-state><span class=secno>9.2.4.30 </span>DOCTYPE public identifier (single-quoted) state</a></li>
+       <li><a href=#after-doctype-public-identifier-state><span class=secno>9.2.4.31 </span>After DOCTYPE public identifier state</a></li>
+       <li><a href=#before-doctype-system-identifier-state><span class=secno>9.2.4.32 </span>Before DOCTYPE system identifier state</a></li>
+       <li><a href=#doctype-system-identifier-(double-quoted)-state><span class=secno>9.2.4.33 </span>DOCTYPE system identifier (double-quoted) state</a></li>
+       <li><a href=#doctype-system-identifier-(single-quoted)-state><span class=secno>9.2.4.34 </span>DOCTYPE system identifier (single-quoted) state</a></li>
+       <li><a href=#after-doctype-system-identifier-state><span class=secno>9.2.4.35 </span>After DOCTYPE system identifier state</a></li>
+       <li><a href=#bogus-doctype-state><span class=secno>9.2.4.36 </span>Bogus DOCTYPE state</a></li>
+       <li><a href=#cdata-section-state><span class=secno>9.2.4.37 </span>CDATA section state</a></li>
+       <li><a href=#tokenizing-character-references><span class=secno>9.2.4.38 </span>Tokenizing character references</a></ol></li>
      <li><a href=#tree-construction><span class=secno>9.2.5 </span>Tree construction</a>
       <ol>
        <li><a href=#creating-and-inserting-elements><span class=secno>9.2.5.1 </span>Creating and inserting elements</a></li>
@@ -60831,8 +60832,9 @@
    <dd>Switch to the <a href=#comment-start-dash-state>comment start dash state</a>.</dd>
 
    <dt>U+003E GREATER-THAN SIGN (>)</dt>
-   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Switch to
-   the <a href=#data-state>data state</a>.</dd>
+   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Switch to the
+   <a href=#data-state>data state</a>.</dd> <!-- see comment in comment end state
+   -->
 
    <dt>EOF</dt>
    <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
@@ -60854,8 +60856,9 @@
    the <a href=#data-state>data state</a>.</dd>
 
    <dt>EOF</dt>
-   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
-   the EOF character in the <a href=#data-state>data state</a>.</dd>
+   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume the
+   EOF character in the <a href=#data-state>data state</a>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append a U+002D HYPHEN-MINUS (-) character and the input
@@ -60870,12 +60873,9 @@
    <dd>Switch to the <a href=#comment-end-dash-state>comment end dash state</a>.</dd>
 
    <dt>EOF</dt>
-   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
-   the EOF character in the <a href=#data-state>data state</a>.</dd> <!-- For
-   security reasons: otherwise, hostile user could put a <script> in
-   a comment e.g. in a blog comment and then DOS the server so that
-   the end tag isn't read, and then the commented <script> tag would
-   be treated as live code -->
+   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume the
+   EOF character in the <a href=#data-state>data state</a>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append the input character to the comment token's data. Stay
@@ -60889,12 +60889,9 @@
    <dd>Switch to the <a href=#comment-end-state>comment end state</a>.</dd>
 
    <dt>EOF</dt>
-   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
-   the EOF character in the <a href=#data-state>data state</a>.</dd> <!-- For
-   security reasons: otherwise, hostile user could put a <script> in
-   a comment e.g. in a blog comment and then DOS the server so that
-   the end tag isn't read, and then the commented <script> tag would
-   be treated as live code -->
+   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume the
+   EOF character in the <a href=#data-state>data state</a>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append a U+002D HYPHEN-MINUS (-) character and the input
@@ -60914,6 +60911,10 @@
    (-) character to the comment token's data. Stay in the
    <a href=#comment-end-state>comment end state</a>.</dd>
 
+   <dt>U+0021 EXCLAMATION MARK (!)</dt>
+   <dd><a href=#parse-error>Parse error</a>. Switch to the <a href=#comment-end-bang-state>comment end bang
+   state</a>.</dd>
+
    <dt>EOF</dt>
    <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
    the EOF character in the <a href=#data-state>data state</a>.</dd> <!-- For
@@ -60927,10 +60928,34 @@
    characters and the input character to the comment token's
    data. Switch to the <a href=#comment-state>comment state</a>.</dd>
 
-  </dl><h5 id=doctype-state><span class=secno>9.2.4.23 </span><dfn>DOCTYPE state</dfn></h5>
+  </dl><h5 id=comment-end-bang-state><span class=secno>9.2.4.23 </span><dfn>Comment end bang state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
+  <dl class=switch><dt>U+003E GREATER-THAN SIGN (>)</dt>
+   <dd>Emit the comment token. Switch to the <a href=#data-state>data
+   state</a>.</dd>
+
+   <dt>U+002D HYPHEN-MINUS (-)</dt>
+   <dd>Append two U+002D HYPHEN-MINUS (-) characters and a U+0021
+   EXCLAMATION MARK (!) character to the comment token's data. Switch
+   to the <a href=#comment-end-dash-state>comment end dash state</a>.</dd>
+
+   <dt>EOF</dt>
+   <dd><a href=#parse-error>Parse error</a>. Emit the comment token. Reconsume
+   the EOF character in the <a href=#data-state>data state</a>.</dd> <!-- see
+   comment in comment end state -->
+
+   <dt>Anything else</dt>
+   <dd><a href=#parse-error>Parse error</a>. Append two U+002D HYPHEN-MINUS (-)
+   characters, a U+0021 EXCLAMATION MARK (!) character, and the input
+   character to the comment token's data. Switch to the <a href=#comment-state>comment
+   state</a>.</dd>
+
+  </dl><h5 id=doctype-state><span class=secno>9.2.4.24 </span><dfn>DOCTYPE state</dfn></h5>
+
+  <p>Consume the <a href=#next-input-character>next input character</a>:</p>
+
   <dl class=switch><dt>U+0009 CHARACTER TABULATION</dt>
    <dt>U+000A LINE FEED (LF)</dt>
    <dt>U+000C FORM FEED (FF)</dt>
@@ -60947,7 +60972,7 @@
    <dd><a href=#parse-error>Parse error</a>. Reconsume the current
    character in the <a href=#before-doctype-name-state>before DOCTYPE name state</a>.</dd>
 
-  </dl><h5 id=before-doctype-name-state><span class=secno>9.2.4.24 </span><dfn>Before DOCTYPE name state</dfn></h5>
+  </dl><h5 id=before-doctype-name-state><span class=secno>9.2.4.25 </span><dfn>Before DOCTYPE name state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -60979,7 +61004,7 @@
    <a href=#current-input-character>current input character</a>. Switch to the <a href=#doctype-name-state>DOCTYPE name
    state</a>.</dd>
 
-  </dl><h5 id=doctype-name-state><span class=secno>9.2.4.25 </span><dfn>DOCTYPE name state</dfn></h5>
+  </dl><h5 id=doctype-name-state><span class=secno>9.2.4.26 </span><dfn>DOCTYPE name state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61008,7 +61033,7 @@
    <dd>Append the <a href=#current-input-character>current input character</a> to the current DOCTYPE
    token's name. Stay in the <a href=#doctype-name-state>DOCTYPE name state</a>.</dd>
 
-  </dl><h5 id=after-doctype-name-state><span class=secno>9.2.4.26 </span><dfn>After DOCTYPE name state</dfn></h5>
+  </dl><h5 id=after-doctype-name-state><span class=secno>9.2.4.27 </span><dfn>After DOCTYPE name state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61048,7 +61073,7 @@
 
    </dd>
 
-  </dl><h5 id=before-doctype-public-identifier-state><span class=secno>9.2.4.27 </span><dfn>Before DOCTYPE public identifier state</dfn></h5>
+  </dl><h5 id=before-doctype-public-identifier-state><span class=secno>9.2.4.28 </span><dfn>Before DOCTYPE public identifier state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61084,7 +61109,7 @@
    <i>force-quirks flag</i> to <i>on</i>. Switch to the <a href=#bogus-doctype-state>bogus
    DOCTYPE state</a>.</dd>
 
-  </dl><h5 id=doctype-public-identifier-(double-quoted)-state><span class=secno>9.2.4.28 </span><dfn>DOCTYPE public identifier (double-quoted) state</dfn></h5>
+  </dl><h5 id=doctype-public-identifier-(double-quoted)-state><span class=secno>9.2.4.29 </span><dfn>DOCTYPE public identifier (double-quoted) state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61106,7 +61131,7 @@
    token's public identifier. Stay in the <a href=#doctype-public-identifier-(double-quoted)-state>DOCTYPE public
    identifier (double-quoted) state</a>.</dd>
 
-  </dl><h5 id=doctype-public-identifier-(single-quoted)-state><span class=secno>9.2.4.29 </span><dfn>DOCTYPE public identifier (single-quoted) state</dfn></h5>
+  </dl><h5 id=doctype-public-identifier-(single-quoted)-state><span class=secno>9.2.4.30 </span><dfn>DOCTYPE public identifier (single-quoted) state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61128,7 +61153,7 @@
    token's public identifier. Stay in the <a href=#doctype-public-identifier-(single-quoted)-state>DOCTYPE public
    identifier (single-quoted) state</a>.</dd>
 
-  </dl><h5 id=after-doctype-public-identifier-state><span class=secno>9.2.4.30 </span><dfn>After DOCTYPE public identifier state</dfn></h5>
+  </dl><h5 id=after-doctype-public-identifier-state><span class=secno>9.2.4.31 </span><dfn>After DOCTYPE public identifier state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61163,7 +61188,7 @@
    <i>force-quirks flag</i> to <i>on</i>. Switch to the <a href=#bogus-doctype-state>bogus
    DOCTYPE state</a>.</dd>
 
-  </dl><h5 id=before-doctype-system-identifier-state><span class=secno>9.2.4.31 </span><dfn>Before DOCTYPE system identifier state</dfn></h5>
+  </dl><h5 id=before-doctype-system-identifier-state><span class=secno>9.2.4.32 </span><dfn>Before DOCTYPE system identifier state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61199,7 +61224,7 @@
    <i>force-quirks flag</i> to <i>on</i>. Switch to the <a href=#bogus-doctype-state>bogus
    DOCTYPE state</a>.</dd>
 
-  </dl><h5 id=doctype-system-identifier-(double-quoted)-state><span class=secno>9.2.4.32 </span><dfn>DOCTYPE system identifier (double-quoted) state</dfn></h5>
+  </dl><h5 id=doctype-system-identifier-(double-quoted)-state><span class=secno>9.2.4.33 </span><dfn>DOCTYPE system identifier (double-quoted) state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61221,7 +61246,7 @@
    token's system identifier. Stay in the <a href=#doctype-system-identifier-(double-quoted)-state>DOCTYPE system
    identifier (double-quoted) state</a>.</dd>
 
-  </dl><h5 id=doctype-system-identifier-(single-quoted)-state><span class=secno>9.2.4.33 </span><dfn>DOCTYPE system identifier (single-quoted) state</dfn></h5>
+  </dl><h5 id=doctype-system-identifier-(single-quoted)-state><span class=secno>9.2.4.34 </span><dfn>DOCTYPE system identifier (single-quoted) state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61243,7 +61268,7 @@
    token's system identifier. Stay in the <a href=#doctype-system-identifier-(single-quoted)-state>DOCTYPE system
    identifier (single-quoted) state</a>.</dd>
 
-  </dl><h5 id=after-doctype-system-identifier-state><span class=secno>9.2.4.34 </span><dfn>After DOCTYPE system identifier state</dfn></h5>
+  </dl><h5 id=after-doctype-system-identifier-state><span class=secno>9.2.4.35 </span><dfn>After DOCTYPE system identifier state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61268,7 +61293,7 @@
    state</a>. (This does <em>not</em> set the DOCTYPE token's
    <i>force-quirks flag</i> to <i>on</i>.)</dd>
 
-  </dl><h5 id=bogus-doctype-state><span class=secno>9.2.4.35 </span><dfn>Bogus DOCTYPE state</dfn></h5>
+  </dl><h5 id=bogus-doctype-state><span class=secno>9.2.4.36 </span><dfn>Bogus DOCTYPE state</dfn></h5>
 
   <p>Consume the <a href=#next-input-character>next input character</a>:</p>
 
@@ -61283,7 +61308,7 @@
    <dt>Anything else</dt>
    <dd>Stay in the <a href=#bogus-doctype-state>bogus DOCTYPE state</a>.</dd>
 
-  </dl><h5 id=cdata-section-state><span class=secno>9.2.4.36 </span><dfn>CDATA section state</dfn></h5>
+  </dl><h5 id=cdata-section-state><span class=secno>9.2.4.37 </span><dfn>CDATA section state</dfn></h5>
 
   <p><em>(This can only happen if the <a href=#content-model-flag>content model
   flag</a> is set to the PCDATA state, and is unrelated to the
@@ -61304,7 +61329,7 @@
 
 
 
-  <h5 id=tokenizing-character-references><span class=secno>9.2.4.37 </span>Tokenizing character references</h5>
+  <h5 id=tokenizing-character-references><span class=secno>9.2.4.38 </span>Tokenizing character references</h5>
 
   <p>This section defines how to <dfn id=consume-a-character-reference>consume a character
   reference</dfn>. This definition is used when parsing character

Modified: source
===================================================================
--- source	2009-06-04 22:53:35 UTC (rev 3190)
+++ source	2009-06-04 23:42:54 UTC (rev 3191)
@@ -74642,8 +74642,9 @@
    <dd>Switch to the <span>comment start dash state</span>.</dd>
 
    <dt>U+003E GREATER-THAN SIGN (>)</dt>
-   <dd><span>Parse error</span>. Emit the comment token. Switch to
-   the <span>data state</span>.</dd>
+   <dd><span>Parse error</span>. Emit the comment token. Switch to the
+   <span>data state</span>.</dd> <!-- see comment in comment end state
+   -->
 
    <dt>EOF</dt>
    <dd><span>Parse error</span>. Emit the comment token. Reconsume
@@ -74670,8 +74671,9 @@
    the <span>data state</span>.</dd>
 
    <dt>EOF</dt>
-   <dd><span>Parse error</span>. Emit the comment token. Reconsume
-   the EOF character in the <span>data state</span>.</dd>
+   <dd><span>Parse error</span>. Emit the comment token. Reconsume the
+   EOF character in the <span>data state</span>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append a U+002D HYPHEN-MINUS (-) character and the input
@@ -74691,12 +74693,9 @@
    <dd>Switch to the <span>comment end dash state</span>.</dd>
 
    <dt>EOF</dt>
-   <dd><span>Parse error</span>. Emit the comment token. Reconsume
-   the EOF character in the <span>data state</span>.</dd> <!-- For
-   security reasons: otherwise, hostile user could put a <script> in
-   a comment e.g. in a blog comment and then DOS the server so that
-   the end tag isn't read, and then the commented <script> tag would
-   be treated as live code -->
+   <dd><span>Parse error</span>. Emit the comment token. Reconsume the
+   EOF character in the <span>data state</span>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append the input character to the comment token's data. Stay
@@ -74715,12 +74714,9 @@
    <dd>Switch to the <span>comment end state</span>.</dd>
 
    <dt>EOF</dt>
-   <dd><span>Parse error</span>. Emit the comment token. Reconsume
-   the EOF character in the <span>data state</span>.</dd> <!-- For
-   security reasons: otherwise, hostile user could put a <script> in
-   a comment e.g. in a blog comment and then DOS the server so that
-   the end tag isn't read, and then the commented <script> tag would
-   be treated as live code -->
+   <dd><span>Parse error</span>. Emit the comment token. Reconsume the
+   EOF character in the <span>data state</span>.</dd> <!-- see comment
+   in comment end state -->
 
    <dt>Anything else</dt>
    <dd>Append a U+002D HYPHEN-MINUS (-) character and the input
@@ -74745,6 +74741,10 @@
    (-) character to the comment token's data. Stay in the
    <span>comment end state</span>.</dd>
 
+   <dt>U+0021 EXCLAMATION MARK (!)</dt>
+   <dd><span>Parse error</span>. Switch to the <span>comment end bang
+   state</span>.</dd>
+
    <dt>EOF</dt>
    <dd><span>Parse error</span>. Emit the comment token. Reconsume
    the EOF character in the <span>data state</span>.</dd> <!-- For
@@ -74761,6 +74761,35 @@
   </dl>
 
 
+  <h5><dfn>Comment end bang state</dfn></h5>
+
+  <p>Consume the <span>next input character</span>:</p>
+
+  <dl class="switch">
+
+   <dt>U+003E GREATER-THAN SIGN (>)</dt>
+   <dd>Emit the comment token. Switch to the <span>data
+   state</span>.</dd>
+
+   <dt>U+002D HYPHEN-MINUS (-)</dt>
+   <dd>Append two U+002D HYPHEN-MINUS (-) characters and a U+0021
+   EXCLAMATION MARK (!) character to the comment token's data. Switch
+   to the <span>comment end dash state</span>.</dd>
+
+   <dt>EOF</dt>
+   <dd><span>Parse error</span>. Emit the comment token. Reconsume
+   the EOF character in the <span>data state</span>.</dd> <!-- see
+   comment in comment end state -->
+
+   <dt>Anything else</dt>
+   <dd><span>Parse error</span>. Append two U+002D HYPHEN-MINUS (-)
+   characters, a U+0021 EXCLAMATION MARK (!) character, and the input
+   character to the comment token's data. Switch to the <span>comment
+   state</span>.</dd>
+
+  </dl>
+
+
   <h5><dfn>DOCTYPE state</dfn></h5>
 
   <p>Consume the <span>next input character</span>:</p>