com.opensymphony.module.sitemesh.parser
Class FastPageParser

java.lang.Object
  extended by com.opensymphony.module.sitemesh.parser.FastPageParser
All Implemented Interfaces:
PageParser

public final class FastPageParser
extends Object
implements PageParser

Very fast PageParser implementation for parsing HTML.

Produces FastPage.

Version:
$Revision: 1.10 $
Author:
Victor Salaman

Nested Class Summary
private  class FastPageParser.Tag
           
 
Field Summary
private static int BODY_HASH
           
private static int CONTENT_HASH
           
private static int FRAME_HASH
           
private static int FRAMESET_HASH
           
private static int HEAD_HASH
           
private static int HTML_HASH
           
private static int META_HASH
           
private static int PARAMETER_HASH
           
private static int SLASH_BODY_HASH
           
private static int SLASH_HEAD_HASH
           
private static int SLASH_HTML_HASH
           
private static int SLASH_TITLE_HASH
           
private static int SLASH_XML_HASH
           
private static int SLASH_XMP_HASH
           
private static int STATE_CDATA
           
private static int STATE_COMMENT
           
private static int STATE_DOCTYPE
           
private static int STATE_EOF
           
private static int STATE_SCRIPT
           
private static int STATE_TAG
           
private static int STATE_TAG_QUOTE
           
private static int STATE_TEXT
           
private static int TAG_STATE_BODY
           
private static int TAG_STATE_HEAD
           
private static int TAG_STATE_HTML
           
private static int TAG_STATE_NONE
           
private static int TAG_STATE_TITLE
           
private static int TAG_STATE_XML
           
private static int TAG_STATE_XMP
           
private static int TITLE_HASH
           
private static int TOKEN_CDATA
           
private static int TOKEN_COMMENT
           
private static int TOKEN_DOCTYPE
           
private static int TOKEN_EMPTYTAG
           
private static int TOKEN_EOF
           
private static int TOKEN_NONE
           
private static int TOKEN_SCRIPT
           
private static int TOKEN_TAG
           
private static int TOKEN_TEXT
           
private static int XML_HASH
           
private static int XMP_HASH
           
 
Constructor Summary
FastPageParser()
           
 
Method Summary
private  FastPage internalParse(Reader reader)
           
 Page parse(char[] data)
          This builds a Page.
 Page parse(Reader reader)
           
private static FastPageParser.Tag parseProperties(FastPageParser.Tag tag, CharArray buffer)
          This is called when we need to extract the properties for the tag from the tag's HTML.
private  FastPageParser.Tag parseTag(FastPageParser.Tag tag, CharArray buf)
          Populates a FastPageParser.Tag object using data from the supplied CharArray.
private static boolean shouldWriteToHead(int state, int laststate)
           
private static void writeTag(int state, int laststate, boolean hide, CharArray _head, CharArray _buffer, CharArray _body)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

TOKEN_NONE

private static final int TOKEN_NONE
See Also:
Constant Field Values

TOKEN_EOF

private static final int TOKEN_EOF
See Also:
Constant Field Values

TOKEN_TEXT

private static final int TOKEN_TEXT
See Also:
Constant Field Values

TOKEN_TAG

private static final int TOKEN_TAG
See Also:
Constant Field Values

TOKEN_COMMENT

private static final int TOKEN_COMMENT
See Also:
Constant Field Values

TOKEN_CDATA

private static final int TOKEN_CDATA
See Also:
Constant Field Values

TOKEN_SCRIPT

private static final int TOKEN_SCRIPT
See Also:
Constant Field Values

TOKEN_DOCTYPE

private static final int TOKEN_DOCTYPE
See Also:
Constant Field Values

TOKEN_EMPTYTAG

private static final int TOKEN_EMPTYTAG
See Also:
Constant Field Values

STATE_EOF

private static final int STATE_EOF
See Also:
Constant Field Values

STATE_TEXT

private static final int STATE_TEXT
See Also:
Constant Field Values

STATE_TAG

private static final int STATE_TAG
See Also:
Constant Field Values

STATE_COMMENT

private static final int STATE_COMMENT
See Also:
Constant Field Values

STATE_TAG_QUOTE

private static final int STATE_TAG_QUOTE
See Also:
Constant Field Values

STATE_CDATA

private static final int STATE_CDATA
See Also:
Constant Field Values

STATE_SCRIPT

private static final int STATE_SCRIPT
See Also:
Constant Field Values

STATE_DOCTYPE

private static final int STATE_DOCTYPE
See Also:
Constant Field Values

TAG_STATE_NONE

private static final int TAG_STATE_NONE
See Also:
Constant Field Values

TAG_STATE_HTML

private static final int TAG_STATE_HTML
See Also:
Constant Field Values

TAG_STATE_HEAD

private static final int TAG_STATE_HEAD
See Also:
Constant Field Values

TAG_STATE_TITLE

private static final int TAG_STATE_TITLE
See Also:
Constant Field Values

TAG_STATE_BODY

private static final int TAG_STATE_BODY
See Also:
Constant Field Values

TAG_STATE_XML

private static final int TAG_STATE_XML
See Also:
Constant Field Values

TAG_STATE_XMP

private static final int TAG_STATE_XMP
See Also:
Constant Field Values

SLASH_XML_HASH

private static final int SLASH_XML_HASH
See Also:
Constant Field Values

XML_HASH

private static final int XML_HASH
See Also:
Constant Field Values

SLASH_XMP_HASH

private static final int SLASH_XMP_HASH
See Also:
Constant Field Values

XMP_HASH

private static final int XMP_HASH
See Also:
Constant Field Values

HTML_HASH

private static final int HTML_HASH
See Also:
Constant Field Values

SLASH_HTML_HASH

private static final int SLASH_HTML_HASH
See Also:
Constant Field Values

HEAD_HASH

private static final int HEAD_HASH
See Also:
Constant Field Values

TITLE_HASH

private static final int TITLE_HASH
See Also:
Constant Field Values

SLASH_TITLE_HASH

private static final int SLASH_TITLE_HASH
See Also:
Constant Field Values

PARAMETER_HASH

private static final int PARAMETER_HASH
See Also:
Constant Field Values

META_HASH

private static final int META_HASH
See Also:
Constant Field Values

SLASH_HEAD_HASH

private static final int SLASH_HEAD_HASH
See Also:
Constant Field Values

FRAMESET_HASH

private static final int FRAMESET_HASH
See Also:
Constant Field Values

FRAME_HASH

private static final int FRAME_HASH
See Also:
Constant Field Values

BODY_HASH

private static final int BODY_HASH
See Also:
Constant Field Values

SLASH_BODY_HASH

private static final int SLASH_BODY_HASH
See Also:
Constant Field Values

CONTENT_HASH

private static final int CONTENT_HASH
See Also:
Constant Field Values
Constructor Detail

FastPageParser

public FastPageParser()
Method Detail

parse

public Page parse(char[] data)
           throws IOException
Description copied from interface: PageParser
This builds a Page.

Specified by:
parse in interface PageParser
Throws:
IOException

parse

public Page parse(Reader reader)

internalParse

private FastPage internalParse(Reader reader)

writeTag

private static void writeTag(int state,
                             int laststate,
                             boolean hide,
                             CharArray _head,
                             CharArray _buffer,
                             CharArray _body)

shouldWriteToHead

private static boolean shouldWriteToHead(int state,
                                         int laststate)

parseTag

private FastPageParser.Tag parseTag(FastPageParser.Tag tag,
                                    CharArray buf)
Populates a FastPageParser.Tag object using data from the supplied CharArray. The supplied tag parameter is reset and reused - this avoids excess object creation, which helps performance.

Returns:
the same tag instance that was passed in, except it will be populated with a new name value (and the corresponding nameEndIdx value). However, if the tag contained nothing but whitespace, this method will return null.

parseProperties

private static FastPageParser.Tag parseProperties(FastPageParser.Tag tag,
                                                  CharArray buffer)
This is called when we need to extract the properties for the tag from the tag's HTML. We only call this when necessary since it has quite a lot of overhead.

Parameters:
tag - the tag that is currently being processed. This should be the tag that was returned as a result of a call to parseTag(FastPageParser.Tag, CharArray) (i.e., it has the name and nameEndIdx fields set correctly for the tag in question). The properties field can be in an undefined state - it will get replaced regardless.
buffer - a CharArray containing the entire tag that is being parsed.
Returns:
the same tag instance that was passed in, only it will now be populated with any properties that were specified in the tag's HTML.

www.opensymphony.com/sitemesh/