diff --git a/src/parser.h b/src/parser.h index c8e1b61..c04e1e8 100644 --- a/src/parser.h +++ b/src/parser.h @@ -1,70 +1,205 @@ #ifndef __PARSER_H__ #define __PARSER_H__ -/* parser.h -- HTML Parser - - (c) 1998-2007 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - -*/ +/**************************************************************************//** + * @file + * HTML and XML Parsers. + * + * Tidy's HTML parser corrects many conditions and enforces certain user + * preferences during the parsing process. The XML parser produces a tree + * of nodes useful to Tidy but also suitable for use in other XML processing + * applications. + * + * @author HTACG, et al (consult git log) + * + * @copyright + * Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts + * Institute of Technology, European Research Consortium for Informatics + * and Mathematics, Keio University) and HTACG. + * @par + * All Rights Reserved. + * @par + * See `tidy.h` for the complete license. + * + * @date Additional updates: consult git log + * + ******************************************************************************/ #include "forward.h" +/** @addtogroup internal_api */ +/** @{ */ + + +/***************************************************************************//** + ** @defgroup parser_h HTML and XML Parsing + ** + ** These functions and structures form the internal API for document + ** parsing. + ** + ** @{ + ******************************************************************************/ + + +/** + * Is used to perform a node integrity check recursively after parsing + * an HTML or XML document. + * @note Actual performance of this check can be disabled by defining the + * macro NO_NODE_INTEGRITY_CHECK. + * @param node The root node for the integrity check. + * @returns Returns yes or no indicating integrity of the node structure. + */ Bool TY_(CheckNodeIntegrity)(Node *node); + +/** + * Indicates whether or not a text node ends with a space or newline. 
+ * @note Implementation of this method is found in @ref pprint.c for + * some reason. + * @param lexer A reference to the lexer used to lex the document. + * @param node The node to check. + * @returns The result of the check. + */ Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node ); -/* - used to determine how attributes - without values should be printed - this was introduced to deal with - user defined tags e.g. ColdFusion -*/ + +/** + * Used to check if a node uses CM_NEW, which determines how attributes + * without values should be printed. This was introduced to deal with + * user-defined tags e.g. ColdFusion. + * @param node The node to check. + * @returns The result of the check. + */ Bool TY_(IsNewNode)(Node *node); + +/** + * Transforms a given node to another element, for example, from a
+ * `<p>` to a `<br>`.
+ * @param doc The document which the node belongs to.
+ * @param node The node to coerce.
+ * @param tid The tag type to coerce the node into.
+ * @param obsolete If the old node was obsolete, a report will be generated.
+ * @param expected If the old node was not expected to be found in this
+ * particular location, a report will be generated.
+ */
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
-/* extract a node and its children from a markup tree */
+
+/**
+ * Extract a node and its children from a markup tree.
+ * @param node The node to remove.
+ * @returns Returns the removed node.
+ */
Node *TY_(RemoveNode)(Node *node);
-/* remove node from markup tree and discard it */
+
+/**
+ * Remove node from markup tree and discard it.
+ * @param doc The Tidy document from which to discard the node.
+ * @param element The node to discard.
+ * @returns Returns the next node.
+ */
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
-/* insert node into markup tree as the firt element
- of content of element */
+
+/**
+ * Insert node into markup tree as the first element of content of element.
+ * @param element The new destination node.
+ * @param node The node to insert.
+ */
void TY_(InsertNodeAtStart)(Node *element, Node *node);
-/* insert node into markup tree as the last element
- of content of "element" */
+
+/**
+ * Insert node into markup tree as the last element of content of element.
+ * @param element The new destination node.
+ * @param node The node to insert.
+ */
void TY_(InsertNodeAtEnd)(Node *element, Node *node);
-/* insert node into markup tree before element */
+
+/**
+ * Insert node into markup tree before element.
+ * @param element The node before which the node is inserted.
+ * @param node The node to insert.
+ */
void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
-/* insert node into markup tree after element */
+
+/**
+ * Insert node into markup tree after element.
+ * @param element The node after which the node is inserted.
+ * @param node The node to insert.
+ */
void TY_(InsertNodeAfterElement)(Node *element, Node *node);
+
+/**
+ * Trims a single, empty element, returning the next node.
+ * @param doc The Tidy document.
+ * @param element The element to trim.
+ * @returns Returns the next node.
+ */
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
+
+
+/**
+ * Trims a tree of empty elements recursively, returning the next node.
+ * @param doc The Tidy document.
+ * @param node The element to trim.
+ * @returns Returns the next node.
+ */
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
-/* assumes node is a text node */
+/**
+ * Indicates whether or not a text node is blank, meaning that it consists
+ * of nothing, or a single space.
+ * @param lexer The lexer used to lex the document.
+ * @param node The node to test.
+ * @returns Returns the result of the test.
+ */
Bool TY_(IsBlank)(Lexer *lexer, Node *node);
+
+/**
+ * Indicates whether or not a node is declared as containing javascript
+ * code.
+ * @param node The node to test.
+ * @returns Returns the result of the test.
+ */
Bool TY_(IsJavaScript)(Node *node);
-/*
- HTML is the top level element
-*/
+
+/**
+ * Parses a document after lexing using the HTML parser. It begins by properly
+ * configuring the overall HTML structure, and subsequently processes all
+ * remaining nodes. HTML is the root node.
+ * @param doc The Tidy document.
+ */
void TY_(ParseDocument)( TidyDocImpl* doc );
-
-/*
- XML documents
-*/
+/**
+ * Indicates whether or not whitespace is to be preserved in XHTML/XML
+ * documents.
+ * @param doc The Tidy document.
+ * @param element The node to test.
+ * @returns Returns the result of the test.
+ */
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
+
+/**
+ * Parses a document after lexing using the XML parser.
+ * @param doc The Tidy document.
+ */
void TY_(ParseXMLDocument)( TidyDocImpl* doc );
+
+/** @} end parser_h group */
+/** @} end internal_api group */
+
#endif /* __PARSER_H__ */
+