This commit is contained in:
Michael[tm] Smith 2011-11-17 11:44:16 +09:00
commit b92d7aab88
618 changed files with 80398 additions and 0 deletions

236
build/gmake/Makefile Normal file
View file

@ -0,0 +1,236 @@
# Makefile - for tidy - HTML parser and pretty printer
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2008/03/22 21:13:38 $
# $Revision: 1.37 $
#
# Copyright (c) 1998-2008 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# Contributing Author(s):
#
# Dave Raggett <dsr@w3.org>
# Terry Teague <terry_teague@users.sourceforge.net>
# Pradeep Padala<ppadala@users.sourceforge.net>
#
# The contributing author(s) would like to thank all those who
# helped with testing, bug fixes, and patience. This wouldn't
# have been possible without all of you.
#
# COPYRIGHT NOTICE:
#
# This software and documentation is provided "as is," and
# the copyright holders and contributing author(s) make no
# representations or warranties, express or implied, including
# but not limited to, warranties of merchantability or fitness
# for any particular purpose or that the use of the software or
# documentation will not infringe any third party patents,
# copyrights, trademarks or other rights.
#
# The copyright holders and contributing author(s) will not be
# liable for any direct, indirect, special or consequential damages
# arising out of any use of the software or documentation, even if
# advised of the possibility of such damage.
#
# Permission is hereby granted to use, copy, modify, and distribute
# this source code, or portions hereof, documentation and executables,
# for any purpose, without fee, subject to the following restrictions:
#
# 1. The origin of this source code must not be misrepresented.
# 2. Altered versions must be plainly marked as such and must
# not be misrepresented as being the original source.
# 3. This Copyright notice may not be removed or altered from any
# source or altered source distribution.
#
# The copyright holders and contributing author(s) specifically
# permit, without fee, and encourage the use of this source code
# as a component for supporting the Hypertext Markup Language in
# commercial products. If you use this source code in a product,
# acknowledgment is not required but would be appreciated.
#
# Shell used to run every recipe, and the project name from which the
# derived file and directory names below are built.
SHELL   = /bin/sh
PROJECT = tidy

# Installation variables. Spaces OK, only dir create and file copy operations.
# runinst_prefix roots the program directory; devinst_prefix roots the
# library, header and man page directories.
runinst_prefix = /usr/local
devinst_prefix = /usr/local

bininst = $(runinst_prefix)/bin
libinst = $(devinst_prefix)/lib
incinst = $(devinst_prefix)/include/$(PROJECT)
maninst = $(devinst_prefix)/man

# Internal variables. - No spaces allowed: libtool chokes on spaces in directory names.
# All paths are relative to the directory holding this Makefile.
TOPDIR = ../..

# Inputs: public headers, console program sources, library sources.
INCDIR = $(TOPDIR)/include
APPDIR = $(TOPDIR)/console
SRCDIR = $(TOPDIR)/src

# Outputs: objects, the library, the programs, generated documentation.
OBJDIR = ./obj
LIBDIR = $(TOPDIR)/lib
BINDIR = $(TOPDIR)/bin
DOCDIR = $(TOPDIR)/htmldoc
# Note about shared library and exported symbols:
# With gcc, one can control the exported symbols by either using
# "-fvisibility=hidden -DTIDY_EXPORT='__attribute__((visibility("default")))'"
# or using a linker map (see GNU ld "--version-script").
# Lookup based on hash table can be disabled with
# "-DELEMENT_HASH_LOOKUP=0 -DATTRIBUTE_HASH_LOOKUP=0"
# Memory mapped i/o can be disabled with -DSUPPORT_POSIX_MAPPED_FILES=0
#
# CFLAGS etc..
# For optimised builds, flags such as "-O2" should be added and -D_DEBUG=1
# disabled.
# Compiler and baseline flags: debug info, strict warnings, and the
# public header directory on the include path.
CC= gcc
CFLAGS= -g -pedantic -Wall -I $(INCDIR)
# flags only supported with gcc 3.x
CFLAGS += -Wunused-parameter
# Extra flags passed next to CFLAGS on every compile line of this Makefile.
# _DEBUG is switched on by default; see the note above about optimised builds.
OTHERCFLAGS=
OTHERCFLAGS+= -D_DEBUG=1
# OTHERCFLAGS+= -fvisibility=hidden -DTIDY_EXPORT='__attribute__((visibility("default")))'
# Optional feature switches. Each one is forwarded to the compiler only
# when the variable of the same name has a non-empty value (for example
# from the environment or the make command line); otherwise the sources
# see no definition from here.
ifdef SUPPORT_UTF16_ENCODINGS
CFLAGS += -DSUPPORT_UTF16_ENCODINGS=$(SUPPORT_UTF16_ENCODINGS)
endif
ifdef SUPPORT_ASIAN_ENCODINGS
CFLAGS += -DSUPPORT_ASIAN_ENCODINGS=$(SUPPORT_ASIAN_ENCODINGS)
endif
ifdef SUPPORT_ACCESSIBILITY_CHECKS
CFLAGS += -DSUPPORT_ACCESSIBILITY_CHECKS=$(SUPPORT_ACCESSIBILITY_CHECKS)
endif
# DEBUGFLAGS and DEBUGLIBS are only added by the "debug" target, which
# re-invokes make with them appended to CFLAGS and LIBS respectively.
DEBUGFLAGS=-g
ifdef DMALLOC
DEBUGFLAGS += -DDMALLOC
endif
# Libraries appended to the tab2space link line; empty for a normal build.
LIBS=
DEBUGLIBS=-ldmalloc
# Tidy lib related variables
TIDY_MAJOR = 1
TIDY_MINOR = 0

# External tools: the archiver (with its "replace members" flag) and
# the XSLT processor used for the generated documentation.
AR       = ar -r
XSLTPROC = xsltproc

# This will come from autoconf again
# File-name pieces for the static library and for object files.
LIBPREFIX = lib
LIBSUFFIX = .a
OBJSUF    = .o

# The static library; with the defaults this is $(LIBDIR)/libtidy.a.
LIBRARY = $(LIBDIR)/$(LIBPREFIX)$(PROJECT)$(LIBSUFFIX)

# Command-line programs built by "all".
EXES = \
  $(BINDIR)/$(PROJECT) \
  $(BINDIR)/tab2space

# Generated documentation built by "doc".
DOCS = \
  $(DOCDIR)/quickref.html \
  $(DOCDIR)/tidy.1

# XML dumps written by the tidy program itself and fed to XSLTPROC.
CONFIGXML = $(DOCDIR)/tidy-config.xml
HELPXML   = $(DOCDIR)/tidy-help.xml
# Library sources, one per line. The order is the order in which the
# objects are handed to the archiver.
CFILES = \
  $(SRCDIR)/access.c \
  $(SRCDIR)/attrs.c \
  $(SRCDIR)/istack.c \
  $(SRCDIR)/parser.c \
  $(SRCDIR)/tags.c \
  $(SRCDIR)/entities.c \
  $(SRCDIR)/lexer.c \
  $(SRCDIR)/pprint.c \
  $(SRCDIR)/clean.c \
  $(SRCDIR)/localize.c \
  $(SRCDIR)/config.c \
  $(SRCDIR)/alloc.c \
  $(SRCDIR)/attrask.c \
  $(SRCDIR)/attrdict.c \
  $(SRCDIR)/attrget.c \
  $(SRCDIR)/buffio.c \
  $(SRCDIR)/fileio.c \
  $(SRCDIR)/streamio.c \
  $(SRCDIR)/tagask.c \
  $(SRCDIR)/tmbstr.c \
  $(SRCDIR)/utf8.c \
  $(SRCDIR)/tidylib.c \
  $(SRCDIR)/mappedio.c

# One object per source: $(SRCDIR)/foo.c becomes $(OBJDIR)/foo$(OBJSUF).
# Derived from CFILES so the two lists cannot drift apart.
OBJFILES = $(patsubst $(SRCDIR)/%.c,$(OBJDIR)/%$(OBJSUF),$(CFILES))

# Public headers: prerequisites of every object and of the tidy program,
# and the files copied by "installhdrs".
HFILES = \
  $(INCDIR)/platform.h \
  $(INCDIR)/tidy.h \
  $(INCDIR)/tidyenum.h \
  $(INCDIR)/buffio.h

# Library-internal headers: prerequisites of every object only.
LIBHFILES = \
  $(SRCDIR)/access.h \
  $(SRCDIR)/attrs.h \
  $(SRCDIR)/attrdict.h \
  $(SRCDIR)/clean.h \
  $(SRCDIR)/config.h \
  $(SRCDIR)/entities.h \
  $(SRCDIR)/fileio.h \
  $(SRCDIR)/forward.h \
  $(SRCDIR)/lexer.h \
  $(SRCDIR)/mappedio.h \
  $(SRCDIR)/message.h \
  $(SRCDIR)/parser.h \
  $(SRCDIR)/pprint.h \
  $(SRCDIR)/streamio.h \
  $(SRCDIR)/tags.h \
  $(SRCDIR)/tmbstr.h \
  $(SRCDIR)/utf8.h \
  $(SRCDIR)/tidy-int.h \
  $(SRCDIR)/version.h
# "all" and "doc" name no files; declaring them phony keeps a stray file
# called "all" or "doc" from making the goal look up to date.
.PHONY: all doc

# Remove a target whose recipe failed, so a half-written object or a
# truncated XML dump is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

# Default goal: the static library plus both command-line programs.
all: $(LIBRARY) $(EXES)

# Generated documentation (quick reference and man page).
doc: $(DOCS)

# Archive all library objects. "mkdir -p" succeeds when the directory
# already exists, so it needs no guard and cannot fail when parallel
# jobs try to create the same directory at once.
# RANLIB is run only when the variable is set to a non-empty value.
$(LIBRARY): $(OBJFILES)
	mkdir -p $(LIBDIR)
	$(AR) $@ $(OBJFILES)
ifdef RANLIB
	$(RANLIB) $@
endif

# Compile one library source. Every object depends on all public and
# internal headers and on this Makefile, so changing any of them
# recompiles the whole library.
$(OBJDIR)/%$(OBJSUF): $(SRCDIR)/%.c $(HFILES) $(LIBHFILES) Makefile
	mkdir -p $(OBJDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ -c $<

# The tidy program: console front end linked against the static library.
# $(LIBS) is empty in a normal build; the "debug" target fills it with
# $(DEBUGLIBS), which tidy needs just as tab2space does.
$(BINDIR)/$(PROJECT): $(APPDIR)/tidy.c $(HFILES) $(LIBRARY)
	mkdir -p $(BINDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ $(APPDIR)/tidy.c -I$(INCDIR) $(LIBRARY) $(LIBS)

# tab2space is built from a single source and does not link the library.
$(BINDIR)/tab2space: $(APPDIR)/tab2space.c
	mkdir -p $(BINDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ $(APPDIR)/tab2space.c $(LIBS)

# XML dumps produced by running the freshly built tidy program.
$(HELPXML): $(BINDIR)/$(PROJECT)
	$(BINDIR)/$(PROJECT) -xml-help > $@

$(CONFIGXML): $(BINDIR)/$(PROJECT)
	$(BINDIR)/$(PROJECT) -xml-config > $@

# HTML quick reference generated from the configuration dump.
$(DOCDIR)/quickref.html: $(DOCDIR)/quickref-html.xsl $(CONFIGXML)
	$(XSLTPROC) -o $@ $(DOCDIR)/quickref-html.xsl $(CONFIGXML)

# Man page generated from the help dump. $(CONFIGXML) is a prerequisite
# but is not passed on the command line.
# NOTE(review): presumably tidy1.xsl loads the config dump itself - confirm.
$(DOCDIR)/tidy.1: $(DOCDIR)/tidy1.xsl $(HELPXML) $(CONFIGXML)
	$(XSLTPROC) -o $@ $(DOCDIR)/tidy1.xsl $(HELPXML)
# None of these goals is a file. Declaring them phony keeps a stray file
# with the same name (for example "clean" or "install") from silently
# turning the goal into a no-op.
.PHONY: debug clean install installhdrs installib installexes installmanpage

# Rebuild "all" in a sub-make with the debug flags and libraries appended.
debug:
	@$(MAKE) CFLAGS='$(CFLAGS) $(DEBUGFLAGS)' LIBS='$(LIBS) $(DEBUGLIBS)' all

# Remove everything this Makefile builds, then the output directories.
# rmdir (not rm -r) is used on purpose: a directory that still holds
# files is never deleted, and the failing rmdir makes that visible.
# A directory equal to $(TOPDIR) is never removed.
clean:
	rm -f $(OBJFILES) $(EXES) $(LIBRARY) $(DOCS) $(HELPXML) $(CONFIGXML) $(OBJDIR)/*.lo
	if [ -d $(OBJDIR)/.libs ]; then rmdir $(OBJDIR)/.libs; fi
	if [ -d $(LIBDIR)/.libs ]; then rmdir $(LIBDIR)/.libs; fi
	if [ "$(OBJDIR)" != "$(TOPDIR)" -a -d $(OBJDIR) ]; then rmdir $(OBJDIR); fi
	if [ "$(LIBDIR)" != "$(TOPDIR)" -a -d $(LIBDIR) ]; then rmdir $(LIBDIR); fi
	if [ "$(BINDIR)" != "$(TOPDIR)" -a -d $(BINDIR) ]; then rmdir $(BINDIR); fi

# Install steps. Destination paths are quoted because the installation
# variables may contain spaces. "mkdir -p" is a no-op for an existing
# directory, so no existence test is needed.
installhdrs: $(HFILES)
	mkdir -p "$(incinst)"
	cp -f $(HFILES) "$(incinst)/"

installib: $(LIBRARY)
	mkdir -p "$(libinst)"
	cp -f $(LIBRARY) "$(libinst)/"

installexes: $(EXES)
	mkdir -p "$(bininst)"
	cp -f $(EXES) "$(bininst)/"

installmanpage: $(DOCDIR)/tidy.1
	mkdir -p "$(maninst)/man1"
	cp -f $(DOCDIR)/tidy.1 "$(maninst)/man1/tidy.1"

# Install headers, library, programs and man page.
install: installhdrs installib installexes installmanpage

16
build/gmake/readme.txt Normal file
View file

@ -0,0 +1,16 @@
This Makefile works on most Unix platforms. By default it runs gcc,
but you can use many other C compilers by setting the CC macro.
You can override the default build options by setting environment
variables of the same name as the corresponding macro: DMALLOC,
SUPPORT_ACCESSIBILITY_CHECKS, SUPPORT_UTF16_ENCODINGS and
SUPPORT_ASIAN_ENCODINGS.
$ DMALLOC=1 gmake
Note this Makefile will only run with gmake. But you should be able
to easily locate a pre-built executable for your platform.
To customize the location of output files or install locations, just
edit the Makefile. There are variable definitions for just about
everything, so you shouldn't have to alter the build rules.

63
build/gnuauto/Makefile.am Normal file
View file

@ -0,0 +1,63 @@
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
#
# CVS Info :
#
# $Author: creitzel $
# $Date: 2003/03/19 18:37:37 $
# $Revision: 1.3 $
#
# Copyright (c) 1998-2003 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# Contributing Author(s):
#
# Dave Raggett <dsr@w3.org>
# Terry Teague <terry_teague@users.sourceforge.net>
# Pradeep Padala<ppadala@users.sourceforge.net>
#
# The contributing author(s) would like to thank all those who
# helped with testing, bug fixes, and patience. This wouldn't
# have been possible without all of you.
#
# COPYRIGHT NOTICE:
#
# This software and documentation is provided "as is," and
# the copyright holders and contributing author(s) make no
# representations or warranties, express or implied, including
# but not limited to, warranties of merchantability or fitness
# for any particular purpose or that the use of the software or
# documentation will not infringe any third party patents,
# copyrights, trademarks or other rights.
#
# The copyright holders and contributing author(s) will not be
# liable for any direct, indirect, special or consequential damages
# arising out of any use of the software or documentation, even if
# advised of the possibility of such damage.
#
# Permission is hereby granted to use, copy, modify, and distribute
# this source code, or portions hereof, documentation and executables,
# for any purpose, without fee, subject to the following restrictions:
#
# 1. The origin of this source code must not be misrepresented.
# 2. Altered versions must be plainly marked as such and must
# not be misrepresented as being the original source.
# 3. This Copyright notice may not be removed or altered from any
# source or altered source distribution.
#
# The copyright holders and contributing author(s) specifically
# permit, without fee, and encourage the use of this source code
# as a component for supporting the Hypertext Markup Language in
# commercial products. If you use this source code in a product,
# acknowledgment is not required but would be appreciated.
#
# Directories automake recurses into, in the order listed.
# NOTE(review): src is presumably first so that libtidy.la exists before
# the console programs link against it - confirm.
SUBDIRS = src console include
#TODO: Pull man page from htmldoc
#installmanpage:
# if [ -f "$(TOPDIR)/htmldoc/man_page.txt" ] ; then \
# if [ ! -d "$(maninst)/man1" ]; then mkdir -p "$(maninst)/man1"; fi; \
# cp -f $(TOPDIR)/htmldoc/man_page.txt "$(maninst)/man1/tidy.1"; \
# fi

133
build/gnuauto/configure.in Normal file
View file

@ -0,0 +1,133 @@
# configure.in - HTML TidyLib GNU autoconf input file
#
# Copyright (c) 2003-2004 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2008/03/24 21:08:16 $
# $Revision: 1.4 $
#
# The argument names a file that must exist in the source tree.
AC_INIT([include/tidy.h])
# Making releases:
#
# TIDY_MICRO_VERSION += 1;
# TIDY_INTERFACE_AGE += 1;
# TIDY_BINARY_AGE += 1;
#
# if any functions have been added, set TIDY_INTERFACE_AGE to 0.
# if backwards compatibility has been broken,
# set TIDY_BINARY_AGE and TIDY_INTERFACE_AGE to 0.
#
TIDY_MAJOR_VERSION=0
TIDY_MINOR_VERSION=99
TIDY_MICRO_VERSION=0
TIDY_INTERFACE_AGE=0
TIDY_BINARY_AGE=0
# Dotted package version (major.minor.micro), also used as the
# automake package version below.
LIBTIDY_VERSION=$TIDY_MAJOR_VERSION.$TIDY_MINOR_VERSION.$TIDY_MICRO_VERSION
AC_SUBST(LIBTIDY_VERSION)
# libtool versioning
#
# Derived from the numbers above and substituted into the Makefiles.
# The library Makefile.am passes them to libtool as
# -release LT_RELEASE and -version-info LT_CURRENT:LT_REVISION:LT_AGE.
# With the current values all three of current, revision and age are 0.
#
LT_RELEASE=$TIDY_MAJOR_VERSION.$TIDY_MINOR_VERSION
LT_CURRENT=`expr $TIDY_MICRO_VERSION - $TIDY_INTERFACE_AGE`
LT_REVISION=$TIDY_INTERFACE_AGE
LT_AGE=`expr $TIDY_BINARY_AGE - $TIDY_INTERFACE_AGE`
AC_SUBST(LT_RELEASE)
AC_SUBST(LT_CURRENT)
AC_SUBST(LT_REVISION)
AC_SUBST(LT_AGE)
# Package name and version for automake.
AM_INIT_AUTOMAKE(tidy,$LIBTIDY_VERSION)
# Checks for programs.
# =============================================
# AC_PROG_CC has a habit of adding -g to CFLAGS
# so the caller's CFLAGS are saved first and CFLAGS is rebuilt from
# that saved value once the debug switch has been read.
#
save_cflags="$CFLAGS"
AC_PROG_CC

# Warning flags are only requested from a compiler detected as GCC.
WARNING_CFLAGS=""
test "x$GCC" = "xyes" && WARNING_CFLAGS="-Wall"
AC_SUBST(WARNING_CFLAGS)

# Debug switch, off unless --enable-debug is given.
debug_build=no
AC_ARG_ENABLE(debug,[ --enable-debug add -g (instead of -O2) to CFLAGS],[
  test "x$enableval" = "xyes" && debug_build=yes
])

# Optimised build: -O2 in front of the caller's flags.
# Debug build: -g after the caller's flags.
if test $debug_build != yes; then
  CFLAGS="-O2 $save_cflags"
else
  CFLAGS="$save_cflags -g"
fi
#
# =============================================

# Remaining tool checks: preprocessor, C++ compiler, install program,
# symbolic links, libtool, and whether make sets the MAKE variable.
AC_PROG_CPP
AC_PROG_CXX
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_LIBTOOL
AC_PROG_MAKE_SET
# Optional features. Each one is compiled in by default and the option
# handler only reacts to a value of "no", so the help text advertises
# the --disable spelling, which is the only one that changes anything.
# The matching preprocessor symbol is always defined, as 1 or as 0.

# Accessibility checks.
support_access=yes
AC_ARG_ENABLE(access,[  --disable-access        do not support accessibility checks],[
  if test "x$enableval" = "xno"; then
    support_access=no
  fi
])
if test "x$support_access" = "xyes"; then
  AC_DEFINE(SUPPORT_ACCESSIBILITY_CHECKS,1)
else
  AC_DEFINE(SUPPORT_ACCESSIBILITY_CHECKS,0)
fi

# UTF-16 encoding.
support_utf16=yes
AC_ARG_ENABLE(utf16,[  --disable-utf16         do not support UTF-16 encoding],[
  if test "x$enableval" = "xno"; then
    support_utf16=no
  fi
])
if test "x$support_utf16" = "xyes"; then
  AC_DEFINE(SUPPORT_UTF16_ENCODINGS,1)
else
  AC_DEFINE(SUPPORT_UTF16_ENCODINGS,0)
fi

# Asian encodings.
support_asian=yes
AC_ARG_ENABLE(asian,[  --disable-asian         do not support asian encodings],[
  if test "x$enableval" = "xno"; then
    support_asian=no
  fi
])
if test "x$support_asian" = "xyes"; then
  AC_DEFINE(SUPPORT_ASIAN_ENCODINGS,1)
else
  AC_DEFINE(SUPPORT_ASIAN_ENCODINGS,0)
fi
# TODO: this defines "WITH_DMALLOC" but tidy expects "DMALLOC"
# need to do: #if defined(DMALLOC) || defined(WITH_DMALLOC)
#
AM_WITH_DMALLOC
# Makefiles to generate: the top level plus one for each directory
# listed in SUBDIRS of the top-level Makefile.am.
AC_OUTPUT([
Makefile
src/Makefile
console/Makefile
include/Makefile
])

View file

@ -0,0 +1,64 @@
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2008/03/17 12:49:40 $
# $Revision: 1.3 $
#
# Copyright (c) 1998-2008 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# Contributing Author(s):
#
# Dave Raggett <dsr@w3.org>
# Terry Teague <terry_teague@users.sourceforge.net>
# Pradeep Padala<ppadala@users.sourceforge.net>
#
# The contributing author(s) would like to thank all those who
# helped with testing, bug fixes, and patience. This wouldn't
# have been possible without all of you.
#
# COPYRIGHT NOTICE:
#
# This software and documentation is provided "as is," and
# the copyright holders and contributing author(s) make no
# representations or warranties, express or implied, including
# but not limited to, warranties of merchantability or fitness
# for any particular purpose or that the use of the software or
# documentation will not infringe any third party patents,
# copyrights, trademarks or other rights.
#
# The copyright holders and contributing author(s) will not be
# liable for any direct, indirect, special or consequential damages
# arising out of any use of the software or documentation, even if
# advised of the possibility of such damage.
#
# Permission is hereby granted to use, copy, modify, and distribute
# this source code, or portions hereof, documentation and executables,
# for any purpose, without fee, subject to the following restrictions:
#
# 1. The origin of this source code must not be misrepresented.
# 2. Altered versions must be plainly marked as such and must
# not be misrepresented as being the original source.
# 3. This Copyright notice may not be removed or altered from any
# source or altered source distribution.
#
# The copyright holders and contributing author(s) specifically
# permit, without fee, and encourage the use of this source code
# as a component for supporting the Hypertext Markup Language in
# commercial products. If you use this source code in a product,
# acknowledgment is not required but would be appreciated.
#
# Warning flags chosen by configure. @CFLAGS@ is intentionally not
# repeated here: automake already places $(CFLAGS) after $(AM_CFLAGS)
# on every compile line, and copying it into AM_CFLAGS would keep the
# configure-time flags even when CFLAGS is overridden at make time.
AM_CFLAGS = @WARNING_CFLAGS@

# Preprocessor flags: the public headers live in the top-level include
# directory. AM_CPPFLAGS replaces the deprecated INCLUDES variable.
AM_CPPFLAGS = -I$(top_srcdir)/include

# Installed programs; both are linked against the libtool library built
# in the src directory.
bin_PROGRAMS = tidy tab2space

tidy_LDADD = $(top_builddir)/src/libtidy.la
tab2space_LDADD = $(top_builddir)/src/libtidy.la

View file

@ -0,0 +1,61 @@
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2006/10/06 09:25:13 $
# $Revision: 1.3 $
#
# Copyright (c) 1998-2006 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# Contributing Author(s):
#
# Dave Raggett <dsr@w3.org>
# Terry Teague <terry_teague@users.sourceforge.net>
# Pradeep Padala<ppadala@users.sourceforge.net>
#
# The contributing author(s) would like to thank all those who
# helped with testing, bug fixes, and patience. This wouldn't
# have been possible without all of you.
#
# COPYRIGHT NOTICE:
#
# This software and documentation is provided "as is," and
# the copyright holders and contributing author(s) make no
# representations or warranties, express or implied, including
# but not limited to, warranties of merchantability or fitness
# for any particular purpose or that the use of the software or
# documentation will not infringe any third party patents,
# copyrights, trademarks or other rights.
#
# The copyright holders and contributing author(s) will not be
# liable for any direct, indirect, special or consequential damages
# arising out of any use of the software or documentation, even if
# advised of the possibility of such damage.
#
# Permission is hereby granted to use, copy, modify, and distribute
# this source code, or portions hereof, documentation and executables,
# for any purpose, without fee, subject to the following restrictions:
#
# 1. The origin of this source code must not be misrepresented.
# 2. Altered versions must be plainly marked as such and must
# not be misrepresented as being the original source.
# 3. This Copyright notice may not be removed or altered from any
# source or altered source distribution.
#
# The copyright holders and contributing author(s) specifically
# permit, without fee, and encourage the use of this source code
# as a component for supporting the Hypertext Markup Language in
# commercial products. If you use this source code in a product,
# acknowledgment is not required but would be appreciated.
#
# Install directory for the public headers. They go straight into
# $(includedir); the commented-out line is the alternative of a
# dedicated tidy subdirectory.
#tidyincdir = $(includedir)/tidy
tidyincdir = $(includedir)

# Public headers installed into $(tidyincdir).
tidyinc_HEADERS = platform.h tidy.h tidyenum.h buffio.h

24
build/gnuauto/readme.txt Normal file
View file

@ -0,0 +1,24 @@
To use GNU "Auto" tools (AutoConf/AutoMake/LibTool), run
/bin/sh build/gnuauto/setup.sh from the top-level Tidy
directory. This script will copy the appropriate
Makefile.am files into each source directory, along with
configure.in.
If the script was successful you should now be able
to build in the usual way:
$ ./configure --prefix=/usr
$ make
$ make install
to get a list of configure options type: ./configure --help
Alternatively, you should be able to build outside of the source
tree. e.g.:
$ mkdir ../build-tidy
$ cd ../build-tidy
$ ../tidy/configure --prefix=/usr
$ make
$ make install

56
build/gnuauto/setup.sh Normal file
View file

@ -0,0 +1,56 @@
#!/bin/sh
# Bootstrap the GNU autotools build.
#
# Copies everything under build/gnuauto into the top source directory,
# then runs libtoolize, aclocal, automake and autoconf there.
# Must be started from the top source directory.
#
# Exit status: 1 when started from the wrong directory or when no
# libtoolize program is found. The generator tools themselves are run
# on a best-effort basis and their status is not checked.

# "test ! -f" rather than "! test -f": negating a whole command with
# "!" is not understood by every /bin/sh.
if test ! -f build/gnuauto/setup.sh; then
    echo ""
    echo "* * * Execute this script from the top source directory, e.g.:"
    echo ""
    echo " $ /bin/sh build/gnuauto/setup.sh"
    echo ""
    # Nothing was generated, so report failure to the caller.
    exit 1
fi

# Pick whichever libtoolize spelling answers --version; when both do,
# the later name in the list wins.
for i in libtoolize glibtoolize
do
    ( $i --version) < /dev/null > /dev/null 2>&1 &&
        LIBTOOLIZE=$i
done

if test -z "$LIBTOOLIZE" ; then
    echo "You need libtoolize to continue"
    exit 1;
fi

top_srcdir=`pwd`

echo ""
echo "Generating the build system in $top_srcdir"
echo ""

echo "copying files into place: cd build/gnuauto && cp -R -f * $top_srcdir"
# Quoted so that a source directory containing spaces is still one argument.
(cd build/gnuauto && cp -R -f * "$top_srcdir")

echo "running: $LIBTOOLIZE --force --copy"
$LIBTOOLIZE --force --copy

echo "running: aclocal"
aclocal

echo "running: automake -a -c --foreign"
automake -a -c --foreign

echo "running: autoconf"
autoconf

echo ""
echo "If the above commands were successful you should now be able"
echo "to build in the usual way:"
echo ""
echo " $ ./configure --prefix=/usr"
echo " $ make"
echo " $ make install"
echo ""
echo "to get a list of configure options type: ./configure --help"
echo ""
echo "Alternatively, you should be able to build outside of the source"
echo "tree. e.g.:"
echo ""
echo " $ mkdir ../build-tidy"
echo " $ cd ../build-tidy"
echo " $ ../tidy/configure --prefix=/usr"
echo " $ make"
echo " $ make install"
echo ""

View file

@ -0,0 +1,81 @@
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2008/03/17 12:49:41 $
# $Revision: 1.8 $
#
# Copyright (c) 1998-2008 World Wide Web Consortium
# (Massachusetts Institute of Technology, European Research
# Consortium for Informatics and Mathematics, Keio University).
# All Rights Reserved.
#
# Contributing Author(s):
#
# Dave Raggett <dsr@w3.org>
# Terry Teague <terry_teague@users.sourceforge.net>
# Pradeep Padala<ppadala@users.sourceforge.net>
#
# The contributing author(s) would like to thank all those who
# helped with testing, bug fixes, and patience. This wouldn't
# have been possible without all of you.
#
# COPYRIGHT NOTICE:
#
# This software and documentation is provided "as is," and
# the copyright holders and contributing author(s) make no
# representations or warranties, express or implied, including
# but not limited to, warranties of merchantability or fitness
# for any particular purpose or that the use of the software or
# documentation will not infringe any third party patents,
# copyrights, trademarks or other rights.
#
# The copyright holders and contributing author(s) will not be
# liable for any direct, indirect, special or consequential damages
# arising out of any use of the software or documentation, even if
# advised of the possibility of such damage.
#
# Permission is hereby granted to use, copy, modify, and distribute
# this source code, or portions hereof, documentation and executables,
# for any purpose, without fee, subject to the following restrictions:
#
# 1. The origin of this source code must not be misrepresented.
# 2. Altered versions must be plainly marked as such and must
# not be misrepresented as being the original source.
# 3. This Copyright notice may not be removed or altered from any
# source or altered source distribution.
#
# The copyright holders and contributing author(s) specifically
# permit, without fee, and encourage the use of this source code
# as a component for supporting the Hypertext Markup Language in
# commercial products. If you use this source code in a product,
# acknowledgment is not required but would be appreciated.
#
# Warning flags chosen by configure. @CFLAGS@ is intentionally not
# repeated here: automake already places $(CFLAGS) after $(AM_CFLAGS)
# on every compile line, and copying it into AM_CFLAGS would keep the
# configure-time flags even when CFLAGS is overridden at make time.
AM_CFLAGS = @WARNING_CFLAGS@

# Preprocessor flags: the public headers live in the top-level include
# directory. AM_CPPFLAGS replaces the deprecated INCLUDES variable.
AM_CPPFLAGS = -I$(top_srcdir)/include

# The installed libtool library and the sources it is built from.
lib_LTLIBRARIES = libtidy.la

libtidy_la_SOURCES = \
  access.c attrs.c istack.c parser.c \
  tags.c entities.c lexer.c pprint.c \
  clean.c localize.c config.c alloc.c \
  attrask.c attrdict.c attrget.c buffio.c \
  fileio.c streamio.c tagask.c tmbstr.c \
  utf8.c tidylib.c mappedio.c

# Library versioning; the LT_* values are substituted by configure.
libtidy_la_LDFLAGS = \
  -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) \
  -release $(LT_RELEASE) -no-undefined -export-dynamic

# Internal headers: not installed, but shipped in the distribution
# tarball through EXTRA_DIST.
HFILES = \
  access.h attrdict.h attrs.h clean.h \
  config.h entities.h fileio.h forward.h \
  lexer.h mappedio.h message.h parser.h \
  pprint.h streamio.h tags.h tmbstr.h \
  utf8.h tidy-int.h version.h

EXTRA_DIST = $(HFILES)

304
build/msvc/tidy.def Executable file
View file

@ -0,0 +1,304 @@
; Module-definition file for the libtidy library (MSVC build).
; Each entry under EXPORTS names one exported function followed by its
; ordinal (@N). Two ordinal ranges are in use: 1001 upward and 2001 upward.
LIBRARY libtidy
EXPORTS
tidyCreate @1001
tidyRelease @1002
tidySetAppData @1003
tidyGetAppData @1004
tidyReleaseDate @1005
tidyStatus @1006
tidyDetectedHtmlVersion @1007
tidyDetectedXhtml @1008
tidyDetectedGenericXml @1009
tidyErrorCount @1010
tidyWarningCount @1011
tidyAccessWarningCount @1012
tidyConfigErrorCount @1013
tidyLoadConfig @1014
tidyLoadConfigEnc @1015
tidyFileExists @1016
tidySetCharEncoding @1017
tidySetInCharEncoding @1018
tidySetOutCharEncoding @1019
tidySetOptionCallback @1020
tidyOptGetIdForName @1021
tidyGetOptionList @1022
tidyGetNextOption @1023
tidyGetOption @1024
tidyGetOptionByName @1025
tidyOptGetId @1026
tidyOptGetName @1027
tidyOptGetType @1028
tidyOptIsReadOnly @1029
tidyOptGetCategory @1030
tidyOptGetDefault @1031
tidyOptGetDefaultInt @1032
tidyOptGetDefaultBool @1033
tidyOptGetPickList @1034
tidyOptGetNextPick @1035
tidyOptGetValue @1036
tidyOptSetValue @1037
tidyOptParseValue @1038
tidyOptGetInt @1039
tidyOptSetInt @1040
tidyOptGetBool @1041
tidyOptSetBool @1042
tidyOptResetToDefault @1043
tidyOptResetAllToDefault @1044
tidyOptSnapshot @1045
tidyOptResetToSnapshot @1046
tidyOptDiffThanDefault @1047
tidyOptDiffThanSnapshot @1048
tidyOptCopyConfig @1049
tidyOptGetEncName @1050
tidyOptGetCurrPick @1051
tidyOptGetDeclTagList @1052
tidyOptGetNextDeclTag @1053
tidyOptGetDoc @1054
tidyOptGetDocLinksList @1055
tidyOptGetNextDocLinks @1056
tidyInitSource @1057
tidyGetByte @1058
tidyUngetByte @1059
tidyIsEOF @1060
tidyInitSink @1061
tidyPutByte @1062
tidySetReportFilter @1063
tidySetErrorFile @1064
tidySetErrorBuffer @1065
tidySetErrorSink @1066
tidySetMallocCall @1067
tidySetReallocCall @1068
tidySetFreeCall @1069
tidySetPanicCall @1070
tidyParseFile @1071
tidyParseStdin @1072
tidyParseString @1073
tidyParseBuffer @1074
tidyParseSource @1075
tidyCleanAndRepair @1076
tidyRunDiagnostics @1077
tidySaveFile @1078
tidySaveStdout @1079
tidySaveBuffer @1080
tidySaveString @1081
tidySaveSink @1082
tidyOptSaveFile @1083
tidyOptSaveSink @1084
tidyErrorSummary @1085
tidyGeneralInfo @1086
tidyGetRoot @1087
tidyGetHtml @1088
tidyGetHead @1089
tidyGetBody @1090
tidyGetParent @1091
tidyGetChild @1092
tidyGetNext @1093
tidyGetPrev @1094
tidyAttrFirst @1095
tidyAttrNext @1096
tidyAttrName @1097
tidyAttrValue @1098
tidyNodeGetType @1099
tidyNodeGetName @1100
tidyNodeIsText @1101
tidyNodeIsProp @1102
tidyNodeIsHeader @1103
tidyNodeHasText @1104
tidyNodeGetText @1105
tidyNodeGetId @1106
tidyNodeLine @1107
tidyNodeColumn @1108
tidyNodeIsHTML @1109
tidyNodeIsHEAD @1110
tidyNodeIsTITLE @1111
tidyNodeIsBASE @1112
tidyNodeIsMETA @1113
tidyNodeIsBODY @1114
tidyNodeIsFRAMESET @1115
tidyNodeIsFRAME @1116
tidyNodeIsIFRAME @1117
tidyNodeIsNOFRAMES @1118
tidyNodeIsHR @1119
tidyNodeIsH1 @1120
tidyNodeIsH2 @1121
tidyNodeIsPRE @1122
tidyNodeIsLISTING @1123
tidyNodeIsP @1124
tidyNodeIsUL @1125
tidyNodeIsOL @1126
tidyNodeIsDL @1127
tidyNodeIsDIR @1128
tidyNodeIsLI @1129
tidyNodeIsDT @1130
tidyNodeIsDD @1131
tidyNodeIsTABLE @1132
tidyNodeIsCAPTION @1133
tidyNodeIsTD @1134
tidyNodeIsTH @1135
tidyNodeIsTR @1136
tidyNodeIsCOL @1137
tidyNodeIsCOLGROUP @1138
tidyNodeIsBR @1139
tidyNodeIsA @1140
tidyNodeIsLINK @1141
tidyNodeIsB @1142
tidyNodeIsI @1143
tidyNodeIsSTRONG @1144
tidyNodeIsEM @1145
tidyNodeIsBIG @1146
tidyNodeIsSMALL @1147
tidyNodeIsPARAM @1148
tidyNodeIsOPTION @1149
tidyNodeIsOPTGROUP @1150
tidyNodeIsIMG @1151
tidyNodeIsMAP @1152
tidyNodeIsAREA @1153
tidyNodeIsNOBR @1154
tidyNodeIsWBR @1155
tidyNodeIsFONT @1156
tidyNodeIsLAYER @1157
tidyNodeIsSPACER @1158
tidyNodeIsCENTER @1159
tidyNodeIsSTYLE @1160
tidyNodeIsSCRIPT @1161
tidyNodeIsNOSCRIPT @1162
tidyNodeIsFORM @1163
tidyNodeIsTEXTAREA @1164
tidyNodeIsBLOCKQUOTE @1165
tidyNodeIsAPPLET @1166
tidyNodeIsOBJECT @1167
tidyNodeIsDIV @1168
tidyNodeIsSPAN @1169
tidyNodeIsINPUT @1170
tidyNodeIsQ @1171
tidyNodeIsLABEL @1172
tidyNodeIsH3 @1173
tidyNodeIsH4 @1174
tidyNodeIsH5 @1175
tidyNodeIsH6 @1176
tidyNodeIsADDRESS @1177
tidyNodeIsXMP @1178
tidyNodeIsSELECT @1179
tidyNodeIsBLINK @1180
tidyNodeIsMARQUEE @1181
tidyNodeIsEMBED @1182
tidyNodeIsBASEFONT @1183
tidyNodeIsISINDEX @1184
tidyNodeIsS @1185
tidyNodeIsSTRIKE @1186
tidyNodeIsU @1187
tidyNodeIsMENU @1188
tidyAttrGetId @1189
tidyAttrIsEvent @1190
tidyAttrIsProp @1191
tidyAttrIsHREF @1192
tidyAttrIsSRC @1193
tidyAttrIsID @1194
tidyAttrIsNAME @1195
tidyAttrIsSUMMARY @1196
tidyAttrIsALT @1197
tidyAttrIsLONGDESC @1198
tidyAttrIsUSEMAP @1199
tidyAttrIsISMAP @1200
tidyAttrIsLANGUAGE @1201
tidyAttrIsTYPE @1202
tidyAttrIsVALUE @1203
tidyAttrIsCONTENT @1204
tidyAttrIsTITLE @1205
tidyAttrIsXMLNS @1206
tidyAttrIsDATAFLD @1207
tidyAttrIsWIDTH @1208
tidyAttrIsHEIGHT @1209
tidyAttrIsFOR @1210
tidyAttrIsSELECTED @1211
tidyAttrIsCHECKED @1212
tidyAttrIsLANG @1213
tidyAttrIsTARGET @1214
tidyAttrIsHTTP_EQUIV @1215
tidyAttrIsREL @1216
tidyAttrIsOnMOUSEMOVE @1217
tidyAttrIsOnMOUSEDOWN @1218
tidyAttrIsOnMOUSEUP @1219
tidyAttrIsOnCLICK @1220
tidyAttrIsOnMOUSEOVER @1221
tidyAttrIsOnMOUSEOUT @1222
tidyAttrIsOnKEYDOWN @1223
tidyAttrIsOnKEYUP @1224
tidyAttrIsOnKEYPRESS @1225
tidyAttrIsOnFOCUS @1226
tidyAttrIsOnBLUR @1227
tidyAttrIsBGCOLOR @1228
tidyAttrIsLINK @1229
tidyAttrIsALINK @1230
tidyAttrIsVLINK @1231
tidyAttrIsTEXT @1232
tidyAttrIsSTYLE @1233
tidyAttrIsABBR @1234
tidyAttrIsCOLSPAN @1235
tidyAttrIsROWSPAN @1236
tidyAttrGetById @1237
tidyAttrGetHREF @1238
tidyAttrGetSRC @1239
tidyAttrGetID @1240
tidyAttrGetNAME @1241
tidyAttrGetSUMMARY @1242
tidyAttrGetALT @1243
tidyAttrGetLONGDESC @1244
tidyAttrGetUSEMAP @1245
tidyAttrGetISMAP @1246
tidyAttrGetLANGUAGE @1247
tidyAttrGetTYPE @1248
tidyAttrGetVALUE @1249
tidyAttrGetCONTENT @1250
tidyAttrGetTITLE @1251
tidyAttrGetXMLNS @1252
tidyAttrGetDATAFLD @1253
tidyAttrGetWIDTH @1254
tidyAttrGetHEIGHT @1255
tidyAttrGetFOR @1256
tidyAttrGetSELECTED @1257
tidyAttrGetCHECKED @1258
tidyAttrGetLANG @1259
tidyAttrGetTARGET @1260
tidyAttrGetHTTP_EQUIV @1261
tidyAttrGetREL @1262
tidyAttrGetOnMOUSEMOVE @1263
tidyAttrGetOnMOUSEDOWN @1264
tidyAttrGetOnMOUSEUP @1265
tidyAttrGetOnCLICK @1266
tidyAttrGetOnMOUSEOVER @1267
tidyAttrGetOnMOUSEOUT @1268
tidyAttrGetOnKEYDOWN @1269
tidyAttrGetOnKEYUP @1270
tidyAttrGetOnKEYPRESS @1271
tidyAttrGetOnFOCUS @1272
tidyAttrGetOnBLUR @1273
tidyAttrGetBGCOLOR @1274
tidyAttrGetLINK @1275
tidyAttrGetALINK @1276
tidyAttrGetVLINK @1277
tidyAttrGetTEXT @1278
tidyAttrGetSTYLE @1279
tidyAttrGetABBR @1280
tidyAttrGetCOLSPAN @1281
tidyAttrGetROWSPAN @1282
tidyCreateWithAllocator @1283
tidyInitInputBuffer @2001
tidyInitOutputBuffer @2002
tidyBufInit @2003
tidyBufAlloc @2004
tidyBufCheckAlloc @2005
tidyBufFree @2006
tidyBufClear @2007
tidyBufAttach @2008
tidyBufDetach @2009
tidyBufAppend @2010
tidyBufPutByte @2011
tidyBufPopByte @2012
tidyBufGetByte @2013
tidyBufEndOfInput @2014
tidyBufUngetByte @2015
tidyBufInitWithAllocator @2016
tidyBufAllocWithAllocator @2017
tidyNodeGetValue @2018

94
build/msvc/tidy.dsp Normal file
View file

@ -0,0 +1,94 @@
# Microsoft Developer Studio Project File - Name="tidy" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=tidy - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "tidy.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "tidy.mak" CFG="tidy - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "tidy - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "tidy - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "tidy - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /MT /Za /W3 /GX /O2 /I "..\..\include" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /D TIDYDLL_EXPORT=__declspec(dllimport) /D _CRT_SECURE_NO_DEPRECATE /YX /FD /c
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /profile /map /machine:I386
!ELSEIF "$(CFG)" == "tidy - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
# ADD CPP /nologo /MTd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /D TIDYDLL_EXPORT=__declspec(dllimport) /D _CRT_SECURE_NO_DEPRECATE /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
!ENDIF
# Begin Target
# Name "tidy - Win32 Release"
# Name "tidy - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=..\..\console\tidy.c
# End Source File
# End Group
# End Target
# End Project

56
build/msvc/tidy.dsw Normal file
View file

@ -0,0 +1,56 @@
Microsoft Developer Studio Workspace File, Format Version 6.00
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
###############################################################################
Project: "tidy"=.\tidy.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
Begin Project Dependency
Project_Dep_Name tidylib
End Project Dependency
}}}
###############################################################################
Project: "tidydll"=.\tidydll.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Project: "tidylib"=.\tidylib.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Global:
Package=<5>
{{{
}}}
Package=<3>
{{{
}}}
###############################################################################

296
build/msvc/tidydll.dsp Normal file
View file

@ -0,0 +1,296 @@
# Microsoft Developer Studio Project File - Name="tidydll" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
CFG=tidydll - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "tidydll.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "tidydll.mak" CFG="tidydll - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "tidydll - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
!MESSAGE "tidydll - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
MTL=midl.exe
RSC=rc.exe
!IF "$(CFG)" == "tidydll - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "tidydll___Win32_Release"
# PROP BASE Intermediate_Dir "tidydll___Win32_Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "ReleaseDLL"
# PROP Intermediate_Dir "ReleaseDLL"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /YX /FD /c
# ADD CPP /nologo /MD /W3 /GX /O2 /I "..\..\include" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /YX /FD /c
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 /out:"ReleaseDLL/libtidy.dll"
!ELSEIF "$(CFG)" == "tidydll - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "tidydll___Win32_Debug"
# PROP BASE Intermediate_Dir "tidydll___Win32_Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "DebugDLL"
# PROP Intermediate_Dir "DebugDLL"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /YX /FD /GZ /c
# ADD CPP /nologo /MDd /W3 /ZI /Od /I "..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /FD /GZ /c
# SUBTRACT CPP /YX
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /out:"DebugDLL/libtidy.dll" /pdbtype:sept
!ENDIF
# Begin Target
# Name "tidydll - Win32 Release"
# Name "tidydll - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=..\..\src\access.c
# End Source File
# Begin Source File
SOURCE=..\..\src\alloc.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrask.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrdict.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrget.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrs.c
# End Source File
# Begin Source File
SOURCE=..\..\src\buffio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\clean.c
# End Source File
# Begin Source File
SOURCE=..\..\src\config.c
# End Source File
# Begin Source File
SOURCE=..\..\src\entities.c
# End Source File
# Begin Source File
SOURCE=..\..\src\fileio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\istack.c
# End Source File
# Begin Source File
SOURCE=..\..\src\lexer.c
# End Source File
# Begin Source File
SOURCE=..\..\src\localize.c
# End Source File
# Begin Source File
SOURCE=..\..\src\mappedio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\parser.c
# End Source File
# Begin Source File
SOURCE=..\..\src\pprint.c
# End Source File
# Begin Source File
SOURCE=..\..\src\streamio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tagask.c
# End Source File
# Begin Source File
SOURCE=.\tidy.def
# End Source File
# Begin Source File
SOURCE=..\..\src\tags.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tidylib.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tmbstr.c
# End Source File
# Begin Source File
SOURCE=..\..\src\utf8.c
# End Source File
# Begin Source File
SOURCE=..\..\src\win32tc.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
SOURCE=..\..\src\access.h
# End Source File
# Begin Source File
SOURCE=..\..\src\attrdict.h
# End Source File
# Begin Source File
SOURCE=..\..\src\attrs.h
# End Source File
# Begin Source File
SOURCE=..\..\include\buffio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\clean.h
# End Source File
# Begin Source File
SOURCE=..\..\src\config.h
# End Source File
# Begin Source File
SOURCE=..\..\src\entities.h
# End Source File
# Begin Source File
SOURCE=..\..\src\fileio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\forward.h
# End Source File
# Begin Source File
SOURCE=..\..\src\lexer.h
# End Source File
# Begin Source File
SOURCE=..\..\src\mappedio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\message.h
# End Source File
# Begin Source File
SOURCE=..\..\src\parser.h
# End Source File
# Begin Source File
SOURCE=..\..\include\platform.h
# End Source File
# Begin Source File
SOURCE=..\..\src\pprint.h
# End Source File
# Begin Source File
SOURCE=..\..\src\streamio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\tags.h
# End Source File
# Begin Source File
SOURCE="..\..\src\tidy-int.h"
# End Source File
# Begin Source File
SOURCE=..\..\include\tidy.h
# End Source File
# Begin Source File
SOURCE=..\..\include\tidyenum.h
# End Source File
# Begin Source File
SOURCE=..\..\src\tmbstr.h
# End Source File
# Begin Source File
SOURCE=..\..\src\utf8.h
# End Source File
# Begin Source File
SOURCE=..\..\src\version.h
# End Source File
# Begin Source File
SOURCE=..\..\src\win32tc.h
# End Source File
# End Group
# End Target
# End Project

295
build/msvc/tidylib.dsp Normal file
View file

@ -0,0 +1,295 @@
# Microsoft Developer Studio Project File - Name="tidylib" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Static Library" 0x0104
CFG=tidylib - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "tidylib.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "tidylib.mak" CFG="tidylib - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "tidylib - Win32 Release" (based on "Win32 (x86) Static Library")
!MESSAGE "tidylib - Win32 Debug" (based on "Win32 (x86) Static Library")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "tidylib - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
# ADD CPP /nologo /MT /W4 /GX /O2 /I "../../include" /D "NDEBUG" /D "_LIB" /D "WIN32" /D "_MBCS" /D "SUPPORT_UTF16_ENCODINGS" /D "SUPPORT_ASIAN_ENCODINGS" /D "SUPPORT_ACCESSIBILITY_CHECKS" /FD /c
# SUBTRACT CPP /YX
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LIB32=link.exe -lib
# ADD BASE LIB32 /nologo
# ADD LIB32 /nologo /out:"Release\libtidy.lib"
!ELSEIF "$(CFG)" == "tidylib - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
# ADD CPP /nologo /MTd /Za /W4 /Gm /ZI /Od /I "../../include" /D "_DEBUG" /D "_WIN32" /D "_LIB" /D "WIN32" /D "_MBCS" /D "SUPPORT_UTF16_ENCODINGS" /D "SUPPORT_ASIAN_ENCODINGS" /D "SUPPORT_ACCESSIBILITY_CHECKS" /U "WINDOWS" /FD /GZ /c
# SUBTRACT CPP /YX
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LIB32=link.exe -lib
# ADD BASE LIB32 /nologo
# ADD LIB32 /nologo /out:"Debug\libtidy.lib"
!ENDIF
# Begin Target
# Name "tidylib - Win32 Release"
# Name "tidylib - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=..\..\src\access.c
# End Source File
# Begin Source File
SOURCE=..\..\src\alloc.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrask.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrdict.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrget.c
# End Source File
# Begin Source File
SOURCE=..\..\src\attrs.c
# End Source File
# Begin Source File
SOURCE=..\..\src\buffio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\clean.c
# End Source File
# Begin Source File
SOURCE=..\..\src\config.c
# End Source File
# Begin Source File
SOURCE=..\..\src\entities.c
# End Source File
# Begin Source File
SOURCE=..\..\src\fileio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\istack.c
# End Source File
# Begin Source File
SOURCE=..\..\src\lexer.c
# End Source File
# Begin Source File
SOURCE=..\..\src\localize.c
# End Source File
# Begin Source File
SOURCE=..\..\src\mappedio.c
!IF "$(CFG)" == "tidylib - Win32 Release"
!ELSEIF "$(CFG)" == "tidylib - Win32 Debug"
# ADD CPP /Ze
!ENDIF
# End Source File
# Begin Source File
SOURCE=..\..\src\parser.c
# End Source File
# Begin Source File
SOURCE=..\..\src\pprint.c
# End Source File
# Begin Source File
SOURCE=..\..\src\streamio.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tagask.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tags.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tidylib.c
# End Source File
# Begin Source File
SOURCE=..\..\src\tmbstr.c
# End Source File
# Begin Source File
SOURCE=..\..\src\utf8.c
# End Source File
# Begin Source File
SOURCE=..\..\src\win32tc.c
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
SOURCE=..\..\src\access.h
# End Source File
# Begin Source File
SOURCE=..\..\src\attrdict.h
# End Source File
# Begin Source File
SOURCE=..\..\src\attrs.h
# End Source File
# Begin Source File
SOURCE=..\..\include\buffio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\clean.h
# End Source File
# Begin Source File
SOURCE=..\..\src\config.h
# End Source File
# Begin Source File
SOURCE=..\..\src\entities.h
# End Source File
# Begin Source File
SOURCE=..\..\src\fileio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\forward.h
# End Source File
# Begin Source File
SOURCE=..\..\src\lexer.h
# End Source File
# Begin Source File
SOURCE=..\..\src\mappedio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\message.h
# End Source File
# Begin Source File
SOURCE=..\..\src\parser.h
# End Source File
# Begin Source File
SOURCE=..\..\include\platform.h
# End Source File
# Begin Source File
SOURCE=..\..\src\pprint.h
# End Source File
# Begin Source File
SOURCE=..\..\src\streamio.h
# End Source File
# Begin Source File
SOURCE=..\..\src\tags.h
# End Source File
# Begin Source File
SOURCE="..\..\src\tidy-int.h"
# End Source File
# Begin Source File
SOURCE=..\..\include\tidy.h
# End Source File
# Begin Source File
SOURCE=..\..\include\tidyenum.h
# End Source File
# Begin Source File
SOURCE=..\..\src\tmbstr.h
# End Source File
# Begin Source File
SOURCE=..\..\src\utf8.h
# End Source File
# Begin Source File
SOURCE=..\..\src\version.h
# End Source File
# Begin Source File
SOURCE=..\..\src\win32tc.h
# End Source File
# End Group
# End Target
# End Project

38
build/readme.txt Normal file
View file

@ -0,0 +1,38 @@
Tidy Build Files
Each subdirectory contains input files to a selected
build system for TidyLib and the command line driver.
Some build systems are cross-platform (gmake, autoconf),
others (msvc) are platform specific. For details
on any given build system, see the readme file for
that system.
Directory System Comments
--------- -------------------- --------------------------
gmake GNU Make Used for "official" builds
gnuauto GNU AutoConf Supports shared lib builds
msvc MS Visual C++ v6 Win32 only
rpm Script for packages For Linux distribution supporting rpm
Common Build Options
There are some basic build options for TidyLib, independent
of platform and build system. Typically, these options can
be enabled or disabled by setting a macro value within the
Makefile or its equivalent. An option may be disabled by
setting its value to "0". Enable by setting to "1". Again,
consult the directions for each build system for details
on how to enable/disable each option.
Option Default Description
---------------------------- -------- ---------------------------------
DMALLOC Disabled Use dmalloc for memory debugging
SUPPORT_ACCESSIBILITY_CHECKS Enabled Support W3C WAI checks
SUPPORT_UTF16_ENCODINGS Enabled Support Unicode documents
SUPPORT_ASIAN_ENCODINGS Enabled Support Big5 and ShiftJIS docs

40
build/rpm/readme.txt Normal file
View file

@ -0,0 +1,40 @@
# Script for Building tidy rpm packages
# To build the RPM packages for tidy on Redhat and other distros which support rpm.
# For making Debian packages, first create rpm package and then generate
# debian package by command "rpm2deb filename"
The steps are as follows:
1. Let's suppose TIDY_VERSION you are building is 02October2003
2. Unpack original source tree
tar zxvf tidy_src.tgz
This will extract to a directory called tidy
3. mv tidy tidy-02October2003
Edit the tidy.spec file inside directory tidy-02October2003
and make sure the Version variable is changed to 02October2003.
Also edit the Makefile and change prefix to "exactly" say this:
runinst_prefix=${RPMTMP}
devinst_prefix=${RPMTMP}
4. tar zcvf tidy-02October2003.tgz tidy-02October2003
5. rpmbuild -ta tidy-02October2003.tgz
6. rm tidy-02October2003.tgz
7. To derive Debian package for tidy run command on created rpm packages
rpm2deb tidy-02October2003-1.rpm

155
build/rpm/tidy.spec Normal file
View file

@ -0,0 +1,155 @@
#
# spec file for package tidy
#
# (c) 2006 (W3C) MIT, ERCIM, Keio University
# See tidy.h for the copyright notice.
#
# CVS Info :
#
# $Author: arnaud02 $
# $Date: 2006/02/22 14:21:12 $
# $Revision: 1.2 $
#
# Contributing Author(s):
# Sierk Bornemann <bornemann@users.sourceforge.net>
#
# norootforbuild
# neededforbuild doxygen libxslt libtool
BuildRequires: doxygen libxslt libtool
Name: tidy
Version: 1.0
Release: YYMMDD
%define docrelease YYMMDD
Summary: Utility to clean up and pretty print HTML/XHTML/XML
Group: Applications/Tools
License: W3C Software License, MIT Licence, Other License(s), see package
Autoreqprov: on
URL: http://tidy.sourceforge.net/
Source0: http://sourceforge.net/cvs/?group_id=27659
Source1: http://tidy.sourceforge.net/src/tidy_src.tgz
Source2: http://tidy.sourceforge.net/docs/tidy_docs.tgz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-build
%description
When editing HTML it's easy to make mistakes. Wouldn't it be nice if
there was a simple way to fix these mistakes automatically and tidy up
sloppy editing into nicely laid out markup? Well now there is! Dave
Raggett's HTML TIDY is a free utility for doing just that. It also
works great on the atrociously hard to read markup generated by
specialized HTML editors and conversion tools, and can help you
identify where you need to pay further attention on making your pages
more accessible to people with disabilities.
Tidy is able to fix up a wide range of problems and to bring to your
attention things that you need to work on yourself. Each item found is
listed with the line number and column so that you can see where the
problem lies in your markup. Tidy won't generate a cleaned up version
when there are problems that it can't be sure of how to handle. These
are logged as "errors" rather than "warnings".
Authors:
--------
Tidy was written by Dave Raggett <dsr@w3.org> and is now maintained
and developed by the Tidy team at http://tidy.sourceforge.net/.
%package -n libtidy
Summary: Shared library for tidy
Group: Development/Libraries
Autoreqprov: on
%description -n libtidy
This package contains the library needed to run programs dynamically
linked with tidy.
%package -n libtidy-devel
Summary: Development files for tidy
Group: Development/Libraries
Requires: libtidy = %{version}-%{release}
Autoreqprov: on
%description -n libtidy-devel
This package contains the headers, the shared libraries and the API
documentation which programmers will need to develop applications based on
tidy.
%debug_package
%prep
%setup -q -n %{name} -b 1
mv htmldoc/doxygen.cfg Doxyfile
%build
export CFLAGS="$RPM_OPT_FLAGS"
/bin/sh build/gnuauto/setup.sh
%configure --disable-dependency-tracking \
--includedir=%{_includedir}/%{name}
make %{?_smp_mflags} all
make -C build/gmake/ doc
doxygen
%install
rm -rf $RPM_BUILD_ROOT _api
make install DESTDIR=$RPM_BUILD_ROOT
# Manpage
install -Dpm 644 htmldoc/tidy.1 $RPM_BUILD_ROOT%{_mandir}/man1/tidy.1
# Quick Reference
install -Dpm 644 htmldoc/quickref.html $RPM_BUILD_ROOT%{_defaultdocdir}/%{name}/quickref.html
# Move API directory out of the way
mv htmldoc/api _api
%clean
if ! test -f /.buildenv; then
rm -rf $RPM_BUILD_ROOT;
fi
%post -n lib%{name} -p /sbin/ldconfig
%postun -n lib%{name} -p /sbin/ldconfig
%files
%defattr(-, root, root)
%doc htmldoc/*
%{_bindir}/tidy
%{_bindir}/tab2space
%{_mandir}/man1/tidy.1*
%files -n libtidy
%defattr(-, root, root)
%doc htmldoc/license.html
%{_libdir}/libtidy*.so.*
%files -n libtidy-devel
%defattr(-, root, root)
%doc _api/*
%{_includedir}/%{name}/*.h
%{_libdir}/libtidy.so
%{_libdir}/libtidy.a
%exclude %{_libdir}/libtidy.la
%changelog -n tidy
* Wed Feb 22 2006 - Sierk Bornemann <bornemann@sourceforge.net>
Rewritten RPM Spec file:
- respects filesystem layout of current FHS-compliant linux distributions.
- respects current tidy Makefile and
creation of tidy docs (XSL transformation from tidy's XML output).
* Mon Oct 25 2003 - Al Dev (Alavoor Vasudevan) <alavoor[at]yahoo.com>
- Initial version of %{name} rpm

362
console/tab2space.c Normal file
View file

@ -0,0 +1,362 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "platform.h"
#define true 1
#define false 0
#define TABSIZE 4
#define DOS_CRLF 0
#define UNIX_LF 1
#define MAC_CR 2
/*
 Input state plus the growing in-memory copy of the text read so far.
 NewStream zero-fills the whole structure before setting fp.
*/
typedef struct
{
Bool pushed; /* when set, ReadChar returns pushed_char instead of reading */
int tabs; /* spaces still to be returned for the tab being expanded */
int curcol; /* current column; ReadChar resets it to 1 after a newline */
int lastcol; /* value of curcol saved by ReadChar before it reads */
int maxcol; /* largest curcol seen when a newline was read */
int curline; /* incremented by ReadChar for every newline read */
int pushed_char; /* the pushed-back character, valid while pushed is set */
uint size; /* number of bytes stored in buf */
uint length; /* number of bytes allocated for buf */
char *buf; /* accumulated text; AddByte keeps it NUL-terminated */
FILE *fp; /* file the characters are read from */
} Stream;
/* Command-line options, set in main and consulted by ReadChar / WriteFile. */
static int tabsize = TABSIZE; /* tab stop width, changed by -t<n> */
static int endline = DOS_CRLF; /* line ending written by WriteFile */
static Bool tabs = false; /* true (-tabs): tabs are kept, not expanded */
/*
  The allocation wrappers below funnel every heap request through
  one place, so that running out of memory is noticed and reported
  here rather than at each call site.

  MemAlloc: returns a block of `size` bytes obtained from malloc.
  If malloc yields NULL a message is written to stderr and the
  program exits with status 1, so callers never see NULL.
*/
void *MemAlloc(size_t size)
{
    void *block = malloc(size);

    if (block == NULL)
    {
        fprintf(stderr, "***** Out of memory! *****\n");
        exit(1);
    }

    return block;
}
/*
  Resize the block `old` (which may be NULL) to `size` bytes.

  Returns the (possibly moved) block.  Like MemAlloc, an allocation
  failure is fatal: a message is written to stderr and the program
  exits with status 1.  Previously NULL was handed back, and the
  caller in this file stored and dereferenced it without a check.

  A request for zero bytes is passed straight through: realloc may
  legitimately return NULL for it, which is not treated as an error.
*/
void *MemRealloc(void *old, size_t size)
{
    void *p = realloc(old, size);

    if (p == NULL && size != 0)
    {
        fprintf(stderr, "***** Out of memory! *****\n");
        exit(1);
    }

    return p;
}
/*
  Release a block obtained from MemAlloc / MemRealloc.  Passing
  NULL is harmless, since free(NULL) does nothing.

  The pointer is received by value, so the caller's copy cannot be
  cleared from here; the former `p = NULL` only changed the local
  parameter and has been dropped.
*/
void MemFree(void *p)
{
    free(p);
}
static Stream *NewStream(FILE *fp)
{
Stream *in;
in = (Stream *)MemAlloc(sizeof(Stream));
memset(in, 0, sizeof(Stream));
in->fp = fp;
return in;
}
/*
  Dispose of a Stream: first its text buffer (if one was ever
  allocated), then the Stream itself.  The FILE is not closed here.
*/
static void FreeStream(Stream *in)
{
    char *text = in->buf;

    if (text != NULL)
        MemFree(text);

    MemFree(in);
}
/*
  Append the byte `c` to the stream's buffer, growing the buffer
  when needed, and keep the buffer NUL-terminated.

  The capacity starts at 8192 bytes and doubles until it can hold
  the new byte plus the terminator.  Two failure cases end the
  program instead of corrupting memory:
    - the doubled capacity would no longer fit in a uint;
    - MemRealloc hands back NULL (the result used to be stored
      and dereferenced without a check).
*/
static void AddByte(Stream *in, uint c)
{
    if (in->size + 1 >= in->length)
    {
        uint capacity = in->length;
        char *grown;

        while (in->size + 1 >= capacity)
        {
            if (capacity == 0)
                capacity = 8192;
            else if (capacity > ((uint)-1) / 2)
            {
                /* doubling would wrap around to a smaller value */
                fprintf(stderr, "***** Out of memory! *****\n");
                exit(1);
            }
            else
                capacity = capacity * 2;
        }

        grown = (char *)MemRealloc(in->buf, capacity * sizeof(char));

        /* never write through a failed reallocation */
        if (grown == NULL)
            exit(1);

        in->buf = grown;
        in->length = capacity;
    }

    in->buf[in->size++] = (char)c;
    in->buf[in->size] = '\0';  /* debug */
}
/*
  Fetch the next character from a stream while maintaining the
  line and column bookkeeping in the Stream.

  - A pending pushed-back character (in->pushed) is returned first.
  - "\r\n" and a lone "\r" are both delivered as '\n'.
  - A tab is either kept (global `tabs` set) or delivered as a
    run of spaces up to the next tab stop, one space per call.
  - ESC ('\033') is passed through without advancing the column.
  - Other control characters in the range 1..31 are dropped.

  Returns EOF when there is nothing more to read.
*/
static int ReadChar(Stream *in)
{
    int ch;

    /* a pushed-back character takes priority over everything else */
    if (in->pushed)
    {
        in->pushed = false;

        if (in->pushed_char == '\n')
            in->curline--;

        return in->pushed_char;
    }

    in->lastcol = in->curcol;

    /* still handing out the spaces of a tab seen earlier? */
    if (in->tabs > 0)
    {
        in->curcol++;
        in->tabs--;
        return ' ';
    }

    /* otherwise read from the file until a deliverable character turns up */
    while ((ch = getc(in->fp)) != EOF)
    {
        /* fold \r\n and an isolated \r into \n */
        if (ch == '\r')
        {
            int following = getc(in->fp);

            if (following != '\n')
                ungetc(following, in->fp);

            ch = '\n';
        }

        if (ch == '\n')
        {
            if (in->maxcol < in->curcol)
                in->maxcol = in->curcol;

            in->curcol = 1;
            in->curline++;
            return ch;
        }

        if (ch == '\t')
        {
            /* distance from the current column to the next tab stop */
            int advance = tabsize - ((in->curcol - 1) % tabsize);

            if (tabs)
            {
                in->curcol += advance;
                return ch;
            }

            /* expand: this call yields the first space, the rest follow */
            in->tabs = advance - 1;
            in->curcol++;
            return ' ';
        }

        if (ch == '\033')
            return ch;

        /* drop the remaining control characters */
        if (0 < ch && ch < 32)
            continue;

        in->curcol++;
        return ch;
    }

    return EOF;
}
static Stream *ReadFile(FILE *fin)
{
int c;
Stream *in = NewStream(fin);
while ((c = ReadChar(in)) >= 0)
AddByte(in, (uint)c);
return in;
}
/*
  Write the buffered text of `in` to `fout`, replacing every '\n'
  with the line ending chosen by the global `endline`.  All other
  bytes are copied unchanged.
*/
static void WriteFile(Stream *in, FILE *fout)
{
    char *cursor = in->buf;
    int remaining;

    for (remaining = in->size; remaining != 0; --remaining)
    {
        int ch = *cursor++;

        if (ch != '\n')
        {
            putc(ch, fout);
            continue;
        }

        switch (endline)
        {
        case DOS_CRLF:
            putc('\r', fout);
            putc('\n', fout);
            break;

        case UNIX_LF:
            putc('\n', fout);
            break;

        case MAC_CR:
            putc('\r', fout);
            break;

        default:
            /* unrecognised setting: the newline is dropped */
            break;
        }
    }
}
/*
  Print the usage summary to `errout`.  `prog` is the program name
  shown at the start of the first line.
*/
static void HelpText(FILE *errout, char *prog)
{
    /* fixed text that follows the usage line; none of it contains '%' */
    static const char *const body[] =
    {
        "Utility to expand tabs and ensure consistent line endings\n",
        "options for tab2space vers: 6th February 2003\n",
        " -help or -h display this help message\n",
        " -dos or -crlf set line ends to CRLF (PC-DOS/Windows - default)\n",
        " -mac or -cr set line ends to CR (classic Mac OS)\n",
        " -unix or -lf set line ends to LF (Unix)\n",
        " -tabs preserve tabs, e.g. for Makefile\n",
        " -t<n> set tabs to <n> (default is 4) spaces\n",
        "\nNote this utility doesn't map spaces to tabs!\n"
    };
    size_t i;

    fprintf(errout, "%s: [options] [infile [outfile]] ...\n", prog);

    for (i = 0; i < sizeof(body) / sizeof(body[0]); ++i)
        fputs(body[i], errout);
}
/*
  tab2space entry point.

  Arguments are consumed left to right.  Options (anything starting
  with '-') update the global settings; the remaining arguments are
  taken as infile/outfile pairs.  With no infile stdin is read, and
  with no outfile the result goes to stdout.

  Returns 1 after printing the help text, 0 otherwise (a file that
  cannot be opened is reported on stderr but does not change the
  exit status).
*/
int main(int argc, char **argv)
{
    char const *infile, *outfile;
    char *prog;
    FILE *fin, *fout;
    Stream *in = NULL;

    prog = argv[0];

    while (argc > 0)
    {
        if (argc > 1 && argv[1][0] == '-')
        {
            /* note: any option whose second character is 'h' asks for help */
            if (strcmp(argv[1], "-help") == 0 || argv[1][1] == 'h')
            {
                HelpText(stdout, prog);
                return 1;
            }

            if (strcmp(argv[1], "-dos") == 0 ||
                strcmp(argv[1], "-crlf") == 0)
                endline = DOS_CRLF;
            else if (strcmp(argv[1], "-mac") == 0 ||
                     strcmp(argv[1], "-cr") == 0)
                endline = MAC_CR;
            else if (strcmp(argv[1], "-unix") == 0 ||
                     strcmp(argv[1], "-lf") == 0)
                endline = UNIX_LF;
            else if (strcmp(argv[1], "-tabs") == 0)
                tabs = true;
            else if (strncmp(argv[1], "-t", 2) == 0)
            {
                /*
                  ReadChar computes "% tabsize", so a zero or negative
                  width must never be accepted; keep the previous value
                  when the number is missing or not positive.
                */
                int requested = 0;

                if (sscanf(argv[1] + 2, "%d", &requested) == 1 && requested > 0)
                    tabsize = requested;
                else
                    fprintf(stderr, "%s - ignoring invalid tab size \"%s\"\n",
                            prog, argv[1] + 2);
            }

            --argc;
            ++argv;
            continue;
        }

        if (argc > 1)
        {
            infile = argv[1];
            fin = fopen(infile, "rb");
        }
        else
        {
            infile = "stdin";
            fin = stdin;
        }

        if (argc > 2)
        {
            /* the output file is opened only after the input was read */
            outfile = argv[2];
            fout = NULL;
            --argc;
            ++argv;
        }
        else
        {
            outfile = "stdout";
            fout = stdout;
        }

        if (fin)
        {
            in = ReadFile(fin);

            if (fin != stdin)
                fclose(fin);

            if (fout != stdout)
                fout = fopen(outfile, "wb");

            if (fout)
            {
                WriteFile(in, fout);

                if (fout != stdout)
                    fclose(fout);
            }
            else
                fprintf(stderr, "%s - can't open \"%s\" for writing\n", prog, outfile);

            FreeStream(in);
        }
        else
            fprintf(stderr, "%s - can't open \"%s\" for reading\n", prog, infile);

        --argc;
        ++argv;

        if (argc <= 1)
            break;
    }

    return 0;
}

1321
console/tidy.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,47 @@
#include "platform.h"
#include "tidy-int.h"
#include "TidyNodeIter.h"
/*
  Allocate an iterator rooted at pStart.  The structure is cleared,
  so pCurrent starts out NULL.  Returns NULL if MemAlloc returns NULL.
*/
TidyNodeIter *newTidyNodeIter( Node *pStart )
{
    TidyNodeIter *pIter = MemAlloc( sizeof( TidyNodeIter ));

    if (NULL == pIter)
        return NULL;

    ClearMemory( pIter, sizeof( TidyNodeIter ));
    pIter->pTop = pStart;

    return pIter;
}
/*
  Advance the iterator and return the node it now points at, or
  NULL when no node is left below pTop.
*/
Node *nextTidyNode( TidyNodeIter *pThis )
{
    Node *pNode = pThis->pCurrent;

    if (NULL == pNode)
    {
        /* first call: begin with the first child of the top node */
        pNode = pThis->pTop->content;
    }
    else if (NULL != pNode->content)
    {
        /* descend to the first child */
        pNode = pNode->content;
    }
    else
    {
        /*
          A leaf: climb until a node with a following sibling is
          found, or until the node sits directly below pTop.
        */
        while ( !( NULL != pNode->next
                   || pThis->pTop == pNode->parent ) )
        {
            pNode = pNode->parent;
        }

        /* step to that sibling; NULL here means the walk is over */
        pNode = pNode->next;
    }

    pThis->pCurrent = pNode;
    return pNode;
}
/*
  Reposition the iterator on newCurr.  A NULL newCurr is ignored
  and leaves the iterator where it was.
*/
void setCurrentNode( TidyNodeIter *pThis, Node *newCurr )
{
    if (NULL == newCurr)
        return;

    pThis->pCurrent = newCurr;
}

View file

@ -0,0 +1,51 @@
/* TidyNodeIter
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
These files contain utility routines to perform in-order traversals of the
Tidy document tree, beginning at an arbitrary node.
A traversal of the tree can be performed in a manner similar to the following:
Node *testNode;
TidyNodeIter *iter = newTidyNodeIter( FindBody( tdoc ));
for (testNode = nextTidyNode( iter );
NULL != testNode;
testNode = nextTidyNode( iter ))
{
}
TODO: Add a prevTidyNode() function.
*/
#include "lexer.h"
/* Iterator state used by newTidyNodeIter(), nextTidyNode() and
   setCurrentNode(). */
typedef struct _TidyNodeIter
{
/* pTop:     node the traversal is rooted at (set by newTidyNodeIter()).
   pCurrent: node most recently returned by nextTidyNode() or installed by
             setCurrentNode(); while it is NULL, nextTidyNode() starts from
             the first child of pTop. */
Node *pTop, *pCurrent;
} TidyNodeIter;
TidyNodeIter *newTidyNodeIter( Node *pStart );
/*
nextTidyNode( TidyNodeIter *pIter )
if pCurrent is NULL, this function initializes it to the first child of
pTop and returns that value; otherwise it advances to the next node in
document order and returns that value. The function returns NULL once
every node below pTop has been visited; pTop itself is never returned.
*/
Node *nextTidyNode( TidyNodeIter *pIter );
/*
setCurrentNode( TidyNodeIter *pThis, Node *newCurr )
Resets pCurrent to match the passed value; useful if you need to back up
to an unaltered point in the tree, or to skip a section. The next call to
nextTidyNode() will return the node which follows newCurr in order.
Minimal error checking is performed; unexpected results _will_ occur if
newCurr is not a descendant node of pTop.
*/
void setCurrentNode( TidyNodeIter *pThis, Node *newCurr );

224
experimental/httpio.c Normal file
View file

@ -0,0 +1,224 @@
#include "tmbstr.h"
#include "httpio.h"
/* Resolve pHttp->pHostName and open a TCP connection to it on
** pHttp->nPort, storing the socket in pHttp->s.
**
** Returns 0 on success, ECONNREFUSED when connect() fails with that errno,
** and -1 for every other failure (lookup failed, unusable address, socket
** could not be created, other connect error).  When connect() fails the
** socket is left in pHttp->s; this function does not close it.
*/
int
makeConnection ( HTTPInputSource *pHttp )
{
    struct sockaddr_in sock;
    struct hostent *pHost;

    /* Get internet address of the host. */
    if (!(pHost = gethostbyname ( pHttp->pHostName )))
    {
        return -1;
    }

    /* Refuse addresses that do not fit an IPv4 sin_addr rather than
    ** letting memcpy() write past the end of the field.
    */
    if (pHost->h_length <= 0
        || (size_t) pHost->h_length > sizeof( sock.sin_addr ))
    {
        return -1;
    }

    /* Zero the whole structure so padding members are well defined
    ** before it is handed to connect().
    */
    memset (&sock, 0, sizeof( sock ));

    /* Copy the address of the host to socket description. */
    memcpy (&sock.sin_addr, pHost->h_addr, (size_t) pHost->h_length);

    /* Set port and protocol */
    sock.sin_family = AF_INET;
    sock.sin_port = htons( pHttp->nPort );

    /* Make an internet socket, stream type. */
    if ((pHttp->s = socket (AF_INET, SOCK_STREAM, 0)) == -1)
        return -1;

    /* Connect the socket to the remote host. */
    if (connect (pHttp->s, (struct sockaddr *) &sock, sizeof( sock )))
    {
        if (errno == ECONNREFUSED)
            return ECONNREFUSED;
        else
            return -1;
    }
    return 0;
}
/* Split url into host name, port and resource and store them in pHttp.
**
** Accepted forms are "[http://]host[:port][/resource]".  A scheme other
** than http is rejected.  The port defaults to 80 and, when given, must
** consist only of digits and lie in the range 1..65535.  Leading slashes
** are stripped from the resource.
**
** Returns 0 on success and -1 on a malformed URL.  Note that pHostName has
** already been allocated when the function fails on a bad port.
*/
int parseURL( HTTPInputSource *pHttp, tmbstr url )
{
    int i, j = 0;
    ctmbstr pStr;
    unsigned long port = 0;

    pStr = tmbsubstr( url, "://" );

    /* If protocol is there, but not http, bail out, else assume http. */
    if (NULL != pStr)
    {
        if (tmbstrncasecmp( url, "http://", 7 ))
            return -1;
        /* host name starts right after the "://" */
        j = pStr - url + 3;
    }

    /* The host name runs up to the first ':' or '/' (or end of string). */
    for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
    if (i == j)
        return -1;

    /* Get the hostname. */
    pHttp->pHostName = tmbstrndup (&url[j], i - j );

    if (url[i] == ':')
    {
        /* We have a colon delimiting the hostname.  It should mean that
           a port number is following it */
        pHttp->nPort = 0;
        ++i;

        /* The casts keep isdigit() defined for characters >= 0x80. */
        if (!isdigit( (unsigned char) url[i] ))
            return -1;   /* a misformed port number */

        for (; url[i] && url[i] != '/'; i++)
        {
            if (!isdigit( (unsigned char) url[i] ))
                return -1;
            /* Accumulate in a wide type so that an over-long port is
               rejected instead of wrapping around in nPort. */
            port = 10 * port + (unsigned long) (url[i] - '0');
            if (port > 65535)
                return -1;
        }
        if (0 == port)
            return -1;
        pHttp->nPort = (unsigned short) port;
    }
    else
        /* Assume default port. */
        pHttp->nPort = 80;

    /* skip past the delimiting slash (we'll add it later ) */
    while (url[i] && url[i] == '/')
        i++;
    pHttp->pResource = tmbstrdup (url + i );
    return 0;
}
/* Refill in->buffer with the next chunk of data from the socket.
**
** On a successful read nextBytePos is reset to 0, nBufSize is set to the
** number of bytes received, and the data is NUL-terminated when there is
** room for the terminator.  A recv() error is treated like end of stream:
** nBufSize becomes 0 instead of the error code being stored in the
** unsigned counter.  When the socket is not open nBufSize is set to 0.
**
** Returns the number of bytes now available in the buffer (0 when none).
*/
int fillBuffer( HTTPInputSource *in )
{
    int got = 0;

    if (0 < in->s)
    {
        got = recv( in->s, in->buffer, sizeof( in->buffer ), 0);
        if (got < 0)
            got = 0;   /* read error: report "no data" */

        in->nextBytePos = 0;
        if (got < (int) sizeof( in->buffer ))
            in->buffer[got] = '\0';
    }

    in->nBufSize = (unsigned short) got;
    return got;
}
/* Initialise *in as a Tidy input source for pUrl, connect to the server,
** send an HTTP/1.0 GET request and consume the response header so that the
** next byte read is the first non-blank byte of the body.
**
** Returns -1 when the URL cannot be parsed or the request buffer cannot be
** allocated, the non-zero result of makeConnection() when connecting
** fails, and otherwise the result of the last fillBuffer() call made while
** skipping the header (0 if the stream ended inside the header).
*/
int openURL( HTTPInputSource *in, tmbstr pUrl )
{
    int rc = -1;
#ifdef WIN32
    WSADATA wsaData;

    /* 514 == 0x0202 */
    rc = WSAStartup( 514, &wsaData );
#endif

    in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
    in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
    in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
    /* NOTE(review): this cast narrows the pointer if uint is smaller than
       a pointer on the target platform - confirm the type of sourceData. */
    in->tis.sourceData = (uint) in;
    in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;

    /* Do not try to connect with host/port fields that were never set. */
    if (0 != parseURL( in, pUrl ))
        return -1;

    if (0 == (rc = makeConnection( in )))
    {
        char ch, lastCh = '\0';
        int blanks = 0;
        /* 48 bytes cover the fixed part of the request plus the NUL. */
        char *getCmd = MemAlloc( 48 + strlen( in->pResource ));

        if (NULL == getCmd)
            return -1;

        sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
        send( in->s, getCmd, strlen( getCmd ), 0 );
        MemFree( getCmd );

        /* skip past the header information */
        while ( in->nextBytePos >= in->nBufSize
                && 0 < (rc = fillBuffer( in )))
        {
            /* header already ended exactly at the previous buffer's end */
            if (1 < blanks)
                break;
            for (; in->nextBytePos < sizeof( in->buffer )
                   && 0 != in->buffer[ in->nextBytePos ];
                 in->nextBytePos++ )
            {
                ch = in->buffer[ in->nextBytePos ];
                if (ch == '\r' || ch == '\n')
                {
                    if (ch == lastCh)
                    {
                        /* Two carriage returns or two newlines in a row,
                           that's good enough */
                        blanks++;
                    }
                    if (lastCh == '\r' || lastCh == '\n')
                    {
                        blanks++;
                    }
                }
                else
                    blanks = 0;
                lastCh = ch;
                if (1 < blanks)
                {
                    /* end of header, scan to first non-white and return;
                       the scan never leaves the received data */
                    while (in->nextBytePos < in->nBufSize
                           && in->nextBytePos < sizeof( in->buffer )
                           && isspace( (unsigned char) in->buffer[ in->nextBytePos ] ))
                        in->nextBytePos++;
                    break;
                }
            }
        }
    }
    return rc;
}
/* Close the connection held by source, if any, and reset the fields that
** mark it as open.  On WIN32 the Winsock library is released as well.
*/
void closeURL( HTTPInputSource *source )
{
    /* Only a positive socket value is treated as an open connection. */
    if (source->s > 0)
    {
        closesocket( source->s );
    }

    /* Mark the source as closed. */
    source->tis.sourceData = 0;
    source->s = -1;

#ifdef WIN32
    WSACleanup();
#endif
}
/* Return the next byte of the stream as a value in the range 0..255, or
** EndOfStream when no more data is available.
**
** Bytes pushed back with HTTPUngetByte() are returned first, most recent
** first.  The buffer is refilled from the socket once it has been used up.
** Bytes are converted through unsigned char so that values of 0x80 and
** above are not sign-extended into negative results.
*/
int HTTPGetByte( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        return (unsigned char) source->unGetBuffer[ --source->nextUnGotBytePos ];

    /* Buffer exhausted (but not empty from the start): fetch more data. */
    if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
    {
        fillBuffer( source );
    }

    if (0 == source->nBufSize)
        return EndOfStream;

    return (unsigned char) source->buffer[ source->nextBytePos++ ];
}
/* Push byteValue back so that the next HTTPGetByte() call returns it.
** The byte is silently dropped when the unget buffer is already full.
*/
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
{
    /* Bound by the array's real size rather than a repeated constant. */
    if (source->nextUnGotBytePos < sizeof( source->unGetBuffer ))
        source->unGetBuffer[ source->nextUnGotBytePos++ ] = (char) byteValue;
}
/* Report whether the input source has been exhausted.
** Returns no while pushed-back bytes are pending or unread data remains in
** the buffer (refilling it first when it has been used up); yes otherwise.
*/
Bool HTTPIsEOF( HTTPInputSource *source )
{
    /* Pushed-back bytes still have to be delivered. */
    if (0 != source->nextUnGotBytePos)
    {
        return no;
    }

    /* The current buffer has been consumed: try to fetch another one. */
    if (source->nBufSize != 0 && source->nBufSize <= source->nextBytePos)
    {
        fillBuffer( source );
    }

    /* Unread data means not done; an empty buffer means the stream ended. */
    return (source->nextBytePos < source->nBufSize) ? no : yes;
}

48
experimental/httpio.h Normal file
View file

@ -0,0 +1,48 @@
#ifndef __HTTPIO_H__
#define __HTTPIO_H__
#include "platform.h"
#include "tidy.h"
#ifdef WIN32
# include <winsock.h>
# define ECONNREFUSED WSAECONNREFUSED
#else
# include <sys/socket.h>
# include <netdb.h>
# include <netinet/in.h>
#ifndef __BEOS__
# include <arpa/inet.h>
#endif
#endif /* WIN32 */
/* State for reading a document over HTTP through the Tidy input source
   callbacks (HTTPGetByte, HTTPUngetByte, HTTPIsEOF). */
TIDY_STRUCT
typedef struct _HTTPInputSource
{
TidyInputSource tis; // This declaration must be first and must not be changed!
/* Host name and resource path extracted from the URL by parseURL(). */
tmbstr pHostName;
tmbstr pResource;
/* nPort:            port to connect to (80 unless the URL names one)
   nextBytePos:      index in buffer of the next byte HTTPGetByte() returns
   nextUnGotBytePos: number of bytes currently held in unGetBuffer
   nBufSize:         number of bytes placed in buffer by the last fillBuffer() */
unsigned short nPort, nextBytePos, nextUnGotBytePos, nBufSize;
/* Socket created by makeConnection(); closeURL() resets it to -1. */
SOCKET s;
/* Data most recently received from the socket. */
char buffer[1024];
/* Bytes pushed back by HTTPUngetByte(), returned in last-in first-out order. */
char unGetBuffer[16];
} HTTPInputSource;
/* get next byte from input source */
int HTTPGetByte( HTTPInputSource *source );
/* unget byte back to input source */
void HTTPUngetByte( HTTPInputSource *source, uint byteValue );
/* check if input source at end */
Bool HTTPIsEOF( HTTPInputSource *source );
int parseURL( HTTPInputSource* source, tmbstr pUrl );
int openURL( HTTPInputSource* source, tmbstr pUrl );
void closeURL( HTTPInputSource *source );
#endif

11815
html5-for-html-tidy.patch-2 Normal file

File diff suppressed because it is too large Load diff

1546
htmldoc/Overview.html Normal file

File diff suppressed because it is too large Load diff

BIN
htmldoc/checked_by_tidy.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

1252
htmldoc/doxygen.cfg Normal file

File diff suppressed because it is too large Load diff

300
htmldoc/faq.html Normal file
View file

@ -0,0 +1,300 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="generator" content=
"HTML Tidy for Mac OS X (vers 1st June 2003), see www.w3.org" />
<link type="text/css" rel="stylesheet" href="tidy.css" />
<title>HTML Tidy - Frequently Asked Questions</title>
<style type="text/css">
code { font-weight: bold; }
</style>
</head>
<body>
<h1>HTML Tidy - Frequently Asked Questions</h1>
<h2>Overview</h2>
<p class="abstract">Certain questions about Tidy come up on a
regular basis. These are some that have been culled from postings
to the html-tidy@w3.org and tidy-develop@lists.sourceforge.net
mailing lists. If you don't see your question addressed here, see
<a href="#support">How To Get Support</a> below.</p>
<ul>
<li><a href="#what-now">What Now?</a></li>
<li><a href="#support">How to Get Support?</a></li>
<li><a href="#bug">How to Submit A Bug Report</a></li>
<li><a href="#feature">How to Submit A Feature Request</a></li>
<li><a href="#layout">How Do I Control the Output Layout?</a></li>
<li><a href="#version">What Version of Tidy Should I Use?</a></li>
<li><a href="#regression">How Do I Run A Regression Test?</a></li>
</ul>
<hr />
<dl>
<dt><a name="what-now" id="what-now"></a>What Now?</dt>
<dd><p>If you have a popup screen that reads as follows:</p>
<pre>
HTML Tidy for Windows &lt;vers 1st August 2002; built on Aug 8 2002, at 15:41:13&gt;
Parsing Console input &lt;stdin&gt;
</pre>
<p>and do not know what to do next, read on.</p>
<p>Tidy is waiting for your HTML to come in, so it can parse it.
Tidy is fundamentally a tool that reads in HTML, cleans it up, and
writes it out again. It was developed as a program you run from the
console prompt, but there are GUI encapsulations available, e.g.
HTML-Kit, which you might prefer.</p>
<p>If you are using Windows, the first step is to unzip the zip file
and place the tidy.exe file in a folder somewhere on your executables
path. You may also want to set up a config file to save having to type
lots of options each time you run Tidy. From the console prompt you can
run Tidy like this:</p>
<pre>
C> tidy -m mywebpage.html
</pre>
<p>In this case, the <code>-m</code> option requests Tidy to write
the tidied file back to the same filename as it read from
(mywebpage.html). Tidy will give you a breakdown of the problems it
found and the version of HTML the file appears to be using.</p>
<p>To get a listing of Tidy command line options, just type
<code>tidy -?</code>. To see a listing on configuration options,
try <code>tidy -help-config</code>. To get more info on the
config options, see the <a
href="http://tidy.sourceforge.net/docs/quickref.html">Quick Reference</a>.</p>
<p>See also Dave Raggett's <a href="http://tidy.sourceforge.net/docs/Overview.html#help">User Guide</a>.</p>
<p>If you're not comfortable with the DOS command line, you should
try one of the <a href="http://tidy.sourceforge.net/#tidylibapps">GUI
Applications</a>.</p>
</dd>
<dt><a name="support" id="support"></a>How To Get Support</dt>
<dd>
<p>For general HTML Tidy support, the original mailing list
html-tidy@w3.org is best. Sometimes developers are the last to
know... Also, this list covers both Java and C versions, not to
mention various value-added products such as GUI front ends, Perl
and Python integration, etc. If you don't get a response after a
couple tries or if you have a bug fix, bump it over to the
developer list at tidy-develop@lists.sourceforge.net. It's not a
hard line, but that is the general arrangement.</p>
</dd>
<dt><a name="bug" id="bug"></a>How to Submit A Bug Report</dt>
<dd>
<p>You are encouraged to report bugs you found to the Tidy
developer team. Tidy's quality depends on your feedback. You can
either file your bug report in the Sourceforge <a
href="http://sourceforge.net/tracker/?func=add&amp;group_id=27659&amp;atid=390963">
bug tracker</a> for HTML Tidy (<em>recommended</em>) or send a mail
to the mailing list at html-tidy@w3.org. Note you do <em>not</em>
have to have a Sourceforge account in order to file bug reports, or
be subscribed to html-tidy@w3.org in order to post messages to the
list.</p>
<p>Prior to submitting a bug report, please check that the bug is
not already known. Many are. If you are not sure, just ask. If it
is a new bug, make sure to include at least the following information
in your report:</p>
<ul>
<li>A description of what you think went wrong.</li>
<li>The HTML Tidy version (find it out by running <code>tidy
-v</code>) and operating system you are running.</li>
<li>The input, that exposes the bug.<br />
A small HTML document that reproduces the problem is best.</li>
<li>The configuration options you've used. Command line options
like<br />
<code>-asxml</code>, configuration files, etc. You may use
<code>tidy -show-config</code> to get an overview of the active
Tidy settings.</li>
<li>Your e-mail address for further questions and comments.</li>
</ul>
<p>This information is necessary to reproduce whatever is
failing; without it we cannot help you. Additional information -
and patches - are very welcome!</p>
<p><em>Please include only one bug per report.</em> Reports with
multiple bugs are less easy to track and some bugs may get
missed.</p>
</dd>
<dt><a name="feature" id="feature"></a>How to Submit A Feature
Request</dt>
<dd>
<p>If you want Tidy to do something new that it doesn't do today
(or stop doing something), then it is probably a feature
request.</p>
<p>The process for submitting a feature request is very similar to
bug requests. A different <a
href="http://sourceforge.net/tracker/?atid=390966&amp;group_id=27659">
tracker</a> is used on SourceForge to denote the difference in
subject matter.</p>
<p>As with bugs, please be sure that the feature has not already
been requested. If the feature has already been requested, you can add
your comments to the feature request tracker, or send mail to the
<a href="mailto:html-tidy@w3.org">mailing list</a> indicating your
wish to also have the feature implemented. If the feature has not
already been requested, send the same information as for a bug
report, but place special emphasis on the desired output for a
given input, desired options, etc. - please be as specific as
possible about what you want Tidy to <em>do</em>.</p>
</dd>
<dt><a name="layout" id="layout"></a>How Do I Control the Output Layout?</dt>
<dd>
<p>There are three primary options that control how Tidy
formats your markup:</p>
<ul>
<li><a class="code"
href="quickref.html#indent">indent</a></li>
<li><a class="code"
href="quickref.html#indent-attributes">indent-attributes</a></li>
<li><a class="code"
href="quickref.html#vertical-space">vertical-space</a></li>
</ul>
<p>Briefly, <code>indent</code> sets the level of left-to-right indenting
and, somewhat, how often elements are put onto a new line. The options
are <code>yes</code>, <code>no</code>, and <code>auto</code>.
<code>indent-attributes</code> is a flag that, when set, tells Tidy to
put each attribute on a new line. <code>vertical-space</code> is a flag
that, when set, tells Tidy to add some empty lines for readability. The
default for all three is <code>no</code>. These options may be used in
any combination to control how you want your markup to look. The best
thing is to experiment a bit to see what you like. Be aware that
<code>indent yes</code> is deprecated for production use as it will
cause visual changes in most browsers.</p>
<p>To get Tidy <em>Classic</em> <code>--indent auto</code> layout, use the following options:</p>
<pre>
indent: auto
indent-attributes: no
vertical-space: yes
</pre>
<p>You can read about more <em>Pretty Print</em> options
<a href="quickref.html#PrettyPrintHeader">here</a>.</p>
</dd>
<dt><a name="version" id="version"></a>What Version of Tidy Should
I Use?</dt>
<dd>
<p>The current Source Forge builds are recommended. You can find these at
<a href="http://tidy.sourceforge.net">http://tidy.sourceforge.net</a>.
People continue to report examples where Tidy does not catch some
ill-formed HTML or, worse, generates ill-formed HTML. These cases have
been significantly reduced. That said, be sure to test Tidy with some
representative files from your environment.</p>
<p>For development work, use CVS directly on your development
system. For information on how to pull Tidy sources, see <a
href="http://sourceforge.net/cvs/?group_id=27659">CVS</a>. This way
you can keep abreast of changes to Tidy and quickly resolve
conflicts.</p>
<p>For building a front end (e.g. GUI or language binding), the
simplest approach is to use TidyLib. For more information
about building and coding with TidyLib, see the <a
href="http://tidy.sourceforge.net/libintro.html">Introduction To TidyLib</a>.</p>
</dd>
<dt><a name="regression" id="regression">How Do I Run A
Regression Test?</a></dt>
<dd>
<p>You might ask, "Why should I run a regression test?". If you
are a Tidy user, you might want to compare a new version of Tidy
to the version you are currently running. This is a good idea
if you are using Tidy in production applications such as web
publishing. If you are a Tidy developer, it is a good idea to
run the regression test suite to make sure your fix or enhancement
doesn't add new bugs.</p>
<p>Detecting new bugs is easier said than done, because sometimes
they are subtle and can only be seen in browsers (or one particular
browser you don't even have). But you can catch most crashes and
many layout problems by running the test suite as described here.</p>
<p>The basic process is simple: run the test suite <strong>before</strong>
and <strong>after</strong> making changes to TidyLib and compare the output
markup and messages. Be aware that the test scripts for WinNT/2K/XP
(alltest.cmd) and Linux/Unix (testall.sh) place the output files in
<code>tidy/test/tmp</code>. If you forget to run the <strong>before</strong>
test, you can always download a binary from the <a
href="http://tidy.sourceforge.net/#binaries">Project Page</a>. If you
are not a TidyLib developer, you can download the <a
href="http://tidy.sourceforge.net/test/tidy_test.tgz">Test Suite</a>
directly. Here are the steps to evaluate the impact of a TidyLib change.</p>
<h3>For Windows</h3>
<p><strong>Before</strong> making changes:</p>
<pre>
C:\tidy\test> alltest.cmd
C:\tidy\test> ren tmp baseline
</pre>
<p><strong>After</strong> making changes and building Tidy:</p>
<pre>
C:\tidy\test> alltest.cmd
C:\tidy\test> windiff tmp baseline
</pre>
<h3>For Linux/Unix</h3>
<p><strong>Before</strong> making changes:</p>
<pre>
~/tidy/test$ ./testall.sh
~/tidy/test$ mv tmp baseline
</pre>
<p><strong>After</strong> making changes and building Tidy:</p>
<pre>
~/tidy/test$ ./testall.sh
~/tidy/test$ diff -u tmp baseline > diff.txt
</pre>
</dd>
<!--
<dt><a name="" id=""></a></dt>
<dd>
</dd>
<dt><a name="" id=""></a></dt>
<dd>
</dd>
-->
<!-- Save for future questions
<dt><a name="" id=""></a></dt>
<dd>
</dd>
-->
</dl>
</body>
</html>

BIN
htmldoc/grid.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

50
htmldoc/license.html Normal file
View file

@ -0,0 +1,50 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>HTML Tidy License</title>
</head>
<body>
<pre>
HTML Tidy
HTML parser and pretty printer
Copyright (c) 1998-2003 World Wide Web Consortium
(Massachusetts Institute of Technology, European Research
Consortium for Informatics and Mathematics, Keio University).
All Rights Reserved.
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be held
liable for any direct, indirect, special or consequential damages
arising out of any use of the software or documentation, even if
advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute
this source code, or portions hereof, documentation and executables,
for any purpose, without fee, subject to the following restrictions:
1. The origin of this source code must not be misrepresented.
2. Altered versions must be plainly marked as such and must
not be misrepresented as being the original source.
3. This Copyright notice may not be removed or altered from any
source or altered source distribution.
The copyright holders and contributing author(s) specifically
permit, without fee, and encourage the use of this source code
as a component for supporting the Hypertext Markup Language in
commercial products. If you use this source code in a product,
acknowledgment is not required but would be appreciated.
</pre>
</body>
</html>

554
htmldoc/pending.html Normal file
View file

@ -0,0 +1,554 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="generator" content="HTML Tidy, see www.w3.org" />
<title>HTML TIDY - Notes on pending work</title>
<meta name="keywords"
content="HTML, validation, error correction, pretty-printing" />
<meta name="author" content="Dave Raggett &lt;dsr@w3.org&gt;" />
<style type="text/css">
body {
margin-left: 10%;
margin-right: 10%;
font-family: sans-serif
}
h1 { margin-left: -8% }
h2,h3,h4,h5,h6 { margin-left: -4% }
pre { color: green; font-weight: bold;
font-size: 80%; font-family: monospace}
em { font-style: italic; font-weight: bold }
strong { text-transform: uppercase; font-weight: bold }
.note {font-style: italic; color: rgb(192, 101, 101) }
//hr {text-align: center; width: 60% }
blockquote {
color: navy;
margin-left: 1%;
margin-right: 1%;
text-align: center;
font-family: "Comic Sans MS", "Times New Roman", serif
}
table {
font-family: sans-serif;
font-size: 80%;
background: rgb(255,255,153)
}
td {
font-size: 80%
}
.people {font-family: "Lucida Calligraphy", serif}
:link { color: rgb(0, 0, 153) }
:visited { color: rgb(153, 0, 153) }
:active { color: rgb(255, 0, 102) }
a :hover { color: rgb(0, 0, 255) }
</style>
<style type="text/css">
p.c1 {font-style: italic}
</style>
</head>
<body bgcolor="#FFFFFF" background="grid.gif" text="black"
link="navy" vlink="black" alink="red">
<h1>HTML TIDY - Notes on Pending Work</h1>
<p><a href="http://www.w3.org/People/Raggett">Dave Raggett</a> <a
href="mailto:dsr@w3.org">dsr@w3.org</a></p>
<p>This is a page where I am keeping the suggestions for
improvements or bug fixes. My current work load means that I
don't get much time to work on HTML Tidy, so I am interested in
offers of help!</p>
<h4>Public Email List for Tidy: &lt;<a
href="mailto:html-tidy@w3.org">html-tidy@w3.org</a>&gt;</h4>
<p>I have set up an archived mailing list devoted to Tidy. To
subscribe send an email to html-tidy-request@w3.org with the word
subscribe in the subject line (include the word unsubscribe if
you want to unsubscribe). The <a
href="http://lists.w3.org/Archives/Public/html-tidy/">archive</a>
for this list is accessible online. Please use this list to
report errors or enhancement requests.</p>
<h2>Things awaiting further attention</h2>
<ul>
<li>Support for BIG5 and ShiftJIS (Rick Jelliffe)</li>
<li>Stronger checking on which attributes appear on what
elements</li>
<li>Sorting attributes in a canonical order</li>
<li>Version checking for HTML 4.01 vs 4.0 (Tidy currently will
set the document type to 4.01 in preference to 4.0)</li>
<li>Noticing that the document isn't really XHTML if it isn't
wellformed, i.e. it lacks end tags and quotes on attribute
values</li>
<li>Converting &lt;font face="Symbol"&gt;a&lt;/font&gt; etc. to
the corresponding Unicode characters, when cleaning HTML.</li>
<li>link checking - this would involve some platform dependent
code as the network interface varies significantly from one
platform to the next.</li>
<li>When exporting Word2000 to Web page, there is a need for
smarter rules of thumb for working out whether the paragraph is a
bulleted or numbered list item, and determining the level of
nesting. Perhaps the style attribute holds the key? This tends to
include substrings like: "mso-list:l0 level1 lfo2;" and
"mso-list:l1 level1 lfo1;". Unfortunately, these aren't always
present, and I have yet to figure out a foolproof heuristic.</li>
</ul>
<p>I need to set up an index of precisely what attributes are
supported on each element. Right now, some elements check their
own attributes, whilst others are checked via default checks
defined for each attribute independently of the element. Until
this is done, you may sometimes find validation services
discovering errors unnoticed by Tidy itself.</p>
<p>Jelks Cabaniss asks: <i>Could Tidy be made to automatically
"clean" (FONTs to CSS) if the Strict DOCTYPE is requested? An
HTML or XHTML Strict document can't have FONT tags according to
the DTDs</i>. Jelks has a bunch of other good ideas such as
converting the bgcolor attribute over to CSS.</p>
<p>Adding an option to select slide transition effects. I would
also like to provide an optional feature for sorting attribute
values.</p>
<p>I am having problems with form elements as direct children of
tr or table. It is dangerous to create an implicit table cell,
and what is needed is a way to move the form element into the
next cell. If this can't be done an error needs to be raised
since Tidy will be stuck. On a separate note, Tidy is still
breaking lines between &lt;img&gt; and &lt;/a&gt; which in
Netscape shows as an underlined space. It's fine in IE.</p>
<p>Benjamin Holzman &lt;bah@orientation.com&gt; writes: I'm
wrapping tidy (release-date 2000.01.13) in some perl objects
(using SWIG), and CharEncoding being a global is a bit of a pain.
I was wondering what your thoughts would be on how to fix that.
The character encoding is already a property of struct Out; is
there any reason why making it part of struct StreamIn as well,
and perhaps setting that property in OpenInput, based on the
existing CharEncoding variable, wouldn't allow us to move
CharEncoding to be local to main?</p>
<p>Oh, in case you're curious about the API, here's a short
script using my wrappers to be an html to xhtml filter:</p>
<pre>
#!/usr/bin/perl
require tidy;
my $tidy = Tidy-&gt;new(*STDIN);
my $document = $tidy-&gt;parse;
$tidy-&gt;as_xhtml(*STDOUT);
</pre>
<p>Rick Parsons would like there to be a new wrap-attributes
option that can be used to suppress line wrapping within
attributes. There is already a similar option for JavaScript
literals.</p>
<p>Vijay Patil would like tidy -h to display options sorted
alphabetically.</p>
<p>Julian Reschke would like there to be an option to add the
xml:space="preserve" attribute to pre elements when outputting
xml.</p>
<p>Armando Asantos would like to use Tidy to produce a list of
URLs for images or hypertext links according to a config option.
This would be straightforward, but is a lower priority than bug
fixes etc.</p>
<p>Omri Traub would like an option to wrap the contents of style
and script elements in CDATA marked sections when converting to
XHTML. He is also interested in direct support for 16 bit
character file I/O.</p>
<p>Bertilo Wennergren notes:</p>
<blockquote>If I configure Tidy to "upgrade to style sheets", it
does so for a few things in my main document, but the code thus
created get error reports if I feed it back to Tidy. It turns out
that Tidy creates extra "class" attributes on tags that already
have "class" attributes set. This happens with this page:
&lt;http://www.concinnity.se/bertilow/index.htm&gt;.</blockquote>
<p>Randi Waki notes:</p>
<blockquote>
<p>If a quoted URL attribute value (e.g., href in &lt;a&gt;
elements) contains a line break, 13-Jan-2000 Tidy changes the
line break to a space while IE and Netscape discard the line
break. This can result in a broken link in the tidied
document.</p>
<p>I believe the following change fixes the problem. In lexer.c,
insert the following lines before line 2502:</p>
<pre>
/* discard line breaks in quoted URLs */
if (c == '\n' &amp;&amp; IsUrl(name))
continue;
/* existing line 2502 */ c = ' ';
</pre>
</blockquote>
<p>Stephen Reynolds would like Tidy to keep track of whether a
comment started on a new line and preserve this in the
output.</p>
<p>Terry Teague says:</p>
<blockquote>
<p>Sorry, I should have been more clear. Part of the problem is
the current HelpText() function in localize.c doesn't actually
reflect current reality.</p>
<p>You need to at least add the following line to HelpText()
:</p>
<pre>
tidy_out(out, " -version or -v show version\n");
</pre>
<p>And I suppose it should mention the use of the new
"--&lt;config options&gt;" type syntax.</p>
<p>Regards, Terry</p>
</blockquote>
<p>John Russel notes:</p>
<pre>
what i wonder is
1] does the specification indicate these are WRONG
2] if so why do they pass thru tidy ....
is url syntax such a can of worms that it is left to user
to check .......
CASE 1: misuse of slash for folders
site had background="pics\fancy.jpg"
instead of "pics/fancy.jpg"
CASE 2: spaces in filename
site had href="coin album.html"
instead of "coin%20album.html"
</pre>
<p>Andre Stechert would like a way to prevent Tidy from
"cleaning" newly declared elements which don't have any content
but do have end tags, see his mail of 17th January 2000</p>
<p>Todd Clark would like to use Tidy with Microsoft's WebClass
tags. Unfortunately these include unusual characters in the tag
names such as @ which Tidy objects to, for instance:</p>
<pre>
&lt;WC@DOMAINNAME&gt;test.com&lt;/WC@DOMAINNAME&gt;
</pre>
<p>Perhaps it makes sense to offer an option to make Tidy less
picky about what characters it accepts in tag names. Or perhaps
"WebClass: yes".</p>
<p>Jelks Cabaniss suggests an option to control dropping of empty
elements, e.g. according to what attributes they have.</p>
<p>Paavo Hartikainen writes:</p>
<blockquote>
<p>Tidy always expands '&amp;' to '&amp;amp;' even if I have
'quote-ampersand: no' defined in configuration file. This is not
a good thing to do for URLs that have '&amp;' characters in them.
OS is Debian GNU/Linux 2.1 SPARC. Same thing happens on Alpha.
Other architectures I have not tried.</p>
<p>My configuration looks like this:</p>
<pre>
char-encoding: latin1
error-file: ./errors
indent-spaces: 2
logical-emphasis: yes
output-xhtml: yes
quiet: no
quote-ampersand: no
show-warnings: yes
tidy-mark: yes
wrap: 78
wrap-attributes: no
write-back: yes
keep-time: yes
</pre>
</blockquote>
<p>Paul White reports that Tidy isn't recognizing HTML 3.2 when
the doctype is "-//W3C//DTD HTML 3.2 Final//EN" (as per the REC),
and similarly for HTML 4.01. This would appear to call for a
change to the table of names in lexer.c.</p>
<p>Stuart Hungerford would like Tidy to detect and fix duplicate
attributes e.g. multiple class attributes. Celeste Suliin Burris
would like Tidy to replace spaces in URLs by %20 as some versions
of Netscape "croak big time" on this. Denis Kokarev also wants
Tidy to remove duplicate attributes when the values are the same.
This apparently stops XSLT from working. Brian Schweitzer notes
that Tidy adds a 2nd class attribute rather than merging the
classes into a space separated list.</p>
<p>Bertilo Wennergren writes: Tidy seems not to recognize frame
elements with a closing "/". It actually removes them. Try his <a
href="http://www.concinnity.se/bertilow/pmeg/pmeg9/k_bazo.htm">example</a>.
Tidy can produce XHTML Frameset docs, but when fed them back
again it cries foul.</p>
<p>Jose Manuel Cerqueira Esteves notes:</p>
<pre>
I've used `tidy' to convert a few HTML 4.0 files to XHTML 1.0 and noticed
a problem when dealing with constructs like
&lt;small&gt;&lt;small&gt;some text&lt;/small&gt;&lt;/small&gt;
First, `tidy' acts as if the second "&lt;small&gt;" was meant as a closing tag:
Warning: "&lt;small&gt; is probably intended as &lt;/small&gt;"
Then it trims the resulting empty &lt;small&gt;&lt;/small&gt;:
Warning: trimming empty &lt;small&gt;
And finally both remaining closing tags ("&lt;/small&gt;"), now spurious,
are removed:
Warning: discarding unexpected &lt;/small&gt;
Warning: discarding unexpected &lt;/small&gt;
It would be convenient to have at least some `tidy' option to prevent this
from happening (or perhaps some different heuristics?).
</pre>
<p>Robbert Hans Baron would like to see Tidy warning about
duplicate attributes and fixing these when the values are
identical.</p>
<p>Jutta Wrage notes that: When parsing HTML 3.2 Pages, tidy
doesn't accept textareas in forms correctly. The HTML Reference
specification (HTML 3.2 Final) allows: name, rows and cols, but
upon seeing these Tidy thinks the document is 4.0.</p>
<p>Matthew Brealey notes that a heading start tag is coerced to
an end heading tag when the end tag is missing. This is
deliberate, but perhaps not the best heuristic.</p>
<p>HIYAMA Masayuki notes that Tidy should set the encoding
attribute to match the language encoding, e.g. &lt;?xml version="1.0"
encoding="iso-2022-jp"?&gt;.</p>
<p>Mark Modrall has extended Tidy to support selectively
stripping out listed tags and attributes, see his email of March
14th.</p>
<p>Yong Taek Bae notes that with the omit end tags option Tidy
omits the body tag even if it has attributes. This is an
error.</p>
<p>Tapio Markula reports that Tidy is incorrectly replacing
accented characters in script elements by entities. The script
element (in HTML but not XHTML) is CDATA and as such entities
won't be expanded. This bug needs to be fixed along with the
support for CDATA sections.</p>
<p>Terrill Bennett reports tidy crashing when producing slides,
and when the -i option has been set. He later added the crash
occurs when the page doesn't include an h1 element. See
Terrill-Bennett-11mar00.txt.</p>
<p>Stephen Lewis notes that if an &lt;hr&gt; element is present
in the head before the title element, then Tidy gets confused and
adds in a spurious extra empty title element. This would be
avoided if Tidy could move the hr into the body before the body
element is encountered. This raises a number of problems for
instance working out when to copy in attributes from an explicit
body element.</p>
<p>Carl Osterly would like Tidy to avoid breaking lines before or
after the = sign in attribute values when this is practical.
Perhaps a simple rule of thumb could be used to decide this?</p>
<p>Rick H Wesson notes that Tidy crashes on CDATA marked sections
when parsing XML.</p>
<p>Luigi Federici would like an option to set the DTD URI for XML
or XHTML.</p>
<p>Mat Sander notes: If I have php code the indentation behaves
strange. Repeated tidying php content and end tag indented one
level extra for each time. The result ends up something like
this:</p>
<pre>
...
&lt;?php
$r=0;
?&gt;
...
I have the following config file for Tidy:
---
tidy-mark: no
markup: yes
wrap: 0
indent: auto
output-xml: no
output-xhtml: yes
doctype: loose
char-encoding: latin1
quote-marks: yes
assume-xml-procins: yes
word-2000: yes
clean: yes
logical-emphasis: yes
drop-empty-paras: yes
enclose-text: yes
fix-bad-comments: yes
alt-text: .
write-back: bool
keep-time: yes
show-warnings: no
quiet: yes
split: no
---
Best Regards,
Mats-Olof Sander
</pre>
<p>Don Hasson notes that if you make a mistake and leave off the
ending "/" in the &lt;title&gt; tag, tidy will generate an extra
set of &lt;title&gt;s.</p>
<p>Example:</p>
<pre>
&lt;html&gt;
&lt;head&gt;&lt;title&gt;No end here&lt;title&gt;&lt;/head&gt;
&lt;body&gt;
Empty
&lt;/body&gt;
&lt;/html&gt;
</pre>
<p>produces this:</p>
<pre>
&lt;html&gt;
&lt;head&gt;
&lt;title&gt;No end here&lt;/title&gt;
&lt;title&gt;&lt;/title&gt;
&lt;/head&gt;
&lt;body&gt;
Empty
&lt;/body&gt;
&lt;/html&gt;
</pre>
<p>Jeff Wilkinson would like the HTML Tidy page to include
internal anchors so that he can link directly to the appropriate
sections.</p>
<p>Peter Vince would like to be able to clean presentation
attributes on the body element, as well as translating b and i to
span.</p>
<p>Dave Bryan and Matthew Brealey would like there to be a way to
suppress the default handling of inline elements in favor of
simply inserting the appropriate end tag when encountering an
element that isn't allowed in an inline context. The default
behavior replicates the rendering on existing browsers but can
cause problems for hand editors.</p>
<p>Dave Bryan notes that tidy isn't updating the column position
when parsing attributes.</p>
<p>Can Tidy track when a line break occurs after a PI or comment
and reproduce this in the output? This idea occurred to me after
reading a comment from Brad Stowers.</p>
<p>One interesting suggestion is to make some of Tidy's rules of
thumb sensitive to the program that generated the markup as
indicated by the meta element. This would allow for greater
robustness in how the rules operate.</p>
<p>Dave Bryan would like the quiet mode to be tweaked to suppress
the general info at the end of the report. See
Dave-Bryan-24mar00.txt.</p>
<p>Erik Rossen would like an option to suppress line wrap within
tags, so that the tag is always on the same line regardless of
the number and length of the attributes.</p>
<p>Dan Satria suggests that the clean mechanism check to see if
there are any existing matching style rules before adding new
ones.</p>
<p>Zoltan Hawryluk suggests mapping the Netscape layer tag into
the equivalent CSS positioning syntax.</p>
<p>Jim Walker says Tidy doesn't correctly report errors such as
<tt>&lt;/&lt;/head&gt;</tt>.</p>
<p>Tidy's slide feature: see Johannes-Poutre-12jul00.txt</p>
<p>Carole Mah suggests Tidy should recover from multiple class
attributes on the same element.</p>
<h2>Other ideas</h2>
<ul>
<li>Recursion through subdirectories, so you can fix up your
entire web site at one go. This assumes I can find a way that is
portable across a wide range of platforms!</li>
<li>Support for W3C's <a
href="http://www.w3.org/TR/REC-DOM-Level-1/">Document Object
Model</a> (DOM) level one.</li>
<li>Full validation of all attribute values.</li>
<li>Mapping Unicode bidi control characters to HTML tags.</li>
<li>Full support for parsing XML (still somewhat limited).</li>
<li>How to say which XML elements should be printed
"inline".</li>
<li>Acting on the XML encoding attribute, e.g.
&lt;?xml&#160;encoding="iso-8859-1"?&gt;</li>
<li>Improved mapping from HTML presentation attributes/elements
to CSS.</li>
<li>Improved support for <a
href="http://java.sun.com/products/jsp/">JSP</a> (Java Server
pages)</li>
<li>Ugly print option which removes all optional whitespace</li>
</ul>
</body>
</html>

247
htmldoc/quickref-html.xsl Normal file
View file

@ -0,0 +1,247 @@
<?xml version="1.0"?>
<!--
For generating the `quickref.html` web page from output of
`tidy -xml-config`
(c) 2005 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Written by Charles Reitzel and Jelks Cabaniss
CVS Info :
$Author: arnaud02 $
$Date: 2005/04/13 11:10:58 $
$Revision: 1.2 $
-->
<xsl:stylesheet version="1.0"
xmlns="http://www.w3.org/1999/xhtml"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes"
encoding="us-ascii"
omit-xml-declaration="yes"
doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" />
<!--
Root template: writes the XHTML page skeleton (head, headings, the
"generated with" note and the five in-page navigation links), then
calls the "link-section" and "detail-section" named templates to emit
the option tables. The href targets used here (#MarkupHeader etc.) are
the headerID values passed by "link-section".
-->
<xsl:template match="/">
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>HTML Tidy Configuration Options Quick Reference</title>
<link type="text/css" rel="stylesheet" href="tidy.css" />
</head>
<body>
<h1 id="top">Quick Reference</h1>
<h2>HTML Tidy Configuration Options</h2>
<p><em>Generated automatically with HTML Tidy released
on <xsl:value-of select="config/@version" />.</em></p>
<p><a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a><br />
<a class="h3" href="#DiagnosticsHeader">Diagnostics</a><br />
<a class="h3" href="#PrettyPrintHeader">Pretty Print</a><br />
<a class="h3" href="#EncodingHeader">Character Encoding</a><br />
<a class="h3" href="#MiscellaneousHeader">Miscellaneous</a></p>
<xsl:call-template name="link-section" />
<xsl:call-template name="detail-section" />
</body>
</html>
</xsl:template>
<!-- Named Templates: -->
<!--
"link-section": the summary table at the top of the page. Calls the
"links" template once per option class (markup, diagnostics, print,
encoding, misc), passing the class name, the section title, and the id
to place on the section title cell. Those ids are the targets of the
navigation links written by the root template.
-->
<xsl:template name="link-section">
<table summary="Tidy Options Quick Reference Header Section" border="0"
cellpadding="3" cellspacing="0">
<colgroup>
<col width="33%" />
<col width="33%" />
<col width="33%" />
</colgroup>
<xsl:call-template name="links">
<xsl:with-param name="class">markup</xsl:with-param>
<xsl:with-param name="header">HTML, XHTML, XML</xsl:with-param>
<xsl:with-param name="headerID">MarkupHeader</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="links">
<xsl:with-param name="class">diagnostics</xsl:with-param>
<xsl:with-param name="header">Diagnostics</xsl:with-param>
<xsl:with-param name="headerID">DiagnosticsHeader</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="links">
<xsl:with-param name="class">print</xsl:with-param>
<xsl:with-param name="header">Pretty Print</xsl:with-param>
<xsl:with-param name="headerID">PrettyPrintHeader</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="links">
<xsl:with-param name="class">encoding</xsl:with-param>
<xsl:with-param name="header">Character Encoding</xsl:with-param>
<xsl:with-param name="headerID">EncodingHeader</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="links">
<xsl:with-param name="class">misc</xsl:with-param>
<xsl:with-param name="header">Miscellaneous</xsl:with-param>
<xsl:with-param name="headerID">MiscellaneousHeader</xsl:with-param>
</xsl:call-template>
</table>
</xsl:template>
<!--
"detail-section": the detailed reference table. Calls the "reference"
template once per option class, in the same class order as
"link-section", passing the class name, the section title, and the id
to place on the section title cell (MarkupReference etc.).
-->
<xsl:template name="detail-section">
<table summary="Tidy Options Quick Reference Detail Section" border="0"
cellpadding="3" cellspacing="0">
<xsl:call-template name="reference">
<xsl:with-param name="class">markup</xsl:with-param>
<xsl:with-param name="header">HTML, XHTML, XML</xsl:with-param>
<xsl:with-param name="headerID">MarkupReference</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="reference">
<xsl:with-param name="class">diagnostics</xsl:with-param>
<xsl:with-param name="header">Diagnostics</xsl:with-param>
<xsl:with-param name="headerID">DiagnosticsReference</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="reference">
<xsl:with-param name="class">print</xsl:with-param>
<xsl:with-param name="header">Pretty Print</xsl:with-param>
<xsl:with-param name="headerID">PrettyPrintReference</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="reference">
<xsl:with-param name="class">encoding</xsl:with-param>
<xsl:with-param name="header">Character Encoding</xsl:with-param>
<xsl:with-param name="headerID">EncodingReference</xsl:with-param>
</xsl:call-template>
<xsl:call-template name="reference">
<xsl:with-param name="class">misc</xsl:with-param>
<xsl:with-param name="header">Miscellaneous</xsl:with-param>
<xsl:with-param name="headerID">MiscellaneousReference</xsl:with-param>
</xsl:call-template>
</table>
</xsl:template>
<!--
"links": writes one section of the summary table for a single option
class.
  class    - value compared with each option's @class attribute
  header   - section title; " Options" is appended to it
  headerID - id placed on the section title cell
Output: a title row, the column captions (via "ClassHeaders"), one row
per matching option sorted by name (name link, type, default or "-"),
and a closing spacer row.
-->
<xsl:template name="links">
<xsl:param name="class"/>
<xsl:param name="header"/>
<xsl:param name="headerID"/>
<tr valign="bottom">
<td class="h3" colspan="2" id="{$headerID}"><xsl:value-of
select="concat($header, ' Options')"/></td>
<td valign="top"><a href="#top">Top</a></td>
</tr>
<xsl:call-template name="ClassHeaders" />
<xsl:for-each select="/config/option[@class=$class]">
<!-- ascending is the default order for xsl:sort -->
<xsl:sort select="name" />
<tr>
<td><a href="#{name}"><xsl:value-of select="name"/></a></td>
<td><xsl:apply-templates select="type"/></td>
<td>
<xsl:choose>
<!-- an absent or empty default is shown as a dash -->
<xsl:when test="not(string(default))">
<strong>-</strong>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates select="default" />
</xsl:otherwise>
</xsl:choose>
</td>
</tr>
</xsl:for-each>
<tr valign="bottom">
<td colspan="3">&#xA0;</td>
</tr>
</xsl:template>
<!--
"reference": writes the detailed reference rows for every option of one
class.
  class    - value compared with each option's @class attribute
  header   - section title; " Options Reference" follows it
  headerID - id placed on the section title cell
For each matching option (sorted by name) it emits a title row whose
cell id is the option name, a row with type / default / example and the
"see also" links, a description row, and a spacer row. An absent or
empty default or example is shown as "-".
-->
<xsl:template name="reference">
<xsl:param name="class"/>
<xsl:param name="header"/>
<xsl:param name="headerID"/>
<tr>
<td>&#xA0;</td>
</tr>
<tr valign="bottom">
<td valign="top" colspan="2" class="h2" id="{$headerID}">
<xsl:value-of select="$header"/> Options Reference
</td>
</tr>
<tr>
<td>&#xA0;</td>
</tr>
<xsl:for-each select="/config/option[@class=$class]">
<!-- ascending is the default order for xsl:sort -->
<xsl:sort select="name" />
<tr>
<td class="tabletitle" valign="top" id="{name}">
<xsl:value-of select="name"/>
</td>
<td class="tabletitlelink" valign="top" align="right">
<a href="#top">Top</a></td>
</tr>
<tr>
<td valign="top">Type: <strong><xsl:value-of
select="type"/></strong><br />
Default: <strong><xsl:choose>
<xsl:when test="string(default)">
<xsl:apply-templates select="default" />
</xsl:when>
<xsl:otherwise>-</xsl:otherwise>
</xsl:choose></strong>
<br />Example: <strong><xsl:choose>
<xsl:when test="string(example)">
<xsl:apply-templates select="example"/>
</xsl:when>
<xsl:otherwise>-</xsl:otherwise>
</xsl:choose></strong>
</td>
<td align="right" valign="top">
<xsl:for-each select="seealso">
<a href="#{.}"><xsl:apply-templates select="." /></a>
<!-- line break between entries, none after the last one -->
<xsl:if test="not(position() = last())">
<br />
</xsl:if>
</xsl:for-each>
</td>
</tr>
<tr>
<td colspan="2"><xsl:apply-templates select="description"/></td>
</tr>
<tr>
<td>&#xA0;</td>
</tr>
</xsl:for-each>
</xsl:template>
<!--
"ClassHeaders": the fixed caption row (Option / Type / Default) that the
"links" template places above each class's rows in the summary table.
-->
<xsl:template name="ClassHeaders">
<tr>
<td class="tabletitle">Option</td>
<td class="tabletitle">Type</td>
<td class="tabletitle">Default</td>
</tr>
</xsl:template>
<!-- Regular Templates: -->
<!--
Re-creates the inline markup allowed in option text (a, code, em,
strong, br) as output elements. xsl:element takes the element name from
the source node and, having no namespace attribute, resolves it with the
stylesheet's default namespace, i.e. XHTML.
Attributes are copied as they are. Children are processed recursively
instead of being copied verbatim: a verbatim copy keeps nested inline
elements in the source's empty namespace, which makes the serializer
emit them with an xmlns="" reset inside the XHTML parent.
-->
<xsl:template match="a | code | em | strong | br">
<xsl:element name="{local-name(.)}">
<xsl:copy-of select="@*" />
<xsl:apply-templates select="node()" />
</xsl:element>
</xsl:template>
</xsl:stylesheet>

1771
htmldoc/release-notes.html Normal file

File diff suppressed because it is too large Load diff

329
htmldoc/tidy.css Normal file
View file

@ -0,0 +1,329 @@
/* 1st Style ignored by Netscape.
   Per the note above, this rule exists only so that the first rule in
   the sheet is a throwaway class; its declarations repeat those of the
   "Blue TITLE" rule below. */
td.dummy, font.dummy, .dummy, a:link.dummy, a:visited.dummy, a:active.dummy
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 16pt;
color: #336699;
text-decoration: none;
font-weight: normal
}
/* PAGE BODY: 10% side margins, white background.
   The h1/h2/h3 rules further down use negative left margins. */
body
{
margin-left: 10%;
margin-right: 10%;
font-family: sans-serif;
background-color: #FFFFFF
}
/* Blue TITLE (16pt) */
td.title, font.title, .title, a:link.title, a:visited.title, a:active.title
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 16pt;
color: #336699;
text-decoration: none;
font-weight: normal
}
/* BODY TEXT (9pt) */
td.text, font.text, .text, a:link.text, a:visited.text, a:active.text
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 9pt;
color: #000000;
text-decoration: none;
font-weight: normal
}
/* BOLD BODY TEXT (9pt) */
td.textbold, font.textbold, .textbold, a:link.textbold, a:visited.textbold, a:active.textbold
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 9pt;
color: #000000;
text-decoration: none;
font-weight: bold
}
/* BOLD BODY TEXT LINK WITH UNDERLINE:
   same as .textbold but without "text-decoration: none" */
td.textboldlink, font.textboldlink, .textboldlink, a:link.textboldlink, a:visited.textboldlink, a:active.textboldlink
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 9pt;
color: #000000;
font-weight: bold
}
/* SMALL BODY TEXT (8pt) */
td.smtext, font.smtext, .smtext, a:link.smtext, a:visited.smtext, a:active.smtext
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 8pt;
color: #000000;
text-decoration: none;
font-weight: normal
}
/* SMALL BOLD BODY TEXT (8pt) */
td.smtextbold, font.smtextbold, .smtextbold, a:link.smtextbold, a:visited.smtextbold, a:active.smtextbold
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 8pt;
color: #000000;
text-decoration: none;
font-weight: bold
}
/* TITLES
   Disabled rule kept inside this comment. It uses the same selectors as
   the "Blue TITLE" rule above, with 12pt bold #CC3300 instead.
td.title, font.title, .title, a:link.title, a:visited.title, a:active.title
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
color: #CC3300;
text-decoration: none;
font-weight: bold
}
*/
/* SUBTITLES (11pt bold) */
td.subtitle, font.subtitle, .subtitle, a:link.subtitle, a:visited.subtitle, a:active.subtitle
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 11pt;
color: #000000;
text-decoration: none;
font-weight: bold
}
/* LEGAL TEXT (8pt) */
td.legal, font.legal, .legal, a:link.legal, a:visited.legal, a:active.legal
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 8pt;
color: #000000;
text-decoration: none;
font-weight: normal
}
/* LEGAL TEXT LINK: 8pt in #CC3300, without "text-decoration: none" */
td.legallink, font.legallink, .legallink, a:link.legallink, a:visited.legallink, a:active.legallink
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 8pt;
color: #CC3300;
font-weight: normal
}
/* RED TEXT (10pt, #CC3300) */
td.textred, font.textred, .textred, a:link.textred, a:visited.textred, a:active.textred
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #CC3300;
text-decoration: none;
font-weight: normal
}
/* RED TEXT BOLD */
td.textredbold, font.textredbold, .textredbold, a:link.textredbold, a:visited.textredbold, a:active.textredbold
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #CC3300;
text-decoration: none;
font-weight: bold
}
/* LINKS (class "link"; the generic a:link / a:visited rules are at the
   end of this sheet) */
td.link, font.link, .link, a:link.link, a:visited.link, a:active.link
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #3366CC;
font-weight: normal
}
/* TABLE TITLE LINK CELL: same background as .tabletitle, black text.
   quickref-html.xsl puts this class on the cell holding the "Top" link. */
td.tabletitlelink, font.tabletitlelink, .tabletitlelink, a:link.tabletitlelink, a:visited.tabletitlelink, a:active.tabletitlelink
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
background-color: #e9e9d3;
color: #000000;
}
/* TABLE TITLES */
td.tabletitle, font.tabletitle, .tabletitle, a:link.tabletitle, a:visited.tabletitle, a:active.tabletitle
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #336699;
background-color: #e9e9d3;
/* text-decoration: none; */
font-weight: bold
}
/* TABLE CELL
   Currently the same background as .shaded below; the earlier colour is
   kept in the inline comment. */
td.cell, tr.cell, font.cell, .cell, a:link.cell, a:visited.cell, a:active.cell
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
color: #000000;
font-weight: normal;
/* background-color: #e9e9d3 */
background-color: #f5f5f5
}
/* SHADED TABLE CELL */
td.shaded, tr.shaded, font.shaded, .shaded, a:link.shaded, a:visited.shaded, a:active.shaded
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
color: #000000;
font-weight: normal;
background-color: #f5f5f5
}
/* GLOSSARY TERM */
td.term, font.term, .term, a:link.term, a:visited.term, a:active.term
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
font-style: normal;
color: #000000;
text-decoration: none;
font-weight: normal
}
/* ELEMENT TAGS */
/* Lists and list items share one set of font declarations. */
ul,
li
{
font-weight: normal;
font-style: normal;
font-size: 10pt;
font-family: Arial, Helvetica, sans-serif
}
/* HEADING 1 look for links and arbitrary elements (class "h1") */
a:link.h1, a:visited.h1, .h1
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
color: #0066CC
}
/* Same as above, but bold while the link is active */
a:active.h1
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
font-weight: bold;
color: #0066CC
}
/* h1 element: pulled left by 8% relative to the body margin */
h1
{
margin-left: -8%;
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 12pt;
color: #0066CC
}
/* HEADING 2 look as a class (no margin change) */
.h2
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 11pt;
/* font-weight: bold; */
color: #000000
}
/* h2 element: pulled left by 4% relative to the body margin */
h2
{
margin-left: -4%;
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 11pt;
/* font-weight: bold; */
color: #000000
}
/* HEADING 3 look for links and arbitrary elements (class "h3").
   The type selector is written in lower case: element names in
   selectors are case-sensitive when the page is parsed as XML, and an
   upper-case "A" would then never match the "a" elements of an XHTML
   page, letting the generic a:link rule win over ".h3".
   The active state has the same declarations, so it shares this rule. */
a:link.h3, a:visited.h3, a:active.h3, .h3
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #000000;
font-weight: bold
}
/* h3 element: pulled left by 4% relative to the body margin */
h3
{
margin-left: -4%;
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
font-weight: bold;
color: #000000
}
/* h4 element: no margin change */
h4
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 9pt;
font-weight: bold;
color: #000000
}
/* Monospace text (class "code"), including links carrying that class.
   Lower-case "a" so the link selectors also match in XML-parsed XHTML,
   where element names in selectors are case-sensitive; otherwise the
   generic a:link font-family would override ".code" on links. */
.code, a:active.code, a:link.code, a:visited.code
{
font-family: "Courier New", Courier, monospace;
}
/* Abstract paragraphs are set in italics. */
.abstract
{
font-style: italic
}
/* Paragraphs and table cells share the default 10pt text style. */
p,
td
{
font-style: normal;
font-size: 10pt;
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif
}
/* LINKS: generic (class-less) link colours.
   Unvisited and active links are #3366CC, visited links #333366. */
a:link, a:active
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #3366CC;
font-weight: normal
}
a:visited
{
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
font-size: 10pt;
color: #333366;
font-weight: normal
}
/* code element (as opposed to the ".code" class above): bold brown */
code {
/* use browser/user default for `font-family` */
font-weight: bold;
color: brown;
background: transparent;
}

BIN
htmldoc/tidy.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 244 B

423
htmldoc/tidy1.xsl Normal file
View file

@ -0,0 +1,423 @@
<?xml version="1.0"?>
<!--
For generating the `tidy.1` man page from the
output of `tidy -xml-help` and `tidy -xml-config`
(c) 2005-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Written by Jelks Cabaniss and Arnaud Desitter
CVS Info :
$Author: arnaud02 $
$Date: 2007/02/01 12:25:21 $
$Revision: 1.8 $
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:strip-space elements="description" />
<xsl:output method="text" />
<!--
The default template match is to the document passed on the
command line to the XSLT processor, currently "tidy-help.xml".
For the detailed config options section however, the template
match is to the file "tidy-config.xml". This is captured in
the $CONFIG variable, declared here:
-->
<xsl:variable name="CONFIG" select="document('tidy-config.xml')"/>
<!-- Main Template: -->
<!--
Entry point. Builds the man page by calling, in order, the templates
for the header line, the command-line part, the detailed configuration
options part, and the closing SEE ALSO / AUTHOR part.
-->
<xsl:template match="/">
<xsl:call-template name="header-section" />
<xsl:call-template name="cmdline-section" />
<xsl:call-template name="config-section" />
<xsl:call-template name="manpage-see-also-section" />
</xsl:template>
<!-- Named Templates: -->
<!--
"header-section": emits the roff comment line and the .TH title line.
The empty xsl:text at the start splits the literal text so that the
whitespace before it is dropped and the output begins directly with the
roff comment. The .TH date is the CVS keyword string written here, not
a value taken from the input; the version comes from cmdline/@version.
-->
<xsl:template name="header-section">
<xsl:text/>.\" tidy man page for the Tidy Sourceforge project
.TH tidy 1 "$Date: 2007/02/01 12:25:21 $" "HTML Tidy <xsl:value-of select="cmdline/@version" />" "User commands"
</xsl:template>
<!--
"cmdline-section": the first part of the man page (NAME, SYNOPSIS,
DESCRIPTION, OPTIONS, USAGE, ENVIRONMENT, EXIT STATUS). Everything is
literal roff text except the version number (cmdline/@version) and the
option list, which is produced by "show-cmdline-options".
Lines in this template are output verbatim, so roff requests must stay
at the start of their line.
-->
<xsl:template name="cmdline-section">
.SH NAME
\fBtidy\fR - validate, correct, and pretty-print HTML files
.br
(version: <xsl:value-of select="cmdline/@version" />)
.SH SYNOPSIS
\fBtidy\fR [option ...] [file ...] [option ...] [file ...]
.SH DESCRIPTION
Tidy reads HTML, XHTML and XML files and writes cleaned up markup. For HTML variants, it detects and corrects many common coding errors and strives to produce visually equivalent markup that is both W3C compliant and works on most browsers. A common use of Tidy is to convert plain HTML to XHTML. For generic XML files, Tidy is limited to correcting basic well-formedness errors and pretty printing.
.LP
If no input file is specified, Tidy reads the standard input. If no output file is specified, Tidy writes the tidied markup to the standard output. If no error file is specified, Tidy writes messages to the standard error.
For command line options that expect a numerical argument, a default is assumed if no meaningful value can be found.
.SH OPTIONS
<xsl:call-template name="show-cmdline-options" />
.SH USAGE
.LP
Use \fB--\fR\fIoptionX valueX\fR for the detailed configuration option "optionX" with argument "valueX". See also below under \fBDetailed Configuration Options\fR as to how to conveniently group all such options in a single config file.
.LP
Input/Output default to stdin/stdout respectively. Single letter options apart from \fB-f\fR and \fB-o\fR may be combined as in:
.LP
.in 1i
\fBtidy -f errs.txt -imu foo.html\fR
.LP
For further info on HTML see \fIhttp://www.w3.org/MarkUp\fR.
.LP
For more information about HTML Tidy, visit the project home page at \fIhttp://tidy.sourceforge.net\fR. Here, you will find links to documentation, mailing lists (with searchable archives) and links to report bugs.
.SH ENVIRONMENT
.TP
.B HTML_TIDY
Name of the default configuration file. This should be an absolute path, since you will probably invoke \fBtidy\fR from different directories. The value of HTML_TIDY will be parsed after the compiled-in default (defined with -DTIDY_CONFIG_FILE), but before any of the files specified using \fB-config\fR.
.SH "EXIT STATUS"
.IP 0
All input files were processed successfully.
.IP 1
There were warnings.
.IP 2
There were errors.
</xsl:template>
<!--
"config-section": the second part of the man page, introducing the
detailed configuration options (synopsis, warning, config-file format,
a sample config file) and then listing them via "show-config-options".
Everything before that call is literal roff text, output verbatim.
NOTE(review): the emitted text says "There are five types" and names
Boolean, AutoBool, DocType, Enum and String, but then also describes
Integer and Encoding types. Confirm the real set of option types
against the XML configuration output before correcting the wording.
-->
<xsl:template name="config-section">
.SH ______________________________
.SH " "
.SH "DETAILED CONFIGURATION OPTIONS"
This section describes the Detailed (i.e., "expanded") Options, which may be specified by preceding each option with \fB--\fR at the command line, followed by its desired value, OR by placing the options and values in a configuration file, and telling tidy to read that file with the \fB-config\fR standard option.
.SH SYNOPSIS
\fBtidy --\fR\fIoption1 \fRvalue1 \fB--\fIoption2 \fRvalue2 [standard options ...]
.br
\fBtidy -config \fIconfig-file \fR[standard options ...]
.SH WARNING
The options detailed here do not include the "standard" command-line options (i.e., those preceded by a single '\fB-\fR') described above in the first section of this man page.
.SH DESCRIPTION
A list of options for configuring the behavior of Tidy, which can be passed either on the command line, or specified in a configuration file.
.LP
A Tidy configuration file is simply a text file, where each option
is listed on a separate line in the form
.LP
.in 1i
\fBoption1\fR: \fIvalue1\fR
.br
\fBoption2\fR: \fIvalue2\fR
.br
etc.
.LP
The permissible values for a given option depend on the option's \fBType\fR. There are five types: \fIBoolean\fR, \fIAutoBool\fR, \fIDocType\fR, \fIEnum\fR, and \fIString\fR. Boolean types allow any of \fIyes/no, y/n, true/false, t/f, 1/0\fR. AutoBools allow \fIauto\fR in addition to the values allowed by Booleans. Integer types take non-negative integers. String types generally have no defaults, and you should provide them in non-quoted form (unless you wish the output to contain the literal quotes).
.LP
Enum, Encoding, and DocType "types" have a fixed repertoire of items; consult the \fIExample\fR[s] provided below for the option[s] in question.
.LP
You only need to provide options and values for those whose defaults you wish to override, although you may wish to include some already-defaulted options and values for the sake of documentation and explicitness.
.LP
Here is a sample config file, with at least one example of each of the five Types:
.LP
\fI
// sample Tidy configuration options
output-xhtml: yes
add-xml-decl: no
doctype: strict
char-encoding: ascii
indent: auto
wrap: 76
repeated-attributes: keep-last
error-file: errs.txt
\fR
.LP
Below is a summary and brief description of each of the options. They are listed alphabetically within each category. There are five categories: \fIHTML, XHTML, XML\fR options, \fIDiagnostics\fR options, \fIPretty Print\fR options, \fICharacter Encoding\fR options, and \fIMiscellaneous\fR options.
.LP
.SH OPTIONS
<xsl:call-template name="show-config-options" />
</xsl:template>
<!--
"show-cmdline-options": emits one .SS subsection per command-line option
category and fills each with "cmdline-detail". The category parameter is
the @class value of the options to list: file-manip,
process-directives, char-encoding, misc.
-->
<xsl:template name="show-cmdline-options">
.SS File manipulation
<xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">file-manip</xsl:with-param>
</xsl:call-template>
.SS Processing directives
<xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">process-directives</xsl:with-param>
</xsl:call-template>
.SS Character encodings
<xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">char-encoding</xsl:with-param>
</xsl:call-template>
.SS Miscellaneous
<xsl:call-template name="cmdline-detail">
<xsl:with-param name="category">misc</xsl:with-param>
</xsl:call-template>
</xsl:template>
<xsl:template name="cmdline-detail">
<!--
For each option in one of the 4 categories/classes passed by
"show-cmdline-options", provide its
1. names
2. description
3. equivalent configuration option
Each option becomes a .TP entry: the names on the tag line, then the
description, then the equivalent config option (if any) on its own
line. The xsl:text elements holding a newline supply the line breaks.
-->
<xsl:param name="category" />
<xsl:for-each select='/cmdline/option[@class=$category]'>
<xsl:text>
.TP
</xsl:text>
<xsl:call-template name="process-names" />
<xsl:text>
</xsl:text>
<xsl:apply-templates select="description" />
<xsl:text>
</xsl:text>
<xsl:call-template name="process-eqconfig" />
</xsl:for-each>
</xsl:template>
<xsl:template name="process-names">
<!--
Used only in the cmdline section.
Writes every name child of the current option in bold, separated by
", " (no separator after the last name). The empty xsl:text elements
keep the surrounding line breaks of this stylesheet out of the output.
-->
<xsl:for-each select="name">
<xsl:text />\fB<xsl:value-of select="." />\fR<xsl:text />
<xsl:if test="position() != last()">
<xsl:text>, </xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:template>
<xsl:template name="process-eqconfig">
<!--
Used only in the cmdline section.
Writes each equivalent configuration option of the current command-line
option in italics, wrapped in parentheses. Nothing is written when the
first eqconfig child is absent or empty.
-->
<xsl:if test="string(eqconfig)">
<xsl:for-each select="eqconfig">
<xsl:value-of select="concat('(\fI', ., '\fR)')" />
</xsl:for-each>
</xsl:if>
</xsl:template>
<xsl:template name="show-config-options">
<!-- Used only in the config options section (called from "config-section"); emits one .SS subsection per option class and fills it via "config-detail" -->
.SS HTML, XHTML, XML options:
<xsl:call-template name="config-detail">
<xsl:with-param name="category">markup</xsl:with-param>
</xsl:call-template>
.SS Diagnostics options:
<xsl:call-template name="config-detail">
<xsl:with-param name="category">diagnostics</xsl:with-param>
</xsl:call-template>
.SS Pretty Print options:
<xsl:call-template name="config-detail">
<xsl:with-param name="category">print</xsl:with-param>
</xsl:call-template>
.SS Character Encoding options:
<xsl:call-template name="config-detail">
<xsl:with-param name="category">encoding</xsl:with-param>
</xsl:call-template>
.SS Miscellaneous options:
<xsl:call-template name="config-detail">
<xsl:with-param name="category">misc</xsl:with-param>
</xsl:call-template>
</xsl:template>
<!--
Note that any templates called implicitly or explicitly
from the "config-detail" template below will match on
the document referred to by the $CONFIG variable, i.e.,
the file "tidy-config.xml", created by running
tidy -xml-config > tidy-config.xml
The $CONFIG variable is set at the top level of this
stylesheet.
-->
<xsl:template name="config-detail">
<!--
For each option in one of the 5 categories/classes, provide its
1. name
2. type
3. default (if any)
4. example (if any)
5. seealso (if any)
6. description
Options are selected from the $CONFIG document by their class attribute
and listed in ascending order of name. Each one becomes a .TP entry;
default, example and see-also lines come from the "provide-default",
"provide-example" and "seealso" named templates.
-->
<xsl:param name="category" />
<xsl:for-each select='$CONFIG/config/option[@class=$category]'>
<xsl:sort select="name" order="ascending" />
.TP
\fB<xsl:apply-templates select="name" />\fR
Type: \fI<xsl:apply-templates select="type" />\fR
.br
<xsl:call-template name="provide-default" />
.br
<xsl:call-template name="provide-example" />
<xsl:text>
</xsl:text>
<xsl:apply-templates select="description" />
<xsl:call-template name="seealso" />
</xsl:for-each>
</xsl:template>
<!--
Used only in the config options section.
"seealso": when the current option has seealso children, starts a new
line, right-justifies the next output line (.rj 1) and writes
"See also: " followed by the referenced option names in italics,
separated by ", ". Writes nothing when there are no seealso children.
-->
<xsl:template name="seealso">
<xsl:if test="seealso">
<xsl:text>
</xsl:text>
.rj 1
\fBSee also\fR: <xsl:text />
<xsl:for-each select="seealso">
<xsl:text />\fI<xsl:value-of select="." />\fR<xsl:text />
<xsl:if test="position() != last()">
<xsl:text>, </xsl:text>
</xsl:if>
</xsl:for-each>
</xsl:if>
</xsl:template>
<!-- Used only in the config options section: -->
<xsl:template name="provide-default">
<!--
Writes the "Default:" line of an option. The value comes from the
`default` child of the current option; when that child is missing or
empty a single '-' takes its place. The label and the italic font
escapes are written once, around the value.
-->
<xsl:text>Default: \fI</xsl:text>
<xsl:choose>
<xsl:when test="string(default)">
<xsl:apply-templates select="default" />
</xsl:when>
<xsl:otherwise>-</xsl:otherwise>
</xsl:choose>
<xsl:text>\fR</xsl:text>
</xsl:template>
<!-- Used only in the config options section: -->
<xsl:template name="provide-example">
<!--
Writes the "Example:" line of an option.
By default, doesn't output examples for String types (mirroring the
quickref page). But for *any* options in the XML instance that
have an `example` child, that example will be used in lieu of a
stylesheet-provided one. (Useful e.g. for `repeated-attributes`).
When the `example` child is missing or empty, a single '-' is shown.
The fallback line is labelled "Example:" like the regular one; it used
to be labelled "Default:", which put two "Default:" lines in the entry
of every option without an example.
-->
<xsl:choose>
<xsl:when test="string-length(example) &gt; 0">
<xsl:text />Example: \fI<xsl:apply-templates
select="example" />\fR<xsl:text />
</xsl:when>
<xsl:otherwise>
<xsl:text />Example: \fI-\fR<xsl:text />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- Called from the templates below matching `code`, `em`, `strong`: -->
<xsl:template name="escape-backslash">
<!--
Since backslashes are "special" to the *roff processors used
to generate man pages, we need to escape backslash characters
appearing in content with another backslash.
When the current node's string value contains a backslash, that string
value is written with every backslash doubled (child markup is not
processed in this case). Otherwise the children are processed by the
regular templates.
-->
<xsl:choose>
<xsl:when test="contains(.,'\')">
<xsl:call-template name="escape-backslash-in-string">
<xsl:with-param name="text" select="string(.)" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:apply-templates />
</xsl:otherwise>
</xsl:choose>
</xsl:template>

<!-- Helper for "escape-backslash": writes $text with each backslash doubled. -->
<xsl:template name="escape-backslash-in-string">
<xsl:param name="text" />
<xsl:choose>
<xsl:when test="contains($text,'\')">
<!-- emit everything up to and including the first backslash, doubled ... -->
<xsl:value-of select="concat( substring-before($text,'\'), '\\' )" />
<!-- ... then handle the remainder, which may hold further backslashes -->
<xsl:call-template name="escape-backslash-in-string">
<xsl:with-param name="text" select="substring-after($text,'\')" />
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$text" />
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!--
Appears at the bottom of the man page.
"manpage-see-also-section": literal SEE ALSO and AUTHOR sections, ending
with a "generated with" line that carries the version read from
cmdline/@version.
-->
<xsl:template name="manpage-see-also-section">
.SH "SEE ALSO"
HTML Tidy Project Page at \fIhttp://tidy.sourceforge.net\fR
.SH AUTHOR
\fBTidy\fR was written by Dave Raggett &lt;\fIdsr@w3.org\fR&gt;, and is now maintained and developed by the Tidy team at \fIhttp://tidy.sourceforge.net/\fR. It is released under the \fIMIT Licence\fR.
.LP
Generated automatically with HTML Tidy released on <xsl:value-of select="cmdline/@version" />.
</xsl:template>
<!-- Regular Templates: -->
<!-- description: no markup of its own, just process the content. -->
<xsl:template match="description">
<xsl:apply-templates />
</xsl:template>
<!-- a: the link text, followed by " at " and the href value in italics. -->
<xsl:template match="a">
<xsl:apply-templates />
<xsl:text /> at \fI<xsl:value-of select="@href" />\fR<xsl:text />
</xsl:template>
<!-- code, em: italics (\fI ... \fR), with backslashes in the content escaped. -->
<xsl:template match="code | em">
<xsl:text />\fI<xsl:call-template name="escape-backslash" />\fR<xsl:text />
</xsl:template>
<!-- br: a roff .br request on a line of its own. -->
<xsl:template match="br">
<xsl:text>
.br
</xsl:text>
</xsl:template>
<!-- strong: bold (\fB ... \fR), with backslashes in the content escaped. -->
<xsl:template match="strong">
<xsl:text />\fB<xsl:call-template name="escape-backslash" />\fR<xsl:text />
</xsl:template>
<!--
The following templates
a) normalize whitespace, primarily necessary for `description`
b) do so without stripping possible whitespace surrounding `code`
c) strip leading and trailing whitespace in `description` and `code`
(courtesy of Ken Holman on the XSL-list).
There is one template per position of the text node among its siblings.
normalize-space() would also trim the ends of the string, so on each
side that has a sibling an 'x' guard is added before normalizing and
cut off again afterwards; whitespace next to a sibling is thereby
collapsed to a single space rather than removed.
-->
<!-- text between two siblings: guard both ends -->
<xsl:template match="text()[preceding-sibling::node() and
following-sibling::node()]">
<xsl:variable name="ns" select="normalize-space(concat('x',.,'x'))"/>
<xsl:value-of select="substring( $ns, 2, string-length($ns) - 2 )" />
</xsl:template>
<!-- last text node: guard the start only, the end is trimmed -->
<xsl:template match="text()[preceding-sibling::node() and
not( following-sibling::node() )]">
<xsl:variable name="ns" select="normalize-space(concat('x',.))"/>
<xsl:value-of select="substring( $ns, 2, string-length($ns) - 1 )" />
</xsl:template>
<!-- first text node: guard the end only, the start is trimmed -->
<xsl:template match="text()[not( preceding-sibling::node() ) and
following-sibling::node()]">
<xsl:variable name="ns" select="normalize-space(concat(.,'x'))"/>
<xsl:value-of select="substring( $ns, 1, string-length($ns) - 1 )" />
</xsl:template>
<!-- only child: plain normalization, both ends trimmed -->
<xsl:template match="text()[not( preceding-sibling::node() ) and
not( following-sibling::node() )]">
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>
</xsl:stylesheet>

118
include/buffio.h Normal file
View file

@ -0,0 +1,118 @@
#ifndef __TIDY_BUFFIO_H__
#define __TIDY_BUFFIO_H__
/** @file buffio.h - Treat buffer as an I/O stream.
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/01/23 11:17:45 $
$Revision: 1.9 $
Requires buffer to automatically grow as bytes are added.
Must keep track of current read and write points.
*/
#include "platform.h"
#include "tidy.h"
#ifdef __cplusplus
extern "C" {
#endif
/** TidyBuffer - A chunk of memory.
**
** Growable byte buffer used as an I/O stream: bytes are appended at the
** write point (tracked by `size`) and consumed from the read point
** (tracked by `next`). The struct tag is _TidyBuffer; the TidyBuffer
** name used by the functions below is declared outside this header
** (presumably a typedef in tidy.h - confirm).
*/
TIDY_STRUCT
struct _TidyBuffer
{
TidyAllocator* allocator; /**< Memory allocator */
byte* bp; /**< Pointer to bytes */
uint size; /**< # bytes currently in use */
uint allocated; /**< # bytes allocated */
uint next; /**< Offset of current input position */
};
/** Initialize data structure using the default allocator */
TIDY_EXPORT void TIDY_CALL tidyBufInit( TidyBuffer* buf );
/** Initialize data structure using the given custom allocator */
TIDY_EXPORT void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf, TidyAllocator* allocator );
/** Free current buffer, allocate given amount, reset input pointer,
use the default allocator */
TIDY_EXPORT void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize );
/** Free current buffer, allocate given amount, reset input pointer,
use the given custom allocator */
TIDY_EXPORT void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf,
TidyAllocator* allocator,
uint allocSize );
/** Expand buffer to given size.
** Chunk size is minimum growth. Pass 0 for default of 256 bytes.
*/
TIDY_EXPORT void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf,
uint allocSize, uint chunkSize );
/** Free current contents and zero out */
TIDY_EXPORT void TIDY_CALL tidyBufFree( TidyBuffer* buf );
/** Set buffer bytes to 0 */
TIDY_EXPORT void TIDY_CALL tidyBufClear( TidyBuffer* buf );
/** Attach to existing buffer.
** @param bp   Bytes to attach to.
** @param size Number of bytes at bp.
*/
TIDY_EXPORT void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size );
/** Detach from buffer. Caller must free. */
TIDY_EXPORT void TIDY_CALL tidyBufDetach( TidyBuffer* buf );
/** Append bytes to buffer. Expand if necessary.
** @param vp   Bytes to append.
** @param size Number of bytes at vp.
*/
TIDY_EXPORT void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size );
/** Append one byte to buffer. Expand if necessary. */
TIDY_EXPORT void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv );
/** Get byte from end of buffer.
** Returns int rather than byte - presumably so an out-of-band value can
** signal an empty buffer; confirm against the implementation.
*/
TIDY_EXPORT int TIDY_CALL tidyBufPopByte( TidyBuffer* buf );
/** Get byte from front of buffer. Increment input offset.
** Returns int rather than byte - presumably so an out-of-band value can
** signal end of input; confirm against the implementation.
*/
TIDY_EXPORT int TIDY_CALL tidyBufGetByte( TidyBuffer* buf );
/** At end of buffer? */
TIDY_EXPORT Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf );
/** Put a byte back into the buffer. Decrement input offset. */
TIDY_EXPORT void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv );
/**************
TIDY
**************/
/* Adapters that connect a TidyBuffer to Tidy's input source and
** output sink interfaces.
*/
/** Initialize a buffer input source */
TIDY_EXPORT void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf );
/** Initialize a buffer output sink */
TIDY_EXPORT void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf );
#ifdef __cplusplus
}
#endif
#endif /* __TIDY_BUFFIO_H__ */
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

636
include/platform.h Normal file
View file

@ -0,0 +1,636 @@
#ifndef __TIDY_PLATFORM_H__
#define __TIDY_PLATFORM_H__
/* platform.h -- Platform specifics
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2008/03/17 12:57:01 $
$Revision: 1.66 $
*/
#ifdef __cplusplus
extern "C" {
#endif
/*
Uncomment and edit one of the following #defines if you
want to specify the config file at compile-time.
*/
/* #define TIDY_CONFIG_FILE "/etc/tidy_config.txt" */ /* original */
/* #define TIDY_CONFIG_FILE "/etc/tidyrc" */
/* #define TIDY_CONFIG_FILE "/etc/tidy.conf" */
/*
Uncomment the following #define if you are on a system
supporting the HOME environment variable.
It enables tidy to find config files named ~/.tidyrc if
the HTML_TIDY environment variable is not set.
*/
/* #define TIDY_USER_CONFIG_FILE "~/.tidyrc" */
/*
Uncomment the following #define if your
system supports the call getpwnam(),
e.g. Unix and Linux.
It enables tidy to expand file names of the form
~user/foo given in the HTML_TIDY environment
variable, in TIDY_CONFIG_FILE or TIDY_USER_CONFIG_FILE, or
on the command line: -config ~joebob/tidy.cfg
Contributed by Todd Lewis.
*/
/* #define SUPPORT_GETPWNAM */
/* Optional feature switches.
** Each one defaults to 1 (enabled) and can be preset by the build,
** e.g. -DSUPPORT_ASIAN_ENCODINGS=0, to compile the feature out.
*/

/* Big5 and Shift_JIS character encodings */
#if !defined(SUPPORT_ASIAN_ENCODINGS)
# define SUPPORT_ASIAN_ENCODINGS 1
#endif

/* UTF-16 character encodings */
#if !defined(SUPPORT_UTF16_ENCODINGS)
# define SUPPORT_UTF16_ENCODINGS 1
#endif

/* Additional accessibility checks */
#if !defined(SUPPORT_ACCESSIBILITY_CHECKS)
# define SUPPORT_ACCESSIBILITY_CHECKS 1
#endif
/* Convenience defines for Mac platforms.
** Defines MAC_OS_CLASSIC or MAC_OS_X, plus the umbrella MAC_OS, and
** supplies a PLATFORM_NAME unless one was already defined.
*/
#if defined(macintosh)
/* Mac OS 6.x/7.x/8.x/9.x, with or without CarbonLib - MPW or Metrowerks 68K/PPC compilers */
#define MAC_OS_CLASSIC
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Mac OS"
#endif
/* needed for access() */
#if !defined(_POSIX) && !defined(NO_ACCESS_SUPPORT)
#define NO_ACCESS_SUPPORT
#endif
/* getpwnam() support is forced off on classic Mac OS */
#ifdef SUPPORT_GETPWNAM
#undef SUPPORT_GETPWNAM
#endif
#elif defined(__APPLE__) && defined(__MACH__)
/* Mac OS X (client) 10.x (or server 1.x/10.x) - gcc or Metrowerks MachO compilers */
#define MAC_OS_X
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Mac OS X"
#endif
#endif
#if defined(MAC_OS_CLASSIC) || defined(MAC_OS_X)
/* Any OS on Mac platform */
#define MAC_OS
#define FILENAMES_CASE_SENSITIVE 0
/* NOTE(review): this maps strcasecmp onto the case-SENSITIVE strcmp, even
   though file names are declared case-insensitive just above. It also
   applies to Mac OS X, not only classic Mac OS - confirm this is intended. */
#define strcasecmp strcmp
/* Mac builds default the replacement encoding to MACROMAN; all other
   builds get their default further down in this header. */
#ifndef DFLT_REPL_CHARENC
#define DFLT_REPL_CHARENC MACROMAN
#endif
#endif
/* Convenience defines for BSD like platforms.
** Every branch defines BSD_BASED_OS; they differ only in the
** PLATFORM_NAME they supply. PLATFORM_NAME is set only if it is not
** already defined, so a name given by the build or by an earlier
** section of this header takes precedence.
*/
#if defined(__FreeBSD__)
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "FreeBSD"
#endif
#elif defined(__NetBSD__)
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "NetBSD"
#endif
#elif defined(__OpenBSD__)
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "OpenBSD"
#endif
#elif defined(__DragonFly__)
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "DragonFly"
#endif
#elif defined(__MINT__)
/* FreeMiNT is handled as a BSD-like platform by this header */
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "FreeMiNT"
#endif
#elif defined(__bsdi__)
#define BSD_BASED_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "BSD/OS"
#endif
#endif
/* Convenience defines for Windows platforms.
** Defines WINDOWS_OS, a default PLATFORM_NAME, compiler-specific
** strcasecmp replacements, and turns off case-sensitive file names and
** POSIX mapped files.
*/
#if defined(WINDOWS) || defined(_WIN32)
#define WINDOWS_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Windows"
#endif
#if defined(__MWERKS__) || defined(__MSL__)
/* not available with Metrowerks Standard Library */
#ifdef SUPPORT_GETPWNAM
#undef SUPPORT_GETPWNAM
#endif
/* needed for setmode() */
#if !defined(NO_SETMODE_SUPPORT)
#define NO_SETMODE_SUPPORT
#endif
#define strcasecmp _stricmp
#endif
#if defined(__BORLANDC__)
#define strcasecmp stricmp
#endif
/* Unconditional: these are not wrapped in #ifndef, so a value preset by
   the build would be redefined here. */
#define FILENAMES_CASE_SENSITIVE 0
#define SUPPORT_POSIX_MAPPED_FILES 0
#endif
/* Convenience defines for Linux platforms.
** Every branch defines LINUX_OS; they differ only in the PLATFORM_NAME
** they supply. PLATFORM_NAME is set only if it is not already defined.
** The architecture-specific branches test the legacy `linux` macro; a
** compiler that only defines `__linux__` falls through to the generic
** branch at the end.
*/
#if defined(linux) && defined(__alpha__)
/* Linux on Alpha - gcc compiler */
#define LINUX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Linux/Alpha"
#endif
#elif defined(linux) && defined(__sparc__)
/* Linux on Sparc - gcc compiler */
#define LINUX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Linux/Sparc"
#endif
#elif defined(linux) && (defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__))
/* Linux on x86 - gcc compiler */
#define LINUX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Linux/x86"
#endif
#elif defined(linux) && defined(__powerpc__)
/* Linux on PPC - gcc compiler */
#define LINUX_OS
/* The MkLinux test used to be
       defined(__linux__) && defined(__powerpc__)
   which merely repeats the enclosing condition, so every PowerPC Linux
   build was labelled "MkLinux" and "Linux/PPC" could never be selected.
   No predefined macro visible here identifies MkLinux, so it is now
   opt-in: build with -DMAC_OS_MKLINUX to get the MkLinux name. */
#if defined(MAC_OS_MKLINUX)
/* MkLinux on PPC - gcc (egcs) compiler */
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "MkLinux"
#endif
#else
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Linux/PPC"
#endif
#endif
#elif defined(linux) || defined(__linux__)
/* generic Linux */
#define LINUX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Linux"
#endif
#endif
/* The sections below all follow the same pattern: test a compiler
** predefined macro, define a <NAME>_OS convenience macro, and supply a
** PLATFORM_NAME unless one is already defined. Because of that guard the
** first matching section in this header decides the name; later matches
** still define their own <NAME>_OS macro.
*/
/* Convenience defines for Solaris platforms */
#if defined(sun)
#define SOLARIS_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Solaris"
#endif
#endif
/* Convenience defines for HPUX + gcc platforms */
#if defined(__hpux)
#define HPUX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "HPUX"
#endif
#endif
/* Convenience defines for RISCOS + gcc platforms */
#if defined(__riscos__)
#define RISC_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "RISC OS"
#endif
#endif
/* Convenience defines for OS/2 + icc/gcc platforms */
#if defined(__OS2__) || defined(__EMX__)
#define OS2_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "OS/2"
#endif
/* file names compare case-insensitively; stricmp stands in for strcasecmp */
#define FILENAMES_CASE_SENSITIVE 0
#define strcasecmp stricmp
#endif
/* Convenience defines for IRIX */
#if defined(__sgi)
#define IRIX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "SGI IRIX"
#endif
#endif
/* Convenience defines for AIX */
#if defined(_AIX)
#define AIX_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "IBM AIX"
#endif
#endif
/* Convenience defines for BeOS platforms */
#if defined(__BEOS__)
#define BE_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "BeOS"
#endif
#endif
/* Convenience defines for Cygwin platforms */
#if defined(__CYGWIN__)
#define CYGWIN_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Cygwin"
#endif
#define FILENAMES_CASE_SENSITIVE 0
#endif
/* Convenience defines for OpenVMS */
#if defined(__VMS)
#define OPENVMS_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "OpenVMS"
#endif
#define FILENAMES_CASE_SENSITIVE 0
#endif
/* Convenience defines for DEC Alpha OSF + gcc platforms */
#if defined(__osf__)
#define OSF_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "DEC Alpha OSF"
#endif
#endif
/* Convenience defines for ARM platforms.
** Keyed on the CPU macro __arm rather than on an operating system, so
** ARM_OS can be defined together with one of the OS macros above.
*/
#if defined(__arm)
#define ARM_OS
#if defined(forARM) && defined(__NEWTON_H)
/* Using Newton C++ Tools ARMCpp compiler */
#define NEWTON_OS
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "Newton"
#endif
#else
#ifndef PLATFORM_NAME
#define PLATFORM_NAME "ARM"
#endif
#endif
#endif
#include <ctype.h>
#include <stdio.h>
#include <setjmp.h> /* for longjmp on error exit */
#include <stdlib.h>
#include <stdarg.h> /* may need <varargs.h> for Unix V */
#include <string.h>
#include <assert.h>
#ifdef NEEDS_MALLOC_H
#include <malloc.h>
#endif
#ifdef SUPPORT_GETPWNAM
#include <pwd.h>
#endif
#ifdef NEEDS_UNISTD_H
#include <unistd.h> /* needed for unlink on some Unix systems */
#endif
/* Fallback replacement character encoding.
** Can be set at compile time. WIN1252 is used unless the build or an
** earlier platform section (the Mac one picks MACROMAN) already chose
** a value.
*/
#if !defined(DFLT_REPL_CHARENC)
# define DFLT_REPL_CHARENC WIN1252
#endif

/* File names compare case-sensitively unless a platform section above
** (or the build) said otherwise.
*/
#if !defined(FILENAMES_CASE_SENSITIVE)
# define FILENAMES_CASE_SENSITIVE 1
#endif
/*
Tidy preserves the last modified time for the files it
cleans up.
*/
/*
If your platform doesn't support <utime.h> and the
utime() function, or <sys/futime> and the futime()
function then set PRESERVE_FILE_TIMES to 0.
If your platform doesn't support <sys/utime.h> and the
futime() function, then set HAS_FUTIME to 0.
If your platform supports <utime.h> and the
utime() function requires the file to be
closed first, then set UTIME_NEEDS_CLOSED_FILE to 1.
*/
/* PRESERVE_FILE_TIMES selection.
** 1. The legacy spelling PRESERVEFILETIMES, when defined, wins and
**    overrides any PRESERVE_FILE_TIMES value (kept for compatibility).
** 2. Otherwise a preset PRESERVE_FILE_TIMES is left alone.
** 3. Otherwise it defaults to 1, except on RISC OS, OpenVMS and OSF
**    where it defaults to 0.
*/
#if defined(PRESERVEFILETIMES)
# undef PRESERVE_FILE_TIMES
# define PRESERVE_FILE_TIMES PRESERVEFILETIMES
#endif

#if !defined(PRESERVE_FILE_TIMES)
# if !defined(RISC_OS) && !defined(OPENVMS_OS) && !defined(OSF_OS)
#  define PRESERVE_FILE_TIMES 1
# else
#  define PRESERVE_FILE_TIMES 0
# endif
#endif
/* Everything up to the matching "#endif PRESERVE_FILE_TIMES" is compiled
** only when file-time preservation is enabled: it picks HAS_FUTIME and
** UTIME_NEEDS_CLOSED_FILE defaults, includes the stat/utime headers and
** maps the Unix names onto their MSVC underscore variants.
*/
#if PRESERVE_FILE_TIMES
/* HAS_FUTIME defaults to 0 on the listed platforms/compilers and to 1
   everywhere else; a value preset by the build is respected. */
#ifndef HAS_FUTIME
#if defined(CYGWIN_OS) || defined(BE_OS) || defined(OS2_OS) || defined(HPUX_OS) || defined(SOLARIS_OS) || defined(LINUX_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(IRIX_OS) || defined(AIX_OS) || defined(__BORLANDC__)
#define HAS_FUTIME 0
#else
#define HAS_FUTIME 1
#endif
#endif
/* UTIME_NEEDS_CLOSED_FILE defaults to 1 on the listed platforms and to 0
   everywhere else; a value preset by the build is respected. */
#ifndef UTIME_NEEDS_CLOSED_FILE
#if defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(LINUX_OS)
#define UTIME_NEEDS_CLOSED_FILE 1
#else
#define UTIME_NEEDS_CLOSED_FILE 0
#endif
#endif
/* stat header: <sys/stat.h>, except for classic Mac OS and Metrowerks
   MSL builds (other than Mac OS X), which use <stat.h> */
#if defined(MAC_OS_X) || (!defined(MAC_OS_CLASSIC) && !defined(__MSL__))
#include <sys/types.h>
#include <sys/stat.h>
#else
#include <stat.h>
#endif
#if HAS_FUTIME
#include <sys/utime.h>
#else
#include <utime.h>
#endif /* HAS_FUTIME */
/*
MS Windows needs _ prefix for Unix file functions.
Not required by Metrowerks Standard Library (MSL).
Tidy uses following for preserving the last modified time.
WINDOWS automatically set by Win16 compilers.
_WIN32 automatically set by Win32 compilers.
*/
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__)
#define futime _futime
#define fstat _fstat
#define utimbuf _utimbuf /* Windows seems to want utimbuf */
#define stat _stat
#define utime _utime
/* NOTE(review): vsnprintf has nothing to do with file times, yet this
   alias only exists when PRESERVE_FILE_TIMES is non-zero - confirm
   whether it should live in the unconditional _WIN32 block instead. */
#define vsnprintf _vsnprintf
#endif /* _WIN32 */
#endif /* PRESERVE_FILE_TIMES */
/*
MS Windows needs _ prefix for Unix file functions.
Not required by Metrowerks Standard Library (MSL).
WINDOWS automatically set by Win16 compilers.
_WIN32 automatically set by Win32 compilers.
*/
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__)
/* Open Watcom is excluded from the fileno/setmode aliases only; the
   access and strcasecmp aliases below apply to it as well. */
#ifndef __WATCOMC__
#define fileno _fileno
#define setmode _setmode
#endif
#define access _access
#define strcasecmp _stricmp
/* Silence MSVC warnings; the second group is limited to newer compiler
   versions via the _MSC_VER test. */
#if _MSC_VER > 1000
#pragma warning( disable : 4189 ) /* local variable is initialized but not referenced */
#pragma warning( disable : 4100 ) /* unreferenced formal parameter */
#pragma warning( disable : 4706 ) /* assignment within conditional expression */
#endif
#if _MSC_VER > 1300
#pragma warning( disable : 4996 ) /* disable deprecation warning */
#endif
#endif /* _WIN32 */
/* Windows linkage and calling convention.
** TIDY_EXPORT becomes __declspec( dllexport ) when building a DLL
** (_USRDLL or _WINDLL defined) unless the build preset it.
** TIDY_CALL becomes __fastcall on 64-bit and __stdcall on 32-bit
** Windows unless the build preset it. On other platforms both macros
** receive empty defaults later in this header.
*/
#if defined(_WIN32)
#if (defined(_USRDLL) || defined(_WINDLL)) && !defined(TIDY_EXPORT)
#define TIDY_EXPORT __declspec( dllexport )
#endif
#ifndef TIDY_CALL
#ifdef _WIN64
# define TIDY_CALL __fastcall
#else
# define TIDY_CALL __stdcall
#endif
#endif
#endif /* _WIN32 */
/* hack for gnu sys/types.h file which defines uint and ulong.
** On the platforms in the first list <sys/types.h> is included here,
** presumably because it supplies uint there - confirm per platform.
*/
#if defined(BE_OS) || defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(OSF_OS) || defined(IRIX_OS) || defined(AIX_OS)
#include <sys/types.h>
#endif
/* On every platform NOT in this list, uint is declared here; any macro
   of that name is removed first. */
#if !defined(HPUX_OS) && !defined(CYGWIN_OS) && !defined(MAC_OS_X) && !defined(BE_OS) && !defined(SOLARIS_OS) && !defined(BSD_BASED_OS) && !defined(OSF_OS) && !defined(IRIX_OS) && !defined(AIX_OS) && !defined(LINUX_OS)
# undef uint
typedef unsigned int uint;
#endif
/* On the platforms in this list, ulong is declared here; any macro of
   that name is removed first. */
#if defined(HPUX_OS) || defined(CYGWIN_OS) || defined(MAC_OS) || defined(BSD_BASED_OS) || defined(_WIN32)
# undef ulong
typedef unsigned long ulong;
#endif
/*
With GCC 4, __attribute__ ((visibility("default"))) can be used along compiling with tidylib
with "-fvisibility=hidden". See http://gcc.gnu.org/wiki/Visibility and build/gmake/Makefile.
*/
/*
#if defined(__GNUC__) && __GNUC__ >= 4
#define TIDY_EXPORT __attribute__ ((visibility("default")))
#endif
*/
/* Declaration decorators. Both expand to nothing unless the build (or
** the Windows DLL logic earlier in this header) defined them first.
*/
#if !defined(TIDY_EXPORT)
# define TIDY_EXPORT /* Define it away for most builds */
#endif

#if !defined(TIDY_STRUCT)
# define TIDY_STRUCT
#endif
/* Basic character and string types used throughout the API. */
typedef unsigned char byte;
typedef uint tchar; /* single, full character */
typedef char tmbchar; /* single, possibly partial character */
/* TMBSTR_DEFINED lets a client supply its own tmbstr/ctmbstr/NULLSTR
   definitions by defining the guard before including this header. */
#ifndef TMBSTR_DEFINED
typedef tmbchar* tmbstr; /* pointer to buffer of possibly partial chars */
typedef const tmbchar* ctmbstr; /* Ditto, but const */
/* Empty string cast to the non-const string type; it is still a string
   literal, so it must never be written through. */
#define NULLSTR (tmbstr)""
#define TMBSTR_DEFINED
#endif
/* Calling-convention decorator: empty unless defined earlier (see the
   _WIN32 block) or by the build. */
#ifndef TIDY_CALL
#define TIDY_CALL
#endif
/* ARG_UNUSED(name) - wraps a parameter name in a function definition.
** With GCC and the Intel compiler the name is tagged
** __attribute__((unused)); with any other compiler it is passed
** through unchanged.
*/
#if !defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define ARG_UNUSED(arg) arg
#else
# define ARG_UNUSED(arg) arg __attribute__((unused))
#endif
/* HAS_VSNPRINTF selects "vsnprintf", which is safe with respect to
** buffer overflow, so it is enabled by default. Predefine
** HAS_VSNPRINTF in the build to override the default.
*/
#if !defined(HAS_VSNPRINTF)
# define HAS_VSNPRINTF 1
#endif

/* POSIX mapped files are enabled by default; the Windows section of
** this header defines the macro as 0 beforehand.
*/
#if !defined(SUPPORT_POSIX_MAPPED_FILES)
# define SUPPORT_POSIX_MAPPED_FILES 1
#endif
/*
`bool` is a reserved word in some C++ compilers but not in all of
them, depending on their age. To stay clear of the problem this
header never uses `bool`; it introduces its own enum named Bool with
the two values `no` (0) and `yes` (1).
*/
/* The C99 definition could be used where supported:
typedef _Bool Bool;
#define no (_Bool)0
#define yes (_Bool)1
*/
typedef enum
{
    no  = 0,
    yes = 1
} Bool;
/* for NULL pointers
#define null ((const void*)0)
extern void* null;
*/
#if defined(DMALLOC)
#include "dmalloc.h"
#endif
/* opaque_type( Name ) - declare an opaque handle type.
 * The active variant declares `struct _Name` with one dummy member and
 * makes `Name` a pointer to a const `struct _Name`. Inside the library
 * the handle is cast to the real implementation struct, which reduces
 * inter-dependencies/conflicts with application code.
 * The disabled variant would make the handle a plain `const void*`.
 */
#if 0
#define opaque_type(handle) typedef const void* handle
#else
#define opaque_type( handle )\
struct _##handle { int _opaque; };\
typedef struct _##handle const * handle
#endif

/* TidyIterator - opaque cursor passed back and forth to keep the
** current position in a list or other collection.
*/
opaque_type( TidyIterator );
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __TIDY_PLATFORM_H__ */
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1097
include/tidy.h Normal file

File diff suppressed because it is too large Load diff

747
include/tidyenum.h Normal file
View file

@ -0,0 +1,747 @@
#ifndef __TIDYENUM_H__
#define __TIDYENUM_H__
/* @file tidyenum.h -- Split public enums into separate header
Simplifies enum re-use in various wrappers. e.g. SWIG
generated wrappers and COM IDL files.
Copyright (c) 1998-2008 World Wide Web Consortium
(Massachusetts Institute of Technology, European Research
Consortium for Informatics and Mathematics, Keio University).
All Rights Reserved.
CVS Info :
$Author: arnaud02 $
$Date: 2008/06/18 20:18:54 $
$Revision: 1.18 $
Contributing Author(s):
Dave Raggett <dsr@w3.org>
The contributing author(s) would like to thank all those who
helped with testing, bug fixes and suggestions for improvements.
This wouldn't have been possible without your help.
COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be held
liable for any direct, indirect, special or consequential damages
arising out of any use of the software or documentation, even if
advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute
this source code, or portions hereof, documentation and executables,
for any purpose, without fee, subject to the following restrictions:
1. The origin of this source code must not be misrepresented.
2. Altered versions must be plainly marked as such and must
not be misrepresented as being the original source.
3. This Copyright notice may not be removed or altered from any
source or altered source distribution.
The copyright holders and contributing author(s) specifically
permit, without fee, and encourage the use of this source code
as a component for supporting the Hypertext Markup Language in
commercial products. If you use this source code in a product,
acknowledgment is not required but would be appreciated.
Created 2001-05-20 by Charles Reitzel
Updated 2002-07-01 by Charles Reitzel - 1st Implementation
*/
#ifdef __cplusplus
extern "C" {
#endif
/* Enumerate configuration options
*/
/** Categories into which Tidy configuration options are grouped.
*/
typedef enum
{
    TidyMarkup        = 0, /**< Markup options: (X)HTML version, etc */
    TidyDiagnostics   = 1, /**< Diagnostics */
    TidyPrettyPrint   = 2, /**< Output layout */
    TidyEncoding      = 3, /**< Character encodings */
    TidyMiscellaneous = 4  /**< File handling, message format, etc. */
} TidyConfigCategory;
/** Option IDs. Used to get/set option values.
**
** The numeric value of each ID is its position in this list. Options
** that are compiled out (see the SUPPORT_* switches) are replaced by
** ...NotUsed placeholders so that the IDs following them keep the same
** value in every build configuration.
*/
typedef enum
{
TidyUnknownOption, /**< Unknown option! */
TidyIndentSpaces, /**< Indentation n spaces */
TidyWrapLen, /**< Wrap margin */
TidyTabSize, /**< Expand tabs to n spaces */
TidyCharEncoding, /**< In/out character encoding */
TidyInCharEncoding, /**< Input character encoding (if different) */
TidyOutCharEncoding, /**< Output character encoding (if different) */
TidyNewline, /**< Output line ending (default to platform) */
TidyDoctypeMode, /**< See doctype property */
TidyDoctype, /**< User specified doctype */
TidyDuplicateAttrs, /**< Keep first or last duplicate attribute */
TidyAltText, /**< Default text for alt attribute */
/* obsolete */
TidySlideStyle, /**< Style sheet for slides: not used for anything yet */
TidyErrFile, /**< File name to write errors to */
TidyOutFile, /**< File name to write markup to */
TidyWriteBack, /**< If true then output tidied markup */
TidyShowMarkup, /**< If false, normal output is suppressed */
TidyShowWarnings, /**< However errors are always shown */
TidyQuiet, /**< No 'Parsing X', guessed DTD or summary */
TidyIndentContent, /**< Indent content of appropriate tags */
/**< "auto" does text/block level content indentation */
TidyHideEndTags, /**< Suppress optional end tags */
TidyXmlTags, /**< Treat input as XML */
TidyXmlOut, /**< Create output as XML */
TidyXhtmlOut, /**< Output extensible HTML */
TidyHtmlOut, /**< Output plain HTML, even for XHTML input.
Yes means set explicitly. */
TidyXmlDecl, /**< Add <?xml?> for XML docs */
TidyUpperCaseTags, /**< Output tags in upper not lower case */
TidyUpperCaseAttrs, /**< Output attributes in upper not lower case */
TidyMakeBare, /**< Make bare HTML: remove Microsoft cruft */
TidyMakeClean, /**< Replace presentational clutter by style rules */
TidyLogicalEmphasis, /**< Replace i by em and b by strong */
TidyDropPropAttrs, /**< Discard proprietary attributes */
TidyDropFontTags, /**< Discard presentation tags */
TidyDropEmptyParas, /**< Discard empty p elements */
TidyFixComments, /**< Fix comments with adjacent hyphens */
TidyBreakBeforeBR, /**< Output newline before <br> or not? */
/* obsolete */
TidyBurstSlides, /**< Create slides on each h2 element */
TidyNumEntities, /**< Use numeric entities */
TidyQuoteMarks, /**< Output " marks as &quot; */
TidyQuoteNbsp, /**< Output non-breaking space as entity */
TidyQuoteAmpersand, /**< Output naked ampersand as &amp; */
TidyWrapAttVals, /**< Wrap within attribute values */
TidyWrapScriptlets, /**< Wrap within JavaScript string literals */
TidyWrapSection, /**< Wrap within <![ ... ]> section tags */
TidyWrapAsp, /**< Wrap within ASP pseudo elements */
TidyWrapJste, /**< Wrap within JSTE pseudo elements */
TidyWrapPhp, /**< Wrap within PHP pseudo elements */
TidyFixBackslash, /**< Fix URLs by replacing \ with / */
TidyIndentAttributes,/**< Newline+indent before each attribute */
TidyXmlPIs, /**< If set to yes PIs must end with ?> */
TidyXmlSpace, /**< If set to yes adds xml:space attr as needed */
TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */
TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */
TidyKeepFileTimes, /**< If yes last modified time is preserved */
TidyWord2000, /**< Draconian cleaning for Word2000 */
TidyMark, /**< Add meta element indicating tidied doc */
TidyEmacs, /**< If true format error output for GNU Emacs */
TidyEmacsFile, /**< Name of current Emacs file */
TidyLiteralAttribs, /**< If true attributes may use newlines */
TidyBodyOnly, /**< Output BODY content only */
TidyFixUri, /**< Applies URI encoding if necessary */
TidyLowerLiterals, /**< Folds known attribute values to lower case */
TidyHideComments, /**< Hides all (real) comments in output */
TidyIndentCdata, /**< Indent <![CDATA[ ... ]]> section */
TidyForceOutput, /**< Output document even if errors were found */
TidyShowErrors, /**< Number of errors to put out */
TidyAsciiChars, /**< Convert quotes and dashes to nearest ASCII char */
TidyJoinClasses, /**< Join multiple class attributes */
TidyJoinStyles, /**< Join multiple style attributes */
TidyEscapeCdata, /**< Replace <![CDATA[]]> sections with escaped text */
#if SUPPORT_ASIAN_ENCODINGS
TidyLanguage, /**< Language property: not used for anything yet */
TidyNCR, /**< Allow numeric character references */
#else
TidyLanguageNotUsed, /**< Placeholder keeping later IDs stable */
TidyNCRNotUsed, /**< Placeholder keeping later IDs stable */
#endif
#if SUPPORT_UTF16_ENCODINGS
TidyOutputBOM, /**< Output a Byte Order Mark (BOM) for UTF-16 encodings */
/**< auto: if input stream has BOM, we output a BOM */
#else
TidyOutputBOMNotUsed, /**< Placeholder keeping later IDs stable */
#endif
TidyReplaceColor, /**< Replace hex color attribute values with names */
TidyCSSPrefix, /**< CSS class naming for -clean option */
TidyInlineTags, /**< Declared inline tags */
TidyBlockTags, /**< Declared block tags */
TidyEmptyTags, /**< Declared empty tags */
TidyPreTags, /**< Declared pre tags */
TidyAccessibilityCheckLevel, /**< Accessibility check level
0 (old style), or 1, 2, 3 */
TidyVertSpace, /**< degree to which markup is spread out vertically */
#if SUPPORT_ASIAN_ENCODINGS
TidyPunctWrap, /**< consider punctuation and breaking spaces for wrapping */
#else
TidyPunctWrapNotUsed, /**< Placeholder keeping later IDs stable */
#endif
TidyMergeDivs, /**< Merge multiple DIVs */
TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
TidyPreserveEntities, /**< Preserve entities */
TidySortAttributes, /**< Sort attributes */
TidyMergeSpans, /**< Merge multiple SPANs */
TidyAnchorAsName, /**< Define anchors as name attributes */
N_TIDY_OPTIONS /**< Must be last */
} TidyOptionId;
/** Data type of an option's value.
*/
typedef enum
{
    TidyString  = 0, /**< String */
    TidyInteger = 1, /**< Integer or enumeration */
    TidyBoolean = 2  /**< Boolean flag */
} TidyOptionType;
/** Three-state (AutoBool) values, as used by ParseBool, ParseTriState,
**  ParseIndent and ParseBOM.
*/
typedef enum
{
    TidyNoState   = 0, /**< maps to 'no' */
    TidyYesState  = 1, /**< maps to 'yes' */
    TidyAutoState = 2  /**< Automatic */
} TidyTriState;
/** Values of the TidyNewline option, which controls the line endings
**  written to the output.
*/
typedef enum
{
    TidyLF   = 0, /**< Use Unix style: LF */
    TidyCRLF = 1, /**< Use DOS/Windows style: CR+LF */
    TidyCR   = 2  /**< Use Macintosh style: CR */
} TidyLineEnding;
/** How the DOCTYPE of the document is treated.
*/
typedef enum
{
    TidyDoctypeOmit   = 0, /**< Omit DOCTYPE altogether */
    TidyDoctypeAuto   = 1, /**< Keep DOCTYPE in input. Set version to content */
    TidyDoctypeStrict = 2, /**< Convert document to HTML 4 strict content model */
    TidyDoctypeLoose  = 3, /**< Convert document to HTML 4 transitional
                                content model */
    TidyDoctypeUser   = 4  /**< Set DOCTYPE FPI explicitly */
} TidyDoctypeModes;
/** Which occurrence survives when an attribute is duplicated.
*/
typedef enum
{
    TidyKeepFirst = 0, /**< Keep the first duplicate attribute */
    TidyKeepLast  = 1  /**< Keep the last duplicate attribute */
} TidyDupAttrModes;
/** Strategy used for sorting attributes.
*/
typedef enum
{
    TidySortAttrNone  = 0, /**< No sorting */
    TidySortAttrAlpha = 1  /**< Alphabetical sorting */
} TidyAttrSortStrategy;
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use
** instances of input and output handlers for
** standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content
** input, content output and diagnostic output,
** respectively). A FILE* cfgFile input handler
** will be used for config files. Command line
** options will just be set directly.
*/
/** Severity of a reported message, listed from least to most severe
**  numeric value.
*/
typedef enum
{
    TidyInfo        = 0, /**< Information about markup usage */
    TidyWarning     = 1, /**< Warning message */
    TidyConfig      = 2, /**< Configuration error */
    TidyAccess      = 3, /**< Accessibility message */
    TidyError       = 4, /**< Error message - output suppressed */
    TidyBadDocument = 5, /**< I/O or file system error */
    TidyFatal       = 6  /**< Crash! */
} TidyReportLevel;
/* Document tree traversal functions
*/
/** Kinds of node found in the document tree.
*/
typedef enum
{
    TidyNode_Root     = 0,  /**< Root */
    TidyNode_DocType  = 1,  /**< DOCTYPE */
    TidyNode_Comment  = 2,  /**< Comment */
    TidyNode_ProcIns  = 3,  /**< Processing Instruction */
    TidyNode_Text     = 4,  /**< Text */
    TidyNode_Start    = 5,  /**< Start Tag */
    TidyNode_End      = 6,  /**< End Tag */
    TidyNode_StartEnd = 7,  /**< Start/End (empty) Tag */
    TidyNode_CDATA    = 8,  /**< Unparsed Text */
    TidyNode_Section  = 9,  /**< XML Section */
    TidyNode_Asp      = 10, /**< ASP Source */
    TidyNode_Jste     = 11, /**< JSTE Source */
    TidyNode_Php      = 12, /**< PHP Source */
    TidyNode_XmlDecl  = 13  /**< XML Declaration */
} TidyNodeType;
/** Known HTML element types.
**
** The numeric value of each ID is its position in this list, so new
** elements are appended just before N_TIDY_TAGS: the HTML5 elements
** follow TidyTag_NEXTID instead of being merged alphabetically.
*/
typedef enum
{
TidyTag_UNKNOWN, /**< Unknown tag! */
TidyTag_A, /**< A */
TidyTag_ABBR, /**< ABBR */
TidyTag_ACRONYM, /**< ACRONYM */
TidyTag_ADDRESS, /**< ADDRESS */
TidyTag_ALIGN, /**< ALIGN */
TidyTag_APPLET, /**< APPLET */
TidyTag_AREA, /**< AREA */
TidyTag_B, /**< B */
TidyTag_BASE, /**< BASE */
TidyTag_BASEFONT, /**< BASEFONT */
TidyTag_BDO, /**< BDO */
TidyTag_BGSOUND, /**< BGSOUND */
TidyTag_BIG, /**< BIG */
TidyTag_BLINK, /**< BLINK */
TidyTag_BLOCKQUOTE, /**< BLOCKQUOTE */
TidyTag_BODY, /**< BODY */
TidyTag_BR, /**< BR */
TidyTag_BUTTON, /**< BUTTON */
TidyTag_CAPTION, /**< CAPTION */
TidyTag_CENTER, /**< CENTER */
TidyTag_CITE, /**< CITE */
TidyTag_CODE, /**< CODE */
TidyTag_COL, /**< COL */
TidyTag_COLGROUP, /**< COLGROUP */
TidyTag_COMMENT, /**< COMMENT */
TidyTag_DD, /**< DD */
TidyTag_DEL, /**< DEL */
TidyTag_DFN, /**< DFN */
TidyTag_DIR, /**< DIR */
TidyTag_DIV, /**< DIV */
TidyTag_DL, /**< DL */
TidyTag_DT, /**< DT */
TidyTag_EM, /**< EM */
TidyTag_EMBED, /**< EMBED */
TidyTag_FIELDSET, /**< FIELDSET */
TidyTag_FONT, /**< FONT */
TidyTag_FORM, /**< FORM */
TidyTag_FRAME, /**< FRAME */
TidyTag_FRAMESET, /**< FRAMESET */
TidyTag_H1, /**< H1 */
TidyTag_H2, /**< H2 */
TidyTag_H3, /**< H3 */
TidyTag_H4, /**< H4 */
TidyTag_H5, /**< H5 */
TidyTag_H6, /**< H6 */
TidyTag_HEAD, /**< HEAD */
TidyTag_HR, /**< HR */
TidyTag_HTML, /**< HTML */
TidyTag_I, /**< I */
TidyTag_IFRAME, /**< IFRAME */
TidyTag_ILAYER, /**< ILAYER */
TidyTag_IMG, /**< IMG */
TidyTag_INPUT, /**< INPUT */
TidyTag_INS, /**< INS */
TidyTag_ISINDEX, /**< ISINDEX */
TidyTag_KBD, /**< KBD */
TidyTag_KEYGEN, /**< KEYGEN */
TidyTag_LABEL, /**< LABEL */
TidyTag_LAYER, /**< LAYER */
TidyTag_LEGEND, /**< LEGEND */
TidyTag_LI, /**< LI */
TidyTag_LINK, /**< LINK */
TidyTag_LISTING, /**< LISTING */
TidyTag_MAP, /**< MAP */
TidyTag_MARQUEE, /**< MARQUEE */
TidyTag_MENU, /**< MENU */
TidyTag_META, /**< META */
TidyTag_MULTICOL, /**< MULTICOL */
TidyTag_NOBR, /**< NOBR */
TidyTag_NOEMBED, /**< NOEMBED */
TidyTag_NOFRAMES, /**< NOFRAMES */
TidyTag_NOLAYER, /**< NOLAYER */
TidyTag_NOSAVE, /**< NOSAVE */
TidyTag_NOSCRIPT, /**< NOSCRIPT */
TidyTag_OBJECT, /**< OBJECT */
TidyTag_OL, /**< OL */
TidyTag_OPTGROUP, /**< OPTGROUP */
TidyTag_OPTION, /**< OPTION */
TidyTag_P, /**< P */
TidyTag_PARAM, /**< PARAM */
TidyTag_PLAINTEXT,/**< PLAINTEXT */
TidyTag_PRE, /**< PRE */
TidyTag_Q, /**< Q */
TidyTag_RB, /**< RB */
TidyTag_RBC, /**< RBC */
TidyTag_RP, /**< RP */
TidyTag_RT, /**< RT */
TidyTag_RTC, /**< RTC */
TidyTag_RUBY, /**< RUBY */
TidyTag_S, /**< S */
TidyTag_SAMP, /**< SAMP */
TidyTag_SCRIPT, /**< SCRIPT */
TidyTag_SELECT, /**< SELECT */
TidyTag_SERVER, /**< SERVER */
TidyTag_SERVLET, /**< SERVLET */
TidyTag_SMALL, /**< SMALL */
TidyTag_SPACER, /**< SPACER */
TidyTag_SPAN, /**< SPAN */
TidyTag_STRIKE, /**< STRIKE */
TidyTag_STRONG, /**< STRONG */
TidyTag_STYLE, /**< STYLE */
TidyTag_SUB, /**< SUB */
TidyTag_SUP, /**< SUP */
TidyTag_TABLE, /**< TABLE */
TidyTag_TBODY, /**< TBODY */
TidyTag_TD, /**< TD */
TidyTag_TEXTAREA, /**< TEXTAREA */
TidyTag_TFOOT, /**< TFOOT */
TidyTag_TH, /**< TH */
TidyTag_THEAD, /**< THEAD */
TidyTag_TITLE, /**< TITLE */
TidyTag_TR, /**< TR */
TidyTag_TT, /**< TT */
TidyTag_U, /**< U */
TidyTag_UL, /**< UL */
TidyTag_VAR, /**< VAR */
TidyTag_WBR, /**< WBR */
TidyTag_XMP, /**< XMP */
TidyTag_NEXTID, /**< NEXTID */
/* HTML5 elements */
TidyTag_ARTICLE, /**< ARTICLE */
TidyTag_ASIDE, /**< ASIDE */
TidyTag_AUDIO, /**< AUDIO */
TidyTag_CANVAS, /**< CANVAS */
TidyTag_COMMAND, /**< COMMAND */
TidyTag_DATALIST, /**< DATALIST */
TidyTag_DETAILS, /**< DETAILS */
TidyTag_FIGCAPTION, /**< FIGCAPTION */
TidyTag_FIGURE, /**< FIGURE */
TidyTag_FOOTER, /**< FOOTER */
TidyTag_HEADER, /**< HEADER */
TidyTag_HGROUP, /**< HGROUP */
TidyTag_MARK, /**< MARK */
TidyTag_METER, /**< METER */
TidyTag_NAV, /**< NAV */
TidyTag_OUTPUT, /**< OUTPUT */
TidyTag_PROGRESS, /**< PROGRESS */
TidyTag_SECTION, /**< SECTION */
TidyTag_SOURCE, /**< SOURCE */
TidyTag_SUMMARY, /**< SUMMARY */
TidyTag_TIME, /**< TIME */
TidyTag_TRACK, /**< TRACK */
TidyTag_VIDEO, /**< VIDEO */
N_TIDY_TAGS /**< Must be last */
} TidyTagId;
/* Attribute interrogation
*/
/** Known HTML attributes
**
** No enumerator is given an explicit value, so they number consecutively
** from TidyAttr_UNKNOWN (zero) and N_TIDY_ATTRIBS equals the number of
** entries that precede it. Inserting or reordering entries renumbers
** every enumerator after the change.
*/
typedef enum
{
TidyAttr_UNKNOWN, /**< UNKNOWN= */
TidyAttr_ABBR, /**< ABBR= */
TidyAttr_ACCEPT, /**< ACCEPT= */
TidyAttr_ACCEPT_CHARSET, /**< ACCEPT_CHARSET= */
TidyAttr_ACCESSKEY, /**< ACCESSKEY= */
TidyAttr_ACTION, /**< ACTION= */
TidyAttr_ADD_DATE, /**< ADD_DATE= */
TidyAttr_ALIGN, /**< ALIGN= */
TidyAttr_ALINK, /**< ALINK= */
TidyAttr_ALT, /**< ALT= */
TidyAttr_ARCHIVE, /**< ARCHIVE= */
TidyAttr_AXIS, /**< AXIS= */
TidyAttr_BACKGROUND, /**< BACKGROUND= */
TidyAttr_BGCOLOR, /**< BGCOLOR= */
TidyAttr_BGPROPERTIES, /**< BGPROPERTIES= */
TidyAttr_BORDER, /**< BORDER= */
TidyAttr_BORDERCOLOR, /**< BORDERCOLOR= */
TidyAttr_BOTTOMMARGIN, /**< BOTTOMMARGIN= */
TidyAttr_CELLPADDING, /**< CELLPADDING= */
TidyAttr_CELLSPACING, /**< CELLSPACING= */
TidyAttr_CHAR, /**< CHAR= */
TidyAttr_CHAROFF, /**< CHAROFF= */
TidyAttr_CHARSET, /**< CHARSET= */
TidyAttr_CHECKED, /**< CHECKED= */
TidyAttr_CITE, /**< CITE= */
TidyAttr_CLASS, /**< CLASS= */
TidyAttr_CLASSID, /**< CLASSID= */
TidyAttr_CLEAR, /**< CLEAR= */
TidyAttr_CODE, /**< CODE= */
TidyAttr_CODEBASE, /**< CODEBASE= */
TidyAttr_CODETYPE, /**< CODETYPE= */
TidyAttr_COLOR, /**< COLOR= */
TidyAttr_COLS, /**< COLS= */
TidyAttr_COLSPAN, /**< COLSPAN= */
TidyAttr_COMPACT, /**< COMPACT= */
TidyAttr_CONTENT, /**< CONTENT= */
TidyAttr_COORDS, /**< COORDS= */
TidyAttr_DATA, /**< DATA= */
TidyAttr_DATAFLD, /**< DATAFLD= */
TidyAttr_DATAFORMATAS, /**< DATAFORMATAS= */
TidyAttr_DATAPAGESIZE, /**< DATAPAGESIZE= */
TidyAttr_DATASRC, /**< DATASRC= */
TidyAttr_DATETIME, /**< DATETIME= */
TidyAttr_DECLARE, /**< DECLARE= */
TidyAttr_DEFER, /**< DEFER= */
TidyAttr_DIR, /**< DIR= */
TidyAttr_DISABLED, /**< DISABLED= */
TidyAttr_ENCODING, /**< ENCODING= */
TidyAttr_ENCTYPE, /**< ENCTYPE= */
TidyAttr_FACE, /**< FACE= */
TidyAttr_FOR, /**< FOR= */
TidyAttr_FRAME, /**< FRAME= */
TidyAttr_FRAMEBORDER, /**< FRAMEBORDER= */
TidyAttr_FRAMESPACING, /**< FRAMESPACING= */
TidyAttr_GRIDX, /**< GRIDX= */
TidyAttr_GRIDY, /**< GRIDY= */
TidyAttr_HEADERS, /**< HEADERS= */
TidyAttr_HEIGHT, /**< HEIGHT= */
TidyAttr_HREF, /**< HREF= */
TidyAttr_HREFLANG, /**< HREFLANG= */
TidyAttr_HSPACE, /**< HSPACE= */
TidyAttr_HTTP_EQUIV, /**< HTTP_EQUIV= */
TidyAttr_ID, /**< ID= */
TidyAttr_ISMAP, /**< ISMAP= */
TidyAttr_LABEL, /**< LABEL= */
TidyAttr_LANG, /**< LANG= */
TidyAttr_LANGUAGE, /**< LANGUAGE= */
TidyAttr_LAST_MODIFIED, /**< LAST_MODIFIED= */
TidyAttr_LAST_VISIT, /**< LAST_VISIT= */
TidyAttr_LEFTMARGIN, /**< LEFTMARGIN= */
TidyAttr_LINK, /**< LINK= */
TidyAttr_LONGDESC, /**< LONGDESC= */
TidyAttr_LOWSRC, /**< LOWSRC= */
TidyAttr_MARGINHEIGHT, /**< MARGINHEIGHT= */
TidyAttr_MARGINWIDTH, /**< MARGINWIDTH= */
TidyAttr_MAXLENGTH, /**< MAXLENGTH= */
TidyAttr_MEDIA, /**< MEDIA= */
TidyAttr_METHOD, /**< METHOD= */
TidyAttr_MULTIPLE, /**< MULTIPLE= */
TidyAttr_NAME, /**< NAME= */
TidyAttr_NOHREF, /**< NOHREF= */
TidyAttr_NORESIZE, /**< NORESIZE= */
TidyAttr_NOSHADE, /**< NOSHADE= */
TidyAttr_NOWRAP, /**< NOWRAP= */
TidyAttr_OBJECT, /**< OBJECT= */
TidyAttr_OnAFTERUPDATE, /**< OnAFTERUPDATE= */
TidyAttr_OnBEFOREUNLOAD, /**< OnBEFOREUNLOAD= */
TidyAttr_OnBEFOREUPDATE, /**< OnBEFOREUPDATE= */
TidyAttr_OnBLUR, /**< OnBLUR= */
TidyAttr_OnCHANGE, /**< OnCHANGE= */
TidyAttr_OnCLICK, /**< OnCLICK= */
TidyAttr_OnDATAAVAILABLE, /**< OnDATAAVAILABLE= */
TidyAttr_OnDATASETCHANGED, /**< OnDATASETCHANGED= */
TidyAttr_OnDATASETCOMPLETE, /**< OnDATASETCOMPLETE= */
TidyAttr_OnDBLCLICK, /**< OnDBLCLICK= */
TidyAttr_OnERRORUPDATE, /**< OnERRORUPDATE= */
TidyAttr_OnFOCUS, /**< OnFOCUS= */
TidyAttr_OnKEYDOWN, /**< OnKEYDOWN= */
TidyAttr_OnKEYPRESS, /**< OnKEYPRESS= */
TidyAttr_OnKEYUP, /**< OnKEYUP= */
TidyAttr_OnLOAD, /**< OnLOAD= */
TidyAttr_OnMOUSEDOWN, /**< OnMOUSEDOWN= */
TidyAttr_OnMOUSEMOVE, /**< OnMOUSEMOVE= */
TidyAttr_OnMOUSEOUT, /**< OnMOUSEOUT= */
TidyAttr_OnMOUSEOVER, /**< OnMOUSEOVER= */
TidyAttr_OnMOUSEUP, /**< OnMOUSEUP= */
TidyAttr_OnRESET, /**< OnRESET= */
TidyAttr_OnROWENTER, /**< OnROWENTER= */
TidyAttr_OnROWEXIT, /**< OnROWEXIT= */
TidyAttr_OnSELECT, /**< OnSELECT= */
TidyAttr_OnSUBMIT, /**< OnSUBMIT= */
TidyAttr_OnUNLOAD, /**< OnUNLOAD= */
TidyAttr_PROFILE, /**< PROFILE= */
TidyAttr_PROMPT, /**< PROMPT= */
TidyAttr_RBSPAN, /**< RBSPAN= */
TidyAttr_READONLY, /**< READONLY= */
TidyAttr_REL, /**< REL= */
TidyAttr_REV, /**< REV= */
TidyAttr_RIGHTMARGIN, /**< RIGHTMARGIN= */
TidyAttr_ROWS, /**< ROWS= */
TidyAttr_ROWSPAN, /**< ROWSPAN= */
TidyAttr_RULES, /**< RULES= */
TidyAttr_SCHEME, /**< SCHEME= */
TidyAttr_SCOPE, /**< SCOPE= */
TidyAttr_SCROLLING, /**< SCROLLING= */
TidyAttr_SELECTED, /**< SELECTED= */
TidyAttr_SHAPE, /**< SHAPE= */
TidyAttr_SHOWGRID, /**< SHOWGRID= */
TidyAttr_SHOWGRIDX, /**< SHOWGRIDX= */
TidyAttr_SHOWGRIDY, /**< SHOWGRIDY= */
TidyAttr_SIZE, /**< SIZE= */
TidyAttr_SPAN, /**< SPAN= */
TidyAttr_SRC, /**< SRC= */
TidyAttr_STANDBY, /**< STANDBY= */
TidyAttr_START, /**< START= */
TidyAttr_STYLE, /**< STYLE= */
TidyAttr_SUMMARY, /**< SUMMARY= */
TidyAttr_TABINDEX, /**< TABINDEX= */
TidyAttr_TARGET, /**< TARGET= */
TidyAttr_TEXT, /**< TEXT= */
TidyAttr_TITLE, /**< TITLE= */
TidyAttr_TOPMARGIN, /**< TOPMARGIN= */
TidyAttr_TYPE, /**< TYPE= */
TidyAttr_USEMAP, /**< USEMAP= */
TidyAttr_VALIGN, /**< VALIGN= */
TidyAttr_VALUE, /**< VALUE= */
TidyAttr_VALUETYPE, /**< VALUETYPE= */
TidyAttr_VERSION, /**< VERSION= */
TidyAttr_VLINK, /**< VLINK= */
TidyAttr_VSPACE, /**< VSPACE= */
TidyAttr_WIDTH, /**< WIDTH= */
TidyAttr_WRAP, /**< WRAP= */
TidyAttr_XML_LANG, /**< XML_LANG= */
TidyAttr_XML_SPACE, /**< XML_SPACE= */
TidyAttr_XMLNS, /**< XMLNS= */
TidyAttr_EVENT, /**< EVENT= */
TidyAttr_METHODS, /**< METHODS= */
TidyAttr_N, /**< N= */
TidyAttr_SDAFORM, /**< SDAFORM= */
TidyAttr_SDAPREF, /**< SDAPREF= */
TidyAttr_SDASUFF, /**< SDASUFF= */
TidyAttr_URN, /**< URN= */
TidyAttr_ASYNC, /**< ASYNC= */
TidyAttr_AUTOCOMPLETE, /**< AUTOCOMPLETE= */
TidyAttr_AUTOFOCUS, /**< AUTOFOCUS= */
TidyAttr_AUTOPLAY, /**< AUTOPLAY= */
TidyAttr_CHALLENGE, /**< CHALLENGE= */
TidyAttr_CONTENTEDITABLE, /**< CONTENTEDITABLE= */
TidyAttr_CONTEXTMENU, /**< CONTEXTMENU= */
TidyAttr_CONTROLS, /**< CONTROLS= */
TidyAttr_DEFAULT, /**< DEFAULT= */
TidyAttr_DIRNAME, /**< DIRNAME= */
TidyAttr_DRAGGABLE, /**< DRAGGABLE= */
TidyAttr_DROPZONE, /**< DROPZONE= */
TidyAttr_FORM, /**< FORM= */
TidyAttr_FORMACTION, /**< FORMACTION= */
TidyAttr_FORMENCTYPE, /**< FORMENCTYPE= */
TidyAttr_FORMMETHOD, /**< FORMMETHOD= */
TidyAttr_FORMNOVALIDATE, /**< FORMNOVALIDATE= */
TidyAttr_FORMTARGET, /**< FORMTARGET= */
TidyAttr_HIDDEN, /**< HIDDEN= */
TidyAttr_HIGH, /**< HIGH= */
TidyAttr_ICON, /**< ICON= */
TidyAttr_KEYTYPE, /**< KEYTYPE= */
TidyAttr_KIND, /**< KIND= */
TidyAttr_LIST, /**< LIST= */
TidyAttr_LOOP, /**< LOOP= */
TidyAttr_LOW, /**< LOW= */
TidyAttr_MANIFEST, /**< MANIFEST= */
TidyAttr_MAX, /**< MAX= */
TidyAttr_MEDIAGROUP, /**< MEDIAGROUP= */
TidyAttr_MIN, /**< MIN= */
TidyAttr_NOVALIDATE, /**< NOVALIDATE= */
TidyAttr_OPEN, /**< OPEN= */
TidyAttr_OPTIMUM, /**< OPTIMUM= */
TidyAttr_OnABORT, /**< OnABORT= */
TidyAttr_OnAFTERPRINT, /**< OnAFTERPRINT= */
TidyAttr_OnBEFOREPRINT, /**< OnBEFOREPRINT= */
TidyAttr_OnCANPLAY, /**< OnCANPLAY= */
TidyAttr_OnCANPLAYTHROUGH, /**< OnCANPLAYTHROUGH= */
TidyAttr_OnCONTEXTMENU, /**< OnCONTEXTMENU= */
TidyAttr_OnCUECHANGE, /**< OnCUECHANGE= */
TidyAttr_OnDRAG, /**< OnDRAG= */
TidyAttr_OnDRAGEND, /**< OnDRAGEND= */
TidyAttr_OnDRAGENTER, /**< OnDRAGENTER= */
TidyAttr_OnDRAGLEAVE, /**< OnDRAGLEAVE= */
TidyAttr_OnDRAGOVER, /**< OnDRAGOVER= */
TidyAttr_OnDRAGSTART, /**< OnDRAGSTART= */
TidyAttr_OnDROP, /**< OnDROP= */
TidyAttr_OnDURATIONCHANGE, /**< OnDURATIONCHANGE= */
TidyAttr_OnEMPTIED, /**< OnEMPTIED= */
TidyAttr_OnENDED, /**< OnENDED= */
TidyAttr_OnERROR, /**< OnERROR= */
TidyAttr_OnHASHCHANGE, /**< OnHASHCHANGE= */
TidyAttr_OnINPUT, /**< OnINPUT= */
TidyAttr_OnINVALID, /**< OnINVALID= */
TidyAttr_OnLOADEDDATA, /**< OnLOADEDDATA= */
TidyAttr_OnLOADEDMETADATA, /**< OnLOADEDMETADATA= */
TidyAttr_OnLOADSTART, /**< OnLOADSTART= */
TidyAttr_OnMESSAGE, /**< OnMESSAGE= */
TidyAttr_OnMOUSEWHEEL, /**< OnMOUSEWHEEL= */
TidyAttr_OnOFFLINE, /**< OnOFFLINE= */
TidyAttr_OnONLINE, /**< OnONLINE= */
TidyAttr_OnPAGEHIDE, /**< OnPAGEHIDE= */
TidyAttr_OnPAGESHOW, /**< OnPAGESHOW= */
TidyAttr_OnPAUSE, /**< OnPAUSE= */
TidyAttr_OnPLAY, /**< OnPLAY= */
TidyAttr_OnPLAYING, /**< OnPLAYING= */
TidyAttr_OnPOPSTATE, /**< OnPOPSTATE= */
TidyAttr_OnPROGRESS, /**< OnPROGRESS= */
TidyAttr_OnRATECHANGE, /**< OnRATECHANGE= */
TidyAttr_OnREADYSTATECHANGE, /**< OnREADYSTATECHANGE= */
TidyAttr_OnREDO, /**< OnREDO= */
TidyAttr_OnRESIZE, /**< OnRESIZE= */
TidyAttr_OnSCROLL, /**< OnSCROLL= */
TidyAttr_OnSEEKED, /**< OnSEEKED= */
TidyAttr_OnSEEKING, /**< OnSEEKING= */
TidyAttr_OnSHOW, /**< OnSHOW= */
TidyAttr_OnSTALLED, /**< OnSTALLED= */
TidyAttr_OnSTORAGE, /**< OnSTORAGE= */
TidyAttr_OnSUSPEND, /**< OnSUSPEND= */
TidyAttr_OnTIMEUPDATE, /**< OnTIMEUPDATE= */
TidyAttr_OnUNDO, /**< OnUNDO= */
TidyAttr_OnVOLUMECHANGE, /**< OnVOLUMECHANGE= */
TidyAttr_OnWAITING, /**< OnWAITING= */
TidyAttr_PATTERN, /**< PATTERN= */
TidyAttr_PLACEHOLDER, /**< PLACEHOLDER= */
TidyAttr_POSTER, /**< POSTER= */
TidyAttr_PRELOAD, /**< PRELOAD= */
TidyAttr_PUBDATE, /**< PUBDATE= */
TidyAttr_RADIOGROUP, /**< RADIOGROUP= */
TidyAttr_REQUIRED, /**< REQUIRED= */
TidyAttr_REVERSED, /**< REVERSED= */
TidyAttr_SANDBOX, /**< SANDBOX= */
TidyAttr_SCOPED, /**< SCOPED= */
TidyAttr_SEAMLESS, /**< SEAMLESS= */
TidyAttr_SIZES, /**< SIZES= */
TidyAttr_SPELLCHECK, /**< SPELLCHECK= */
TidyAttr_SRCDOC, /**< SRCDOC= */
TidyAttr_SRCLANG, /**< SRCLANG= */
TidyAttr_STEP, /**< STEP= */
N_TIDY_ATTRIBS /**< Must be last */
} TidyAttrId;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __TIDYENUM_H__ */

3310
src/access.c Normal file

File diff suppressed because it is too large Load diff

279
src/access.h Normal file
View file

@ -0,0 +1,279 @@
#ifndef __ACCESS_H__
#define __ACCESS_H__
/* access.h -- carry out accessibility checks
Copyright University of Toronto
Portions (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.7 $
*/
/*********************************************************************
* AccessibilityChecks
*
* Carries out processes for all accessibility checks. Traverses
* through all the content within the tree and evaluates the tags for
* accessibility.
*
* To perform the following checks, 'AccessibilityChecks' must be
* called AFTER the tree structure has been formed.
*
* If the command line does not specify which accessibility priority
* level to check, no accessibility checks will be performed (i.e. '1'
* for priority 1, '2' for priorities 1 and 2, and '3' for priorities
* 1, 2 and 3).
*
* Copyright University of Toronto
* Programmed by: Mike Lam and Chris Ridpath
* Modifications by : Terry Teague (TRT)
*
*********************************************************************/
#include "forward.h"
#if SUPPORT_ACCESSIBILITY_CHECKS
/* The accessibility checks to perform depending on user's desire.
1. priority 1
2. priority 1 & 2
3. priority 1, 2, & 3
*/
/* Determines if the client-side text link is found within the document
typedef struct AreaLinks
{
struct AreaLinks* next;
char* link;
Bool HasBeenFound;
} AreaLinks;
*/
/* Element count of the fixed-size textNode[] and text[] buffers in
   struct _TidyAccessImpl. */
enum {
TEXTBUF_SIZE=128u
};
struct _TidyAccessImpl;
typedef struct _TidyAccessImpl TidyAccessImpl;
/* State record for the accessibility checks.
   NOTE(review): only the layout is visible in this header; the code that
   reads and writes these fields is presumably in access.c. Notes marked
   NOTE(review) below are inferred from field names only - confirm there. */
struct _TidyAccessImpl
{
/* gets set from Tidy variable AccessibilityCheckLevel */
int PRIORITYCHK;
/* Number of characters that are found within the concatenated text */
int counter;
/* list of characters in the text nodes found within a container element */
tmbchar textNode[ TEXTBUF_SIZE ];
/* The list of characters found within one text node */
tmbchar text[ TEXTBUF_SIZE ];
/* Number of frame elements found within a frameset */
int numFrames;
/* Number of 'longdesc' attributes found within a frameset */
int HasCheckedLongDesc;
/* NOTE(review): the next three ints are undocumented; by name they look
   like counters for header checks and list elements - confirm usage */
int CheckedHeaders;
int ListElements;
int OtherListElements;
/* For 'USEMAP' identifier */
Bool HasUseMap;
Bool HasName;
Bool HasMap;
/* For tracking nodes that are deleted from the original parse tree - TRT */
/* Node *access_tree; */
/* NOTE(review): the remaining fields are undocumented; the names suggest
   table-header (TH / row / column) and label FOR/ID validation state -
   confirm against the code that uses them */
Bool HasTH;
Bool HasValidFor;
Bool HasValidId;
Bool HasValidRowHeaders;
Bool HasValidColumnHeaders;
Bool HasInvalidRowHeader;
Bool HasInvalidColumnHeader;
int ForID;
/* List containing map-links
AreaLinks* links;
AreaLinks* start;
AreaLinks* current;
*/
};
/*
Determines which error/warning message should be displayed,
depending on the error code that was called.
Offset accessibility error codes by FIRST_ACCESS_ERR to avoid conflict with
other error codes defined in message.h and used in localize.c.
*/
/* Accessibility error codes.
   Only FIRST_ACCESS_ERR has an explicit value (1000); every following
   enumerator takes the next integer, so inserting or reordering entries
   renumbers all codes after the change. LAST_ACCESS_ERR is one past the
   final real code.
   NOTE(review): the bracketed tag in front of each entry presumably names
   the accessibility checkpoint the code reports - confirm against the
   message table that consumes these codes. */
enum accessErrorCodes
{
FIRST_ACCESS_ERR = 1000, /* must be first */
/* [1.1.1.1] */ IMG_MISSING_ALT,
/* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME,
/* [1.1.1.3] */ IMG_ALT_SUSPICIOUS_FILE_SIZE,
/* [1.1.1.4] */ IMG_ALT_SUSPICIOUS_PLACEHOLDER,
/* [1.1.1.10] */ IMG_ALT_SUSPICIOUS_TOO_LONG,
/* [1.1.1.11] */ IMG_MISSING_ALT_BULLET,
/* [1.1.1.12] */ IMG_MISSING_ALT_H_RULE,
/* [1.1.2.1] */ IMG_MISSING_LONGDESC_DLINK,
/* [1.1.2.2] */ IMG_MISSING_DLINK,
/* [1.1.2.3] */ IMG_MISSING_LONGDESC,
/* [1.1.2.5] */ LONGDESC_NOT_REQUIRED,
/* [1.1.3.1] */ IMG_BUTTON_MISSING_ALT,
/* [1.1.4.1] */ APPLET_MISSING_ALT,
/* [1.1.5.1] */ OBJECT_MISSING_ALT,
/* [1.1.6.1] */ AUDIO_MISSING_TEXT_WAV,
/* [1.1.6.2] */ AUDIO_MISSING_TEXT_AU,
/* [1.1.6.3] */ AUDIO_MISSING_TEXT_AIFF,
/* [1.1.6.4] */ AUDIO_MISSING_TEXT_SND,
/* [1.1.6.5] */ AUDIO_MISSING_TEXT_RA,
/* [1.1.6.6] */ AUDIO_MISSING_TEXT_RM,
/* [1.1.8.1] */ FRAME_MISSING_LONGDESC,
/* [1.1.9.1] */ AREA_MISSING_ALT,
/* [1.1.10.1] */ SCRIPT_MISSING_NOSCRIPT,
/* [1.1.12.1] */ ASCII_REQUIRES_DESCRIPTION,
/* [1.2.1.1] */ IMG_MAP_SERVER_REQUIRES_TEXT_LINKS,
/* [1.4.1.1] */ MULTIMEDIA_REQUIRES_TEXT,
/* [1.5.1.1] */ IMG_MAP_CLIENT_MISSING_TEXT_LINKS,
/* [2.1.1.1] */ INFORMATION_NOT_CONVEYED_IMAGE,
/* [2.1.1.2] */ INFORMATION_NOT_CONVEYED_APPLET,
/* [2.1.1.3] */ INFORMATION_NOT_CONVEYED_OBJECT,
/* [2.1.1.4] */ INFORMATION_NOT_CONVEYED_SCRIPT,
/* [2.1.1.5] */ INFORMATION_NOT_CONVEYED_INPUT,
/* [2.2.1.1] */ COLOR_CONTRAST_TEXT,
/* [2.2.1.2] */ COLOR_CONTRAST_LINK,
/* [2.2.1.3] */ COLOR_CONTRAST_ACTIVE_LINK,
/* [2.2.1.4] */ COLOR_CONTRAST_VISITED_LINK,
/* [3.2.1.1] */ DOCTYPE_MISSING,
/* [3.3.1.1] */ STYLE_SHEET_CONTROL_PRESENTATION,
/* [3.5.1.1] */ HEADERS_IMPROPERLY_NESTED,
/* [3.5.2.1] */ POTENTIAL_HEADER_BOLD,
/* [3.5.2.2] */ POTENTIAL_HEADER_ITALICS,
/* [3.5.2.3] */ POTENTIAL_HEADER_UNDERLINE,
/* [3.5.3.1] */ HEADER_USED_FORMAT_TEXT,
/* [3.6.1.1] */ LIST_USAGE_INVALID_UL,
/* [3.6.1.2] */ LIST_USAGE_INVALID_OL,
/* [3.6.1.4] */ LIST_USAGE_INVALID_LI,
/* [4.1.1.1] */ INDICATE_CHANGES_IN_LANGUAGE,
/* [4.3.1.1] */ LANGUAGE_NOT_IDENTIFIED,
/* [4.3.1.1] */ LANGUAGE_INVALID, /* NOTE(review): same tag as the entry above - possibly meant [4.3.1.2]; confirm */
/* [5.1.2.1] */ DATA_TABLE_MISSING_HEADERS,
/* [5.1.2.2] */ DATA_TABLE_MISSING_HEADERS_COLUMN,
/* [5.1.2.3] */ DATA_TABLE_MISSING_HEADERS_ROW,
/* [5.2.1.1] */ DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS,
/* [5.2.1.2] */ DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS,
/* [5.3.1.1] */ LAYOUT_TABLES_LINEARIZE_PROPERLY,
/* [5.4.1.1] */ LAYOUT_TABLE_INVALID_MARKUP,
/* [5.5.1.1] */ TABLE_MISSING_SUMMARY,
/* [5.5.1.2] */ TABLE_SUMMARY_INVALID_NULL,
/* [5.5.1.3] */ TABLE_SUMMARY_INVALID_SPACES,
/* [5.5.1.6] */ TABLE_SUMMARY_INVALID_PLACEHOLDER,
/* [5.5.2.1] */ TABLE_MISSING_CAPTION,
/* [5.6.1.1] */ TABLE_MAY_REQUIRE_HEADER_ABBR,
/* [5.6.1.2] */ TABLE_MAY_REQUIRE_HEADER_ABBR_NULL,
/* [5.6.1.3] */ TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES,
/* [6.1.1.1] */ STYLESHEETS_REQUIRE_TESTING_LINK,
/* [6.1.1.2] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT,
/* [6.1.1.3] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR,
/* [6.2.1.1] */ FRAME_SRC_INVALID,
/* [6.2.2.1] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET,
/* [6.2.2.2] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT,
/* [6.2.2.3] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT,
/* [6.3.1.1] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT,
/* [6.3.1.2] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT,
/* [6.3.1.3] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED,
/* [6.3.1.4] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET,
/* [6.5.1.1] */ FRAME_MISSING_NOFRAMES,
/* [6.5.1.2] */ NOFRAMES_INVALID_NO_VALUE,
/* [6.5.1.3] */ NOFRAMES_INVALID_CONTENT,
/* [6.5.1.4] */ NOFRAMES_INVALID_LINK,
/* [7.1.1.1] */ REMOVE_FLICKER_SCRIPT,
/* [7.1.1.2] */ REMOVE_FLICKER_OBJECT,
/* [7.1.1.3] */ REMOVE_FLICKER_EMBED,
/* [7.1.1.4] */ REMOVE_FLICKER_APPLET,
/* [7.1.1.5] */ REMOVE_FLICKER_ANIMATED_GIF,
/* [7.2.1.1] */ REMOVE_BLINK_MARQUEE,
/* [7.4.1.1] */ REMOVE_AUTO_REFRESH,
/* [7.5.1.1] */ REMOVE_AUTO_REDIRECT,
/* [8.1.1.1] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT,
/* [8.1.1.2] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT,
/* [8.1.1.3] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET,
/* [8.1.1.4] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED,
/* [9.1.1.1] */ IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION,
/* [9.3.1.1] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN,
/* [9.3.1.2] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP,
/* [9.3.1.3] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK,
/* [9.3.1.4] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER,
/* [9.3.1.5] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT,
/* [9.3.1.6] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE,
/* [10.1.1.1] */ NEW_WINDOWS_REQUIRE_WARNING_NEW,
/* [10.1.1.2] */ NEW_WINDOWS_REQUIRE_WARNING_BLANK,
/* [10.2.1.1] */ LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT,
/* [10.2.1.2] */ LABEL_NEEDS_REPOSITIONING_AFTER_INPUT,
/* [10.4.1.1] */ FORM_CONTROL_REQUIRES_DEFAULT_TEXT,
/* [10.4.1.2] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL,
/* [10.4.1.3] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES,
/* [11.2.1.1] */ REPLACE_DEPRECATED_HTML_APPLET,
/* [11.2.1.2] */ REPLACE_DEPRECATED_HTML_BASEFONT,
/* [11.2.1.3] */ REPLACE_DEPRECATED_HTML_CENTER,
/* [11.2.1.4] */ REPLACE_DEPRECATED_HTML_DIR,
/* [11.2.1.5] */ REPLACE_DEPRECATED_HTML_FONT,
/* [11.2.1.6] */ REPLACE_DEPRECATED_HTML_ISINDEX,
/* [11.2.1.7] */ REPLACE_DEPRECATED_HTML_MENU,
/* [11.2.1.8] */ REPLACE_DEPRECATED_HTML_S,
/* [11.2.1.9] */ REPLACE_DEPRECATED_HTML_STRIKE,
/* [11.2.1.10] */ REPLACE_DEPRECATED_HTML_U,
/* [12.1.1.1] */ FRAME_MISSING_TITLE,
/* [12.1.1.2] */ FRAME_TITLE_INVALID_NULL,
/* [12.1.1.3] */ FRAME_TITLE_INVALID_SPACES,
/* [12.4.1.1] */ ASSOCIATE_LABELS_EXPLICITLY,
/* [12.4.1.2] */ ASSOCIATE_LABELS_EXPLICITLY_FOR,
/* [12.4.1.3] */ ASSOCIATE_LABELS_EXPLICITLY_ID,
/* [13.1.1.1] */ LINK_TEXT_NOT_MEANINGFUL,
/* [13.1.1.2] */ LINK_TEXT_MISSING,
/* [13.1.1.3] */ LINK_TEXT_TOO_LONG,
/* [13.1.1.4] */ LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE,
/* [13.1.1.5] */ LINK_TEXT_NOT_MEANINGFUL_MORE,
/* [13.1.1.6] */ LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS,
/* [13.2.1.1] */ METADATA_MISSING,
/* [13.2.1.2] */ METADATA_MISSING_LINK,
/* [13.2.1.3] */ METADATA_MISSING_REDIRECT_AUTOREFRESH,
/* [13.10.1.1] */ SKIPOVER_ASCII_ART,
LAST_ACCESS_ERR /* must be last */
};
void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc );
void TY_(DisplayHTMLTableAlgorithm)( TidyDocImpl* doc );
/************************************************************
* AccessibilityChecks
*
* Traverses the individual nodes of the tree and checks
* attributes and elements for accessibility. Must be called
* after the tree structure has been formed.
************************************************************/
void TY_(AccessibilityChecks)( TidyDocImpl* doc );
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */
#endif /* __ACCESS_H__ */

107
src/alloc.c Normal file
View file

@ -0,0 +1,107 @@
/* alloc.c -- Default memory allocation routines.
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/29 16:31:07 $
$Revision: 1.7 $
*/
#include "tidy.h"
#include "forward.h"
/* Optional replacement routines for the default allocator.
   Each hook stays NULL until the matching tidySet...Call() stores a
   function in it. While a hook is NULL, defaultAlloc / defaultRealloc /
   defaultFree use malloc / realloc / free, and defaultPanic prints the
   message to stderr and exits. */
static TidyMalloc g_malloc = NULL;
static TidyRealloc g_realloc = NULL;
static TidyFree g_free = NULL;
static TidyPanic g_panic = NULL;
/* Install the routine the default allocator uses to obtain memory.
   The pointer is stored in the file-scope g_malloc hook, which
   defaultAlloc consults before falling back to malloc(); storing NULL
   therefore selects malloc() again.
   Returns: always yes. */
Bool TIDY_CALL tidySetMallocCall( TidyMalloc fmalloc )
{
    g_malloc = fmalloc;
    return yes;
}
/* Install the routine the default allocator uses to resize memory.
   The pointer is stored in the file-scope g_realloc hook, which
   defaultRealloc consults before falling back to realloc(); storing
   NULL therefore selects realloc() again.
   Returns: always yes. */
Bool TIDY_CALL tidySetReallocCall( TidyRealloc frealloc )
{
    g_realloc = frealloc;
    return yes;
}
/* Install the routine the default allocator uses to release memory.
   The pointer is stored in the file-scope g_free hook, which
   defaultFree consults before falling back to free(); storing NULL
   therefore selects free() again.
   Returns: always yes. */
Bool TIDY_CALL tidySetFreeCall( TidyFree ffree )
{
    g_free = ffree;
    return yes;
}
/* Install the routine the default allocator calls on a fatal error.
   The pointer is stored in the file-scope g_panic hook. When the hook
   is non-NULL, defaultPanic hands the message to it instead of printing
   to stderr and exiting; storing NULL restores the print-and-exit path.
   Returns: always yes. */
Bool TIDY_CALL tidySetPanicCall( TidyPanic fpanic )
{
    g_panic = fpanic;
    return yes;
}
/* Fatal-error handler of the default allocator.
   allocator: unused.
   msg:       text describing the failure.
   If a panic hook was installed, the message is passed to it and this
   function returns whatever way the hook does (it may return normally).
   Otherwise the message is written to stderr and the process exits. */
static void TIDY_CALL defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbstr msg )
{
    if ( g_panic )
    {
        /* user-supplied handler takes over entirely */
        g_panic( msg );
        return;
    }

    fprintf( stderr, "Fatal error: %s\n", msg );
#ifdef _DEBUG
    assert(0);
#endif
    /* exit status 2 signifies a serious error */
    exit(2);
}
/* Allocation entry of the default allocator.
   allocator: forwarded to defaultPanic on failure.
   size:      number of bytes requested.
   Uses the installed malloc hook when there is one, malloc() otherwise.
   A NULL result is reported through defaultPanic with "Out of memory!";
   if the panic handler returns, NULL is returned to the caller.
   NOTE(review): a NULL result for a zero-byte request is reported the
   same way as a genuine allocation failure. */
static void* TIDY_CALL defaultAlloc( TidyAllocator* allocator, size_t size )
{
    void *block;

    if ( g_malloc )
        block = g_malloc( size );
    else
        block = malloc( size );

    if ( block == NULL )
        defaultPanic( allocator, "Out of memory!" );
    return block;
}
/* Resize entry of the default allocator.
   allocator: forwarded to defaultAlloc / defaultPanic.
   mem:       block to resize; NULL turns the call into a plain
              allocation through defaultAlloc.
   newsize:   requested size in bytes.
   Uses the installed realloc hook when there is one, realloc()
   otherwise. A NULL result is reported through defaultPanic with
   "Out of memory!"; if the panic handler returns, NULL is returned. */
static void* TIDY_CALL defaultRealloc( TidyAllocator* allocator, void* mem, size_t newsize )
{
    void *resized;

    if ( !mem )
        return defaultAlloc( allocator, newsize );

    if ( g_realloc )
        resized = g_realloc( mem, newsize );
    else
        resized = realloc( mem, newsize );

    if ( resized == NULL )
        defaultPanic( allocator, "Out of memory!" );
    return resized;
}
/* Release entry of the default allocator.
   allocator: unused.
   mem:       block to release; NULL is ignored, so neither the free
              hook nor free() is ever called with NULL.
   Uses the installed free hook when there is one, free() otherwise. */
static void TIDY_CALL defaultFree( TidyAllocator* ARG_UNUSED(allocator), void* mem )
{
    if ( mem == NULL )
        return;

    if ( g_free )
        g_free( mem );
    else
        free( mem );
}
/* Function table of the default allocator. The initialisers are
   positional (alloc, realloc, free, panic), so their order has to match
   the member order of TidyAllocatorVtbl, which is declared elsewhere. */
static const TidyAllocatorVtbl defaultVtbl = {
defaultAlloc,
defaultRealloc,
defaultFree,
defaultPanic
};
/* The default allocator object: its single initialiser points at
   defaultVtbl, so it dispatches to the default* routines in this file. */
TidyAllocator TY_(g_default_allocator) = {
&defaultVtbl
};
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

209
src/attrask.c Normal file
View file

@ -0,0 +1,209 @@
/* attrask.c -- Interrogate attribute type
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.5 $
*/
#include "tidy-int.h"
#include "tidy.h"
#include "attrs.h"
/* Public attribute-type predicates (HREF ... REL).
   Every function in this run has the same shape: it converts the opaque
   TidyAttr handle with tidyAttrToImpl() and returns the result of the
   attrIsXXX helper whose suffix matches the function name.
   NOTE(review): how a null handle is treated depends on the attrIsXXX
   helpers, which are defined outside this file - confirm in attrs.h. */
Bool TIDY_CALL tidyAttrIsHREF( TidyAttr tattr )
{
return attrIsHREF( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsSRC( TidyAttr tattr )
{
return attrIsSRC( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsID( TidyAttr tattr )
{
return attrIsID( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsNAME( TidyAttr tattr )
{
return attrIsNAME( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsSUMMARY( TidyAttr tattr )
{
return attrIsSUMMARY( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsALT( TidyAttr tattr )
{
return attrIsALT( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsLONGDESC( TidyAttr tattr )
{
return attrIsLONGDESC( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsUSEMAP( TidyAttr tattr )
{
return attrIsUSEMAP( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsISMAP( TidyAttr tattr )
{
return attrIsISMAP( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsLANGUAGE( TidyAttr tattr )
{
return attrIsLANGUAGE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsTYPE( TidyAttr tattr )
{
return attrIsTYPE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsVALUE( TidyAttr tattr )
{
return attrIsVALUE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsCONTENT( TidyAttr tattr )
{
return attrIsCONTENT( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsTITLE( TidyAttr tattr )
{
return attrIsTITLE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsXMLNS( TidyAttr tattr )
{
return attrIsXMLNS( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsDATAFLD( TidyAttr tattr )
{
return attrIsDATAFLD( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsWIDTH( TidyAttr tattr )
{
return attrIsWIDTH( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsHEIGHT( TidyAttr tattr )
{
return attrIsHEIGHT( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsFOR( TidyAttr tattr )
{
return attrIsFOR( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsSELECTED( TidyAttr tattr )
{
return attrIsSELECTED( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsCHECKED( TidyAttr tattr )
{
return attrIsCHECKED( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsLANG( TidyAttr tattr )
{
return attrIsLANG( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsTARGET( TidyAttr tattr )
{
return attrIsTARGET( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsHTTP_EQUIV( TidyAttr tattr )
{
return attrIsHTTP_EQUIV( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsREL( TidyAttr tattr )
{
return attrIsREL( tidyAttrToImpl(tattr) );
}
/* Public predicates for event-handler attributes.
   tidyAttrIsEvent forwards to TY_(attrIsEvent); the remaining functions
   each forward to the attrIsOnXXX helper matching their name. All of
   them first convert the TidyAttr handle with tidyAttrToImpl().
   NOTE(review): which attributes TY_(attrIsEvent) accepts is decided
   outside this file - confirm there. */
Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
{
return TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnMOUSEMOVE( TidyAttr tattr )
{
return attrIsOnMOUSEMOVE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnMOUSEDOWN( TidyAttr tattr )
{
return attrIsOnMOUSEDOWN( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnMOUSEUP( TidyAttr tattr )
{
return attrIsOnMOUSEUP( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnCLICK( TidyAttr tattr )
{
return attrIsOnCLICK( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnMOUSEOVER( TidyAttr tattr )
{
return attrIsOnMOUSEOVER( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnMOUSEOUT( TidyAttr tattr )
{
return attrIsOnMOUSEOUT( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnKEYDOWN( TidyAttr tattr )
{
return attrIsOnKEYDOWN( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnKEYUP( TidyAttr tattr )
{
return attrIsOnKEYUP( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnKEYPRESS( TidyAttr tattr )
{
return attrIsOnKEYPRESS( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnFOCUS( TidyAttr tattr )
{
return attrIsOnFOCUS( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsOnBLUR( TidyAttr tattr )
{
return attrIsOnBLUR( tidyAttrToImpl(tattr) );
}
/* Public attribute-type predicates (BGCOLOR ... ROWSPAN).
   Same shape as the other tidyAttrIsXXX wrappers in this file: convert
   the TidyAttr handle with tidyAttrToImpl() and return the result of
   the attrIsXXX helper matching the function name. */
Bool TIDY_CALL tidyAttrIsBGCOLOR( TidyAttr tattr )
{
return attrIsBGCOLOR( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsLINK( TidyAttr tattr )
{
return attrIsLINK( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsALINK( TidyAttr tattr )
{
return attrIsALINK( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsVLINK( TidyAttr tattr )
{
return attrIsVLINK( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsTEXT( TidyAttr tattr )
{
return attrIsTEXT( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsSTYLE( TidyAttr tattr )
{
return attrIsSTYLE( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsABBR( TidyAttr tattr )
{
return attrIsABBR( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsCOLSPAN( TidyAttr tattr )
{
return attrIsCOLSPAN( tidyAttrToImpl(tattr) );
}
Bool TIDY_CALL tidyAttrIsROWSPAN( TidyAttr tattr )
{
return attrIsROWSPAN( tidyAttrToImpl(tattr) );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

8810
src/attrdict.c Normal file

File diff suppressed because it is too large Load diff

146
src/attrdict.h Normal file
View file

@ -0,0 +1,146 @@
#ifndef __ATTRDICT_H__
#define __ATTRDICT_H__
/* attrdict.h -- extended attribute information
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: attrdict.h,v 1.4 2006/09/12 15:14:44 arnaud02 Exp $
*/
#include "tidy.h"
/* One entry of a per-element attribute table (the W3CAttrsFor_XXX arrays
   declared below are arrays of this type). */
typedef struct _AttrVersion
{
/* attribute this entry describes */
TidyAttrId attribute;
/* NOTE(review): presumably a bit mask of the HTML versions in which the
   attribute is allowed on the element - confirm against attrdict.c */
uint versions;
} AttrVersion;
extern const AttrVersion TY_(W3CAttrsFor_A)[];
extern const AttrVersion TY_(W3CAttrsFor_ABBR)[];
extern const AttrVersion TY_(W3CAttrsFor_ACRONYM)[];
extern const AttrVersion TY_(W3CAttrsFor_ADDRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_APPLET)[];
extern const AttrVersion TY_(W3CAttrsFor_AREA)[];
extern const AttrVersion TY_(W3CAttrsFor_B)[];
extern const AttrVersion TY_(W3CAttrsFor_BASE)[];
extern const AttrVersion TY_(W3CAttrsFor_BASEFONT)[];
extern const AttrVersion TY_(W3CAttrsFor_BDO)[];
extern const AttrVersion TY_(W3CAttrsFor_BIG)[];
extern const AttrVersion TY_(W3CAttrsFor_BLOCKQUOTE)[];
extern const AttrVersion TY_(W3CAttrsFor_BODY)[];
extern const AttrVersion TY_(W3CAttrsFor_BR)[];
extern const AttrVersion TY_(W3CAttrsFor_BUTTON)[];
extern const AttrVersion TY_(W3CAttrsFor_CAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_CENTER)[];
extern const AttrVersion TY_(W3CAttrsFor_CITE)[];
extern const AttrVersion TY_(W3CAttrsFor_CODE)[];
extern const AttrVersion TY_(W3CAttrsFor_COL)[];
extern const AttrVersion TY_(W3CAttrsFor_COLGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_DD)[];
extern const AttrVersion TY_(W3CAttrsFor_DEL)[];
extern const AttrVersion TY_(W3CAttrsFor_DFN)[];
extern const AttrVersion TY_(W3CAttrsFor_DIR)[];
extern const AttrVersion TY_(W3CAttrsFor_DIV)[];
extern const AttrVersion TY_(W3CAttrsFor_DL)[];
extern const AttrVersion TY_(W3CAttrsFor_DT)[];
extern const AttrVersion TY_(W3CAttrsFor_EM)[];
extern const AttrVersion TY_(W3CAttrsFor_FIELDSET)[];
extern const AttrVersion TY_(W3CAttrsFor_FONT)[];
extern const AttrVersion TY_(W3CAttrsFor_FORM)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_FRAMESET)[];
extern const AttrVersion TY_(W3CAttrsFor_H1)[];
extern const AttrVersion TY_(W3CAttrsFor_H2)[];
extern const AttrVersion TY_(W3CAttrsFor_H3)[];
extern const AttrVersion TY_(W3CAttrsFor_H4)[];
extern const AttrVersion TY_(W3CAttrsFor_H5)[];
extern const AttrVersion TY_(W3CAttrsFor_H6)[];
extern const AttrVersion TY_(W3CAttrsFor_HEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_HR)[];
extern const AttrVersion TY_(W3CAttrsFor_HTML)[];
extern const AttrVersion TY_(W3CAttrsFor_I)[];
extern const AttrVersion TY_(W3CAttrsFor_IFRAME)[];
extern const AttrVersion TY_(W3CAttrsFor_IMG)[];
extern const AttrVersion TY_(W3CAttrsFor_INPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_INS)[];
extern const AttrVersion TY_(W3CAttrsFor_ISINDEX)[];
extern const AttrVersion TY_(W3CAttrsFor_KBD)[];
extern const AttrVersion TY_(W3CAttrsFor_LABEL)[];
extern const AttrVersion TY_(W3CAttrsFor_LEGEND)[];
extern const AttrVersion TY_(W3CAttrsFor_LI)[];
extern const AttrVersion TY_(W3CAttrsFor_LINK)[];
extern const AttrVersion TY_(W3CAttrsFor_LISTING)[];
extern const AttrVersion TY_(W3CAttrsFor_MAP)[];
extern const AttrVersion TY_(W3CAttrsFor_MENU)[];
extern const AttrVersion TY_(W3CAttrsFor_META)[];
extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[];
extern const AttrVersion TY_(W3CAttrsFor_NOFRAMES)[];
extern const AttrVersion TY_(W3CAttrsFor_NOSCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_OBJECT)[];
extern const AttrVersion TY_(W3CAttrsFor_OL)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_OPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_P)[];
extern const AttrVersion TY_(W3CAttrsFor_PARAM)[];
extern const AttrVersion TY_(W3CAttrsFor_PLAINTEXT)[];
extern const AttrVersion TY_(W3CAttrsFor_PRE)[];
extern const AttrVersion TY_(W3CAttrsFor_Q)[];
extern const AttrVersion TY_(W3CAttrsFor_RB)[];
extern const AttrVersion TY_(W3CAttrsFor_RBC)[];
extern const AttrVersion TY_(W3CAttrsFor_RP)[];
extern const AttrVersion TY_(W3CAttrsFor_RT)[];
extern const AttrVersion TY_(W3CAttrsFor_RTC)[];
extern const AttrVersion TY_(W3CAttrsFor_RUBY)[];
extern const AttrVersion TY_(W3CAttrsFor_S)[];
extern const AttrVersion TY_(W3CAttrsFor_SAMP)[];
extern const AttrVersion TY_(W3CAttrsFor_SCRIPT)[];
extern const AttrVersion TY_(W3CAttrsFor_SELECT)[];
extern const AttrVersion TY_(W3CAttrsFor_SMALL)[];
extern const AttrVersion TY_(W3CAttrsFor_SPAN)[];
extern const AttrVersion TY_(W3CAttrsFor_STRIKE)[];
extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
extern const AttrVersion TY_(W3CAttrsFor_TD)[];
extern const AttrVersion TY_(W3CAttrsFor_TEXTAREA)[];
extern const AttrVersion TY_(W3CAttrsFor_TFOOT)[];
extern const AttrVersion TY_(W3CAttrsFor_TH)[];
extern const AttrVersion TY_(W3CAttrsFor_THEAD)[];
extern const AttrVersion TY_(W3CAttrsFor_TITLE)[];
extern const AttrVersion TY_(W3CAttrsFor_TR)[];
extern const AttrVersion TY_(W3CAttrsFor_TT)[];
extern const AttrVersion TY_(W3CAttrsFor_U)[];
extern const AttrVersion TY_(W3CAttrsFor_UL)[];
extern const AttrVersion TY_(W3CAttrsFor_VAR)[];
extern const AttrVersion TY_(W3CAttrsFor_XMP)[];
extern const AttrVersion TY_(W3CAttrsFor_TRACK)[];
extern const AttrVersion TY_(W3CAttrsFor_SUMMARY)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGCAPTION)[];
extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
extern const AttrVersion TY_(W3CAttrsFor_DATALIST)[];
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
#endif /* __ATTRDICT_H__ */

213
src/attrget.c Normal file
View file

@ -0,0 +1,213 @@
/* attrget.c -- Locate attribute value by type
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.6 $
*/
#include "tidy-int.h"
#include "tags.h"
#include "attrs.h"
#include "tidy.h"
/* Look up the attribute with identifier attId on tnod.  The node handle
   is converted to its implementation pointer, TY_(AttrGetById) performs
   the search, and the result is handed back as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
{
    return tidyImplToAttr( TY_(AttrGetById)( tidyNodeToImpl(tnod), attId ) );
}
/* Public lookup of the HREF attribute on tnod: delegates to attrGetHREF
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHREF( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetHREF( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the SRC attribute on tnod: delegates to attrGetSRC
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSRC( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetSRC( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ID attribute on tnod: delegates to attrGetID
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetID( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetID( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the NAME attribute on tnod: delegates to attrGetNAME
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetNAME( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetNAME( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the SUMMARY attribute on tnod: delegates to
   attrGetSUMMARY and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSUMMARY( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetSUMMARY( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ALT attribute on tnod: delegates to attrGetALT
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetALT( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetALT( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the LONGDESC attribute on tnod: delegates to
   attrGetLONGDESC and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLONGDESC( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetLONGDESC( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the USEMAP attribute on tnod: delegates to
   attrGetUSEMAP and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetUSEMAP( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetUSEMAP( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ISMAP attribute on tnod: delegates to
   attrGetISMAP and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetISMAP( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetISMAP( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the LANGUAGE attribute on tnod: delegates to
   attrGetLANGUAGE and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLANGUAGE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetLANGUAGE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the TYPE attribute on tnod: delegates to attrGetTYPE
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTYPE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetTYPE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the VALUE attribute on tnod: delegates to
   attrGetVALUE and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetVALUE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetVALUE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the CONTENT attribute on tnod: delegates to
   attrGetCONTENT and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCONTENT( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetCONTENT( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the TITLE attribute on tnod: delegates to
   attrGetTITLE and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTITLE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetTITLE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the XMLNS attribute on tnod: delegates to
   attrGetXMLNS and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetXMLNS( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetXMLNS( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the DATAFLD attribute on tnod: delegates to
   attrGetDATAFLD and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetDATAFLD( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetDATAFLD( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the WIDTH attribute on tnod: delegates to
   attrGetWIDTH and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetWIDTH( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetWIDTH( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the HEIGHT attribute on tnod: delegates to
   attrGetHEIGHT and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHEIGHT( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetHEIGHT( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the FOR attribute on tnod: delegates to attrGetFOR
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetFOR( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetFOR( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the SELECTED attribute on tnod: delegates to
   attrGetSELECTED and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSELECTED( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetSELECTED( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the CHECKED attribute on tnod: delegates to
   attrGetCHECKED and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCHECKED( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetCHECKED( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the LANG attribute on tnod: delegates to attrGetLANG
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLANG( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetLANG( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the TARGET attribute on tnod: delegates to
   attrGetTARGET and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTARGET( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetTARGET( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the HTTP_EQUIV attribute on tnod: delegates to
   attrGetHTTP_EQUIV and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHTTP_EQUIV( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetHTTP_EQUIV( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the REL attribute on tnod: delegates to attrGetREL
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetREL( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetREL( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnMOUSEMOVE event attribute on tnod: delegates to
   attrGetOnMOUSEMOVE and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEMOVE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnMOUSEMOVE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnMOUSEDOWN event attribute on tnod: delegates to
   attrGetOnMOUSEDOWN and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEDOWN( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnMOUSEDOWN( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnMOUSEUP event attribute on tnod: delegates to
   attrGetOnMOUSEUP and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEUP( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnMOUSEUP( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnCLICK event attribute on tnod: delegates to
   attrGetOnCLICK and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnCLICK( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnCLICK( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnMOUSEOVER event attribute on tnod: delegates to
   attrGetOnMOUSEOVER and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOVER( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnMOUSEOVER( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnMOUSEOUT event attribute on tnod: delegates to
   attrGetOnMOUSEOUT and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOUT( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnMOUSEOUT( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnKEYDOWN event attribute on tnod: delegates to
   attrGetOnKEYDOWN and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYDOWN( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnKEYDOWN( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnKEYUP event attribute on tnod: delegates to
   attrGetOnKEYUP and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYUP( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnKEYUP( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnKEYPRESS event attribute on tnod: delegates to
   attrGetOnKEYPRESS and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYPRESS( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnKEYPRESS( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnFOCUS event attribute on tnod: delegates to
   attrGetOnFOCUS and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnFOCUS( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnFOCUS( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the OnBLUR event attribute on tnod: delegates to
   attrGetOnBLUR and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnBLUR( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetOnBLUR( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the BGCOLOR attribute on tnod: delegates to
   attrGetBGCOLOR and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetBGCOLOR( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetBGCOLOR( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the LINK attribute on tnod: delegates to attrGetLINK
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLINK( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetLINK( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ALINK attribute on tnod: delegates to
   attrGetALINK and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetALINK( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetALINK( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the VLINK attribute on tnod: delegates to
   attrGetVLINK and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetVLINK( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetVLINK( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the TEXT attribute on tnod: delegates to attrGetTEXT
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTEXT( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetTEXT( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the STYLE attribute on tnod: delegates to
   attrGetSTYLE and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSTYLE( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetSTYLE( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ABBR attribute on tnod: delegates to attrGetABBR
   and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetABBR( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetABBR( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the COLSPAN attribute on tnod: delegates to
   attrGetCOLSPAN and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCOLSPAN( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetCOLSPAN( tidyNodeToImpl(tnod) ) );
    return attr;
}
/* Public lookup of the ROWSPAN attribute on tnod: delegates to
   attrGetROWSPAN and returns the result as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetROWSPAN( TidyNode tnod )
{
    TidyAttr attr = tidyImplToAttr( attrGetROWSPAN( tidyNodeToImpl(tnod) ) );
    return attr;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

2013
src/attrs.c Normal file

File diff suppressed because it is too large Load diff

373
src/attrs.h Normal file
View file

@ -0,0 +1,373 @@
#ifndef __ATTRS_H__
#define __ATTRS_H__
/* attrs.h -- recognize HTML attributes
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/06/14 09:36:06 $
$Revision: 1.29 $
*/
#include "forward.h"
/* Declaration for methods that check attribute values.
   AttrCheck is a function type (not a pointer type): it receives the
   document, a node and one of its attribute values and returns nothing.
   It is stored as "AttrCheck*" in struct _Attribute and is also used to
   declare checker functions directly, e.g. "AttrCheck TY_(CheckUrl);". */
typedef void (AttrCheck)(TidyDocImpl* doc, Node *node, AttVal *attval);
/* Dictionary entry describing one attribute. */
struct _Attribute
{
/* attribute identifier */
TidyAttrId id;
/* attribute name string */
tmbstr name;
/* checker callback for this attribute's values */
AttrCheck* attrchk;
/* link to another entry; presumably chains entries into a list such as
   declared_attr_list -- TODO confirm against attrs.c */
struct _Attribute* next;
};
/*
Anchor/Node linked list: each entry pairs a name string with a node
and points at the next entry in the list.
*/
struct _Anchor
{
struct _Anchor *next;
Node *node;
char *name;
};
typedef struct _Anchor Anchor;
/* Compile-time switch for the attribute hash table.  It defaults to
   enabled (1) unless the build already defines ATTRIBUTE_HASH_LOOKUP. */
#if !defined(ATTRIBUTE_HASH_LOOKUP)
#define ATTRIBUTE_HASH_LOOKUP 1
#endif
#if ATTRIBUTE_HASH_LOOKUP
/* Number of slots in the hashtab array of struct _TidyAttribImpl. */
enum
{
ATTRIBUTE_HASH_SIZE=178u
};
/* One hash table entry: a pointer to an attribute definition plus a link
   to the next _AttrHash entry. */
struct _AttrHash
{
Attribute const* attr;
struct _AttrHash* next;
};
typedef struct _AttrHash AttrHash;
#endif
/* Attribute-related state: the anchor list, the list of declared
   attributes and, when ATTRIBUTE_HASH_LOOKUP is enabled, the hash table. */
struct _TidyAttribImpl
{
/* anchor/node lookup */
Anchor* anchor_list;
/* Declared literal attributes */
Attribute* declared_attr_list;
#if ATTRIBUTE_HASH_LOOKUP
/* hash table slots, ATTRIBUTE_HASH_SIZE of them */
AttrHash* hashtab[ATTRIBUTE_HASH_SIZE];
#endif
};
typedef struct _TidyAttribImpl TidyAttribImpl;
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
AttrCheck TY_(CheckUrl);
/* public method for finding attribute definition by name */
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval );
const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
Node *node, ctmbstr name, ctmbstr value );
AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value);
Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname );
/* Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ); */
Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname );
/* may id or name serve as anchor? */
Bool TY_(IsAnchorElement)( TidyDocImpl* doc, Node* node );
/*
In CSS1, selectors can contain only the characters A-Z, 0-9, and
Unicode characters 161-255, plus dash (-); they cannot start with
a dash or a digit; they can also contain escaped characters and any
Unicode character as a numeric code (see next item).
The backslash followed by at most four hexadecimal digits (0..9A..F)
stands for the Unicode character with that number.
Any character except a hexadecimal digit can be escaped to remove its
special meaning, by putting a backslash in front.
#508936 - CSS class naming for -clean option
*/
Bool TY_(IsCSS1Selector)( ctmbstr buf );
Bool TY_(IsValidHTMLID)(ctmbstr id);
Bool TY_(IsValidXMLID)(ctmbstr id);
/* removes anchor for specific node */
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node );
/* free all anchors */
void TY_(FreeAnchors)( TidyDocImpl* doc );
/* public methods for initializing/freeing attribute dictionary */
void TY_(InitAttrs)( TidyDocImpl* doc );
void TY_(FreeAttrTable)( TidyDocImpl* doc );
void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname );
/*
the same attribute name can't be used
more than once in each element
*/
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node* node, Bool isXml );
void TY_(SortAttributes)(Node* node, TidyAttrSortStrategy strat);
Bool TY_(IsBoolAttribute)( AttVal* attval );
Bool TY_(attrIsEvent)( AttVal* attval );
AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
/* Helper macros for inspecting an attribute value (AttVal*).
   Every macro tolerates a null pointer and a null ->dict.  All macro
   parameters are parenthesized so that argument expressions such as
   "list + 1" or "cond ? a : b" expand with the intended precedence.
   Arguments may be evaluated more than once: avoid side effects. */
/* 0 == TidyAttr_UNKNOWN */
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)
/* true when av has a dictionary entry whose id equals atid */
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == (atid)))
/* true when attr is non-null and carries a value string */
#define AttrHasValue(attr) ((attr) && (attr)->value)
/* value equals val according to TY_(tmbstrcasecmp) */
#define AttrValueIs(attr, val) (AttrHasValue(attr) && \
    TY_(tmbstrcasecmp)((attr)->value, (val)) == 0)
/* TY_(tmbsubstr) finds val inside the value */
#define AttrContains(attr, val) (AttrHasValue(attr) && \
    TY_(tmbsubstr)((attr)->value, (val)) != NULL)
/* NOTE(review): this reads (attr)->dict->versions, but struct _Attribute as
   declared in this header has no "versions" member; the macro can only
   compile if unused or if dict points at a different type -- confirm, and
   consider TY_(NodeAttributeVersions) instead. */
#define AttrVersions(attr) ((attr) && (attr)->dict ? (attr)->dict->versions : VERS_PROPRIETARY)
/* true when both attributes have dictionary entries with the same
   non-zero id */
#define AttrsHaveSameId(a, b) ((a) && (b) && (a)->dict && (b)->dict && (a)->dict->id && \
    (b)->dict->id && (a)->dict->id == (b)->dict->id)
/* Attribute test macros: attrIsXXX(av) is shorthand for
   AttrIsId( av, TidyAttr_XXX ), one macro per attribute identifier
   listed below. */
#define attrIsABBR(av) AttrIsId( av, TidyAttr_ABBR )
#define attrIsACCEPT(av) AttrIsId( av, TidyAttr_ACCEPT )
#define attrIsACCEPT_CHARSET(av) AttrIsId( av, TidyAttr_ACCEPT_CHARSET )
#define attrIsACCESSKEY(av) AttrIsId( av, TidyAttr_ACCESSKEY )
#define attrIsACTION(av) AttrIsId( av, TidyAttr_ACTION )
#define attrIsADD_DATE(av) AttrIsId( av, TidyAttr_ADD_DATE )
#define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN )
#define attrIsALINK(av) AttrIsId( av, TidyAttr_ALINK )
#define attrIsALT(av) AttrIsId( av, TidyAttr_ALT )
#define attrIsARCHIVE(av) AttrIsId( av, TidyAttr_ARCHIVE )
#define attrIsAXIS(av) AttrIsId( av, TidyAttr_AXIS )
#define attrIsBACKGROUND(av) AttrIsId( av, TidyAttr_BACKGROUND )
#define attrIsBGCOLOR(av) AttrIsId( av, TidyAttr_BGCOLOR )
#define attrIsBGPROPERTIES(av) AttrIsId( av, TidyAttr_BGPROPERTIES )
#define attrIsBORDER(av) AttrIsId( av, TidyAttr_BORDER )
#define attrIsBORDERCOLOR(av) AttrIsId( av, TidyAttr_BORDERCOLOR )
#define attrIsBOTTOMMARGIN(av) AttrIsId( av, TidyAttr_BOTTOMMARGIN )
#define attrIsCELLPADDING(av) AttrIsId( av, TidyAttr_CELLPADDING )
#define attrIsCELLSPACING(av) AttrIsId( av, TidyAttr_CELLSPACING )
#define attrIsCHAR(av) AttrIsId( av, TidyAttr_CHAR )
#define attrIsCHAROFF(av) AttrIsId( av, TidyAttr_CHAROFF )
#define attrIsCHARSET(av) AttrIsId( av, TidyAttr_CHARSET )
#define attrIsCHECKED(av) AttrIsId( av, TidyAttr_CHECKED )
#define attrIsCITE(av) AttrIsId( av, TidyAttr_CITE )
#define attrIsCLASS(av) AttrIsId( av, TidyAttr_CLASS )
#define attrIsCLASSID(av) AttrIsId( av, TidyAttr_CLASSID )
#define attrIsCLEAR(av) AttrIsId( av, TidyAttr_CLEAR )
#define attrIsCODE(av) AttrIsId( av, TidyAttr_CODE )
#define attrIsCODEBASE(av) AttrIsId( av, TidyAttr_CODEBASE )
#define attrIsCODETYPE(av) AttrIsId( av, TidyAttr_CODETYPE )
#define attrIsCOLOR(av) AttrIsId( av, TidyAttr_COLOR )
#define attrIsCOLS(av) AttrIsId( av, TidyAttr_COLS )
#define attrIsCOLSPAN(av) AttrIsId( av, TidyAttr_COLSPAN )
#define attrIsCOMPACT(av) AttrIsId( av, TidyAttr_COMPACT )
#define attrIsCONTENT(av) AttrIsId( av, TidyAttr_CONTENT )
#define attrIsCOORDS(av) AttrIsId( av, TidyAttr_COORDS )
#define attrIsDATA(av) AttrIsId( av, TidyAttr_DATA )
#define attrIsDATAFLD(av) AttrIsId( av, TidyAttr_DATAFLD )
#define attrIsDATAFORMATAS(av) AttrIsId( av, TidyAttr_DATAFORMATAS )
#define attrIsDATAPAGESIZE(av) AttrIsId( av, TidyAttr_DATAPAGESIZE )
#define attrIsDATASRC(av) AttrIsId( av, TidyAttr_DATASRC )
#define attrIsDATETIME(av) AttrIsId( av, TidyAttr_DATETIME )
#define attrIsDECLARE(av) AttrIsId( av, TidyAttr_DECLARE )
#define attrIsDEFER(av) AttrIsId( av, TidyAttr_DEFER )
#define attrIsDIR(av) AttrIsId( av, TidyAttr_DIR )
#define attrIsDISABLED(av) AttrIsId( av, TidyAttr_DISABLED )
#define attrIsENCODING(av) AttrIsId( av, TidyAttr_ENCODING )
#define attrIsENCTYPE(av) AttrIsId( av, TidyAttr_ENCTYPE )
#define attrIsFACE(av) AttrIsId( av, TidyAttr_FACE )
#define attrIsFOR(av) AttrIsId( av, TidyAttr_FOR )
#define attrIsFRAME(av) AttrIsId( av, TidyAttr_FRAME )
#define attrIsFRAMEBORDER(av) AttrIsId( av, TidyAttr_FRAMEBORDER )
#define attrIsFRAMESPACING(av) AttrIsId( av, TidyAttr_FRAMESPACING )
#define attrIsGRIDX(av) AttrIsId( av, TidyAttr_GRIDX )
#define attrIsGRIDY(av) AttrIsId( av, TidyAttr_GRIDY )
#define attrIsHEADERS(av) AttrIsId( av, TidyAttr_HEADERS )
#define attrIsHEIGHT(av) AttrIsId( av, TidyAttr_HEIGHT )
#define attrIsHREF(av) AttrIsId( av, TidyAttr_HREF )
#define attrIsHREFLANG(av) AttrIsId( av, TidyAttr_HREFLANG )
#define attrIsHSPACE(av) AttrIsId( av, TidyAttr_HSPACE )
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
#define attrIsID(av) AttrIsId( av, TidyAttr_ID )
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )
#define attrIsLAST_MODIFIED(av) AttrIsId( av, TidyAttr_LAST_MODIFIED )
#define attrIsLAST_VISIT(av) AttrIsId( av, TidyAttr_LAST_VISIT )
#define attrIsLEFTMARGIN(av) AttrIsId( av, TidyAttr_LEFTMARGIN )
#define attrIsLINK(av) AttrIsId( av, TidyAttr_LINK )
#define attrIsLONGDESC(av) AttrIsId( av, TidyAttr_LONGDESC )
#define attrIsLOWSRC(av) AttrIsId( av, TidyAttr_LOWSRC )
#define attrIsMARGINHEIGHT(av) AttrIsId( av, TidyAttr_MARGINHEIGHT )
#define attrIsMARGINWIDTH(av) AttrIsId( av, TidyAttr_MARGINWIDTH )
#define attrIsMAXLENGTH(av) AttrIsId( av, TidyAttr_MAXLENGTH )
#define attrIsMEDIA(av) AttrIsId( av, TidyAttr_MEDIA )
#define attrIsMETHOD(av) AttrIsId( av, TidyAttr_METHOD )
#define attrIsMULTIPLE(av) AttrIsId( av, TidyAttr_MULTIPLE )
#define attrIsNAME(av) AttrIsId( av, TidyAttr_NAME )
#define attrIsNOHREF(av) AttrIsId( av, TidyAttr_NOHREF )
#define attrIsNORESIZE(av) AttrIsId( av, TidyAttr_NORESIZE )
#define attrIsNOSHADE(av) AttrIsId( av, TidyAttr_NOSHADE )
#define attrIsNOWRAP(av) AttrIsId( av, TidyAttr_NOWRAP )
#define attrIsOBJECT(av) AttrIsId( av, TidyAttr_OBJECT )
#define attrIsOnAFTERUPDATE(av) AttrIsId( av, TidyAttr_OnAFTERUPDATE )
#define attrIsOnBEFOREUNLOAD(av) AttrIsId( av, TidyAttr_OnBEFOREUNLOAD )
#define attrIsOnBEFOREUPDATE(av) AttrIsId( av, TidyAttr_OnBEFOREUPDATE )
#define attrIsOnBLUR(av) AttrIsId( av, TidyAttr_OnBLUR )
#define attrIsOnCHANGE(av) AttrIsId( av, TidyAttr_OnCHANGE )
#define attrIsOnCLICK(av) AttrIsId( av, TidyAttr_OnCLICK )
#define attrIsOnDATAAVAILABLE(av) AttrIsId( av, TidyAttr_OnDATAAVAILABLE )
#define attrIsOnDATASETCHANGED(av) AttrIsId( av, TidyAttr_OnDATASETCHANGED )
#define attrIsOnDATASETCOMPLETE(av) AttrIsId( av, TidyAttr_OnDATASETCOMPLETE )
#define attrIsOnDBLCLICK(av) AttrIsId( av, TidyAttr_OnDBLCLICK )
#define attrIsOnERRORUPDATE(av) AttrIsId( av, TidyAttr_OnERRORUPDATE )
#define attrIsOnFOCUS(av) AttrIsId( av, TidyAttr_OnFOCUS )
#define attrIsOnKEYDOWN(av) AttrIsId( av, TidyAttr_OnKEYDOWN )
#define attrIsOnKEYPRESS(av) AttrIsId( av, TidyAttr_OnKEYPRESS )
#define attrIsOnKEYUP(av) AttrIsId( av, TidyAttr_OnKEYUP )
#define attrIsOnLOAD(av) AttrIsId( av, TidyAttr_OnLOAD )
#define attrIsOnMOUSEDOWN(av) AttrIsId( av, TidyAttr_OnMOUSEDOWN )
#define attrIsOnMOUSEMOVE(av) AttrIsId( av, TidyAttr_OnMOUSEMOVE )
#define attrIsOnMOUSEOUT(av) AttrIsId( av, TidyAttr_OnMOUSEOUT )
#define attrIsOnMOUSEOVER(av) AttrIsId( av, TidyAttr_OnMOUSEOVER )
#define attrIsOnMOUSEUP(av) AttrIsId( av, TidyAttr_OnMOUSEUP )
#define attrIsOnRESET(av) AttrIsId( av, TidyAttr_OnRESET )
#define attrIsOnROWENTER(av) AttrIsId( av, TidyAttr_OnROWENTER )
#define attrIsOnROWEXIT(av) AttrIsId( av, TidyAttr_OnROWEXIT )
#define attrIsOnSELECT(av) AttrIsId( av, TidyAttr_OnSELECT )
#define attrIsOnSUBMIT(av) AttrIsId( av, TidyAttr_OnSUBMIT )
#define attrIsOnUNLOAD(av) AttrIsId( av, TidyAttr_OnUNLOAD )
#define attrIsPROFILE(av) AttrIsId( av, TidyAttr_PROFILE )
#define attrIsPROMPT(av) AttrIsId( av, TidyAttr_PROMPT )
#define attrIsRBSPAN(av) AttrIsId( av, TidyAttr_RBSPAN )
#define attrIsREADONLY(av) AttrIsId( av, TidyAttr_READONLY )
#define attrIsREL(av) AttrIsId( av, TidyAttr_REL )
#define attrIsREV(av) AttrIsId( av, TidyAttr_REV )
#define attrIsRIGHTMARGIN(av) AttrIsId( av, TidyAttr_RIGHTMARGIN )
#define attrIsROWS(av) AttrIsId( av, TidyAttr_ROWS )
#define attrIsROWSPAN(av) AttrIsId( av, TidyAttr_ROWSPAN )
#define attrIsRULES(av) AttrIsId( av, TidyAttr_RULES )
#define attrIsSCHEME(av) AttrIsId( av, TidyAttr_SCHEME )
#define attrIsSCOPE(av) AttrIsId( av, TidyAttr_SCOPE )
#define attrIsSCROLLING(av) AttrIsId( av, TidyAttr_SCROLLING )
#define attrIsSELECTED(av) AttrIsId( av, TidyAttr_SELECTED )
#define attrIsSHAPE(av) AttrIsId( av, TidyAttr_SHAPE )
#define attrIsSHOWGRID(av) AttrIsId( av, TidyAttr_SHOWGRID )
#define attrIsSHOWGRIDX(av) AttrIsId( av, TidyAttr_SHOWGRIDX )
#define attrIsSHOWGRIDY(av) AttrIsId( av, TidyAttr_SHOWGRIDY )
#define attrIsSIZE(av) AttrIsId( av, TidyAttr_SIZE )
#define attrIsSPAN(av) AttrIsId( av, TidyAttr_SPAN )
#define attrIsSRC(av) AttrIsId( av, TidyAttr_SRC )
#define attrIsSTANDBY(av) AttrIsId( av, TidyAttr_STANDBY )
#define attrIsSTART(av) AttrIsId( av, TidyAttr_START )
#define attrIsSTYLE(av) AttrIsId( av, TidyAttr_STYLE )
#define attrIsSUMMARY(av) AttrIsId( av, TidyAttr_SUMMARY )
#define attrIsTABINDEX(av) AttrIsId( av, TidyAttr_TABINDEX )
#define attrIsTARGET(av) AttrIsId( av, TidyAttr_TARGET )
#define attrIsTEXT(av) AttrIsId( av, TidyAttr_TEXT )
#define attrIsTITLE(av) AttrIsId( av, TidyAttr_TITLE )
#define attrIsTOPMARGIN(av) AttrIsId( av, TidyAttr_TOPMARGIN )
#define attrIsTYPE(av) AttrIsId( av, TidyAttr_TYPE )
#define attrIsUSEMAP(av) AttrIsId( av, TidyAttr_USEMAP )
#define attrIsVALIGN(av) AttrIsId( av, TidyAttr_VALIGN )
#define attrIsVALUE(av) AttrIsId( av, TidyAttr_VALUE )
#define attrIsVALUETYPE(av) AttrIsId( av, TidyAttr_VALUETYPE )
#define attrIsVERSION(av) AttrIsId( av, TidyAttr_VERSION )
#define attrIsVLINK(av) AttrIsId( av, TidyAttr_VLINK )
#define attrIsVSPACE(av) AttrIsId( av, TidyAttr_VSPACE )
#define attrIsWIDTH(av) AttrIsId( av, TidyAttr_WIDTH )
#define attrIsWRAP(av) AttrIsId( av, TidyAttr_WRAP )
#define attrIsXMLNS(av) AttrIsId( av, TidyAttr_XMLNS )
#define attrIsXML_LANG(av) AttrIsId( av, TidyAttr_XML_LANG )
#define attrIsXML_SPACE(av) AttrIsId( av, TidyAttr_XML_SPACE )
/* Attribute Retrieval macros
   attrGetXXX( nod ) expands to TY_(AttrGetById)( nod, TidyAttr_XXX ),
   i.e. it looks up the attribute with that identifier on node nod and
   yields the AttVal* that TY_(AttrGetById) returns.
   NOTE(review): attrGetFONT and attrGetBASEFONT use TidyAttr_FONT and
   TidyAttr_BASEFONT, which have no attrIsFONT / attrIsBASEFONT counterpart
   in this header -- confirm those identifiers exist in TidyAttrId.
*/
#define attrGetHREF( nod ) TY_(AttrGetById)( nod, TidyAttr_HREF )
#define attrGetSRC( nod ) TY_(AttrGetById)( nod, TidyAttr_SRC )
#define attrGetID( nod ) TY_(AttrGetById)( nod, TidyAttr_ID )
#define attrGetNAME( nod ) TY_(AttrGetById)( nod, TidyAttr_NAME )
#define attrGetSUMMARY( nod ) TY_(AttrGetById)( nod, TidyAttr_SUMMARY )
#define attrGetALT( nod ) TY_(AttrGetById)( nod, TidyAttr_ALT )
#define attrGetLONGDESC( nod ) TY_(AttrGetById)( nod, TidyAttr_LONGDESC )
#define attrGetUSEMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_USEMAP )
#define attrGetISMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_ISMAP )
#define attrGetLANGUAGE( nod ) TY_(AttrGetById)( nod, TidyAttr_LANGUAGE )
#define attrGetTYPE( nod ) TY_(AttrGetById)( nod, TidyAttr_TYPE )
#define attrGetVALUE( nod ) TY_(AttrGetById)( nod, TidyAttr_VALUE )
#define attrGetCONTENT( nod ) TY_(AttrGetById)( nod, TidyAttr_CONTENT )
#define attrGetTITLE( nod ) TY_(AttrGetById)( nod, TidyAttr_TITLE )
#define attrGetXMLNS( nod ) TY_(AttrGetById)( nod, TidyAttr_XMLNS )
#define attrGetDATAFLD( nod ) TY_(AttrGetById)( nod, TidyAttr_DATAFLD )
#define attrGetWIDTH( nod ) TY_(AttrGetById)( nod, TidyAttr_WIDTH )
#define attrGetHEIGHT( nod ) TY_(AttrGetById)( nod, TidyAttr_HEIGHT )
#define attrGetFOR( nod ) TY_(AttrGetById)( nod, TidyAttr_FOR )
#define attrGetSELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_SELECTED )
#define attrGetCHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_CHECKED )
#define attrGetLANG( nod ) TY_(AttrGetById)( nod, TidyAttr_LANG )
#define attrGetTARGET( nod ) TY_(AttrGetById)( nod, TidyAttr_TARGET )
#define attrGetHTTP_EQUIV( nod ) TY_(AttrGetById)( nod, TidyAttr_HTTP_EQUIV )
#define attrGetREL( nod ) TY_(AttrGetById)( nod, TidyAttr_REL )
#define attrGetOnMOUSEMOVE( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEMOVE )
#define attrGetOnMOUSEDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEDOWN )
#define attrGetOnMOUSEUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEUP )
#define attrGetOnCLICK( nod ) TY_(AttrGetById)( nod, TidyAttr_OnCLICK )
#define attrGetOnMOUSEOVER( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOVER )
#define attrGetOnMOUSEOUT( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOUT )
#define attrGetOnKEYDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYDOWN )
#define attrGetOnKEYUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYUP )
#define attrGetOnKEYPRESS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYPRESS )
#define attrGetOnFOCUS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnFOCUS )
#define attrGetOnBLUR( nod ) TY_(AttrGetById)( nod, TidyAttr_OnBLUR )
#define attrGetBGCOLOR( nod ) TY_(AttrGetById)( nod, TidyAttr_BGCOLOR )
#define attrGetLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_LINK )
#define attrGetALINK( nod ) TY_(AttrGetById)( nod, TidyAttr_ALINK )
#define attrGetVLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_VLINK )
#define attrGetTEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_TEXT )
#define attrGetSTYLE( nod ) TY_(AttrGetById)( nod, TidyAttr_STYLE )
#define attrGetABBR( nod ) TY_(AttrGetById)( nod, TidyAttr_ABBR )
#define attrGetCOLSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_COLSPAN )
#define attrGetFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_FONT )
#define attrGetBASEFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_BASEFONT )
#define attrGetROWSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_ROWSPAN )
#endif /* __ATTRS_H__ */

232
src/buffio.c Normal file
View file

@ -0,0 +1,232 @@
/* buffio.c -- Treat buffer as an I/O stream.
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/01/23 11:17:46 $
$Revision: 1.14 $
Requires buffer to automatically grow as bytes are added.
Must keep track of current read and write points.
*/
#include "tidy.h"
#include "buffio.h"
#include "forward.h"
/**************
TIDY
**************/
/* Input-source getByte callback: appData is the TidyBuffer registered by
   tidyInitInputBuffer; the read is forwarded to tidyBufGetByte. */
static int TIDY_CALL insrc_getByte( void* appData )
{
    return tidyBufGetByte( (TidyBuffer*) appData );
}
/* Input-source eof callback: appData is the TidyBuffer registered by
   tidyInitInputBuffer; the query is forwarded to tidyBufEndOfInput. */
static Bool TIDY_CALL insrc_eof( void* appData )
{
    return tidyBufEndOfInput( (TidyBuffer*) appData );
}
/* Input-source ungetByte callback: appData is the TidyBuffer registered
   by tidyInitInputBuffer; bv is pushed back through tidyBufUngetByte. */
static void TIDY_CALL insrc_ungetByte( void* appData, byte bv )
{
    tidyBufUngetByte( (TidyBuffer*) appData, bv );
}
/* Wire an input source to a buffer: buf becomes the source data and the
   three callbacks are set to this file's buffer-backed implementations. */
void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf )
{
    inp->sourceData = buf;
    inp->getByte    = insrc_getByte;
    inp->ungetByte  = insrc_ungetByte;
    inp->eof        = insrc_eof;
}
/* Output-sink putByte callback: appData is the TidyBuffer registered by
   tidyInitOutputBuffer; bv is appended through tidyBufPutByte. */
static void TIDY_CALL outsink_putByte( void* appData, byte bv )
{
    tidyBufPutByte( (TidyBuffer*) appData, bv );
}
/* Wire an output sink to a buffer: buf becomes the sink data and bytes
   are written through outsink_putByte. */
void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf )
{
    outp->sinkData = buf;
    outp->putByte  = outsink_putByte;
}
/* Initialize buf without naming an allocator: passing NULL makes
   tidyBufInitWithAllocator fall back to the default allocator. */
void TIDY_CALL tidyBufInit( TidyBuffer* buf )
{
    assert( buf != NULL );

    tidyBufInitWithAllocator( buf, NULL );
}
/* Initialize buf and reserve room for allocSize bytes, using the default
   allocator (NULL is passed on to tidyBufAllocWithAllocator). */
void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize )
{
    tidyBufAllocWithAllocator( buf, NULL, allocSize );
}
/* Zero the whole TidyBuffer structure, then record which allocator it
   uses.  A NULL allocator selects TY_(g_default_allocator). */
void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf,
                                         TidyAllocator *allocator )
{
    assert( buf != NULL );

    TidyClearMemory( buf, sizeof(TidyBuffer) );

    if ( !allocator )
        allocator = &TY_(g_default_allocator);
    buf->allocator = allocator;
}
/* Initialize buf with the given allocator (NULL selects the default) and
   make sure storage for allocSize bytes is available.  The read position
   is left at the start of the buffer. */
void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf,
                                          TidyAllocator *allocator,
                                          uint allocSize )
{
    /* reset first so the capacity check starts from an empty buffer */
    tidyBufInitWithAllocator( buf, allocator );

    tidyBufCheckAlloc( buf, allocSize, 0 );

    buf->next = 0;
}
/* Release the buffer's storage through its allocator and return the
   structure to the freshly-initialized state, keeping the same allocator. */
void TIDY_CALL tidyBufFree( TidyBuffer* buf )
{
    assert( buf != NULL );

    /* hand the storage back first ... */
    TidyFree( buf->allocator, buf->bp );

    /* ... then wipe every field; the allocator argument is read before
       the structure is cleared */
    tidyBufInitWithAllocator( buf, buf->allocator );
}
/* Empty the buffer without releasing its storage: the read position is
   rewound and, when storage exists, every allocated byte is zeroed and
   the content size is reset to 0. */
void TIDY_CALL tidyBufClear( TidyBuffer* buf )
{
    assert( buf != NULL );

    buf->next = 0;

    if ( !buf->bp )
        return;

    TidyClearMemory( buf->bp, buf->allocated );
    buf->size = 0;
}
/* Fallback for buffers that were never passed through tidyBufInit(),
   tidyBufAlloc() or their allocator-taking counterparts: point the
   buffer at the default allocator.
*/
static void setDefaultAllocator( TidyBuffer* buf )
{
    buf->allocator = &TY_(g_default_allocator);
}
/* Make sure the buffer can hold allocSize bytes plus a trailing null byte.
**
** To avoid thrashing memory the capacity is doubled until it is large
** enough, starting from the current capacity or, for an empty buffer,
** from chunkSize (256 when chunkSize is 0).  Newly obtained bytes are
** zeroed.  If the buffer has no allocator yet the default one is used.
**
** Nothing is reported to the caller: when allocSize+1 is not
** representable, or the allocator returns NULL, the buffer is left
** unchanged.  Callers that go on to write must check buf->allocated.
**
** The doubling is guarded against wrap-around; without the guard a request
** above half the uint range would double the amount to 0 and loop forever.
** (Assumes uint is an unsigned type, as its use here suggests.)
*/
void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf, uint allocSize, uint chunkSize )
{
    uint needed;

    assert( buf != NULL );

    if ( !buf->allocator )
        setDefaultAllocator( buf );

    if ( 0 == chunkSize )
        chunkSize = 256;

    /* room for the payload plus the trailing null byte */
    needed = allocSize + 1;
    if ( needed < allocSize )
        return;   /* allocSize+1 wrapped: request cannot be satisfied */

    if ( needed > buf->allocated )
    {
        byte* bp;
        uint allocAmt = chunkSize;
        if ( buf->allocated > 0 )
            allocAmt = buf->allocated;

        while ( allocAmt < needed )
        {
            uint doubled = allocAmt * 2;
            if ( doubled <= allocAmt )
            {
                /* doubling would wrap: settle for exactly what is needed */
                allocAmt = needed;
                break;
            }
            allocAmt = doubled;
        }

        bp = (byte*)TidyRealloc( buf->allocator, buf->bp, allocAmt );
        if ( bp != NULL )
        {
            /* zero only the newly added tail */
            TidyClearMemory( bp + buf->allocated, allocAmt - buf->allocated );
            buf->bp = bp;
            buf->allocated = allocAmt;
        }
    }
}
/* Attach buffer to a chunk of memory without allocating: bp becomes the
   storage, both the content size and the capacity are set to size, and
   reading restarts at offset 0.  A buffer that has no allocator yet gets
   the default one. */
void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size )
{
    assert( buf != NULL );

    if ( !buf->allocator )
        setDefaultAllocator( buf );

    buf->bp = bp;
    buf->allocated = size;
    buf->size = buf->allocated;
    buf->next = 0;
}
/* Clear pointer to memory w/out deallocation.
   The structure is re-initialized (all fields zeroed) while keeping its
   current allocator; the storage itself is not freed here.
   buf must not be NULL: it is dereferenced to read the allocator, so it
   is asserted first, as the other buffer routines do. */
void TIDY_CALL tidyBufDetach( TidyBuffer* buf )
{
    assert( buf != NULL );
    tidyBufInitWithAllocator( buf, buf->allocator );
}
/**************
OUTPUT
**************/
/* Append size bytes starting at vp to the end of the buffer contents,
   growing the storage as needed.  A NULL vp or a size of 0 is a no-op.

   tidyBufCheckAlloc does not report failure, so the copy is only done
   once the capacity is confirmed.  If the new length is not representable
   or the storage could not be grown, nothing is appended, instead of
   writing past the end of the allocation.
   (Assumes the buffer's size fields are uint like the parameters.) */
void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size )
{
    assert( buf != NULL );
    if ( vp != NULL && size > 0 )
    {
        uint needed = buf->size + size;

        /* buf->size + size wrapped around */
        if ( needed < buf->size )
            return;

        tidyBufCheckAlloc( buf, needed, 0 );

        /* a successful check leaves room for needed bytes plus the
           trailing null, i.e. allocated > needed */
        if ( buf->bp == NULL || buf->allocated <= needed )
            return;

        memcpy( buf->bp + buf->size, vp, size );
        buf->size += size;
    }
}
/* Append the single byte bv to the end of the buffer contents, growing
   the storage as needed.

   tidyBufCheckAlloc does not report failure, so the store is only done
   once the capacity is confirmed.  If the new length is not representable
   or the storage could not be grown, the byte is dropped, instead of
   writing past the end of the allocation.
   (Assumes the buffer's size fields are uint like the parameters.) */
void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv )
{
    uint needed;

    assert( buf != NULL );

    needed = buf->size + 1;

    /* buf->size + 1 wrapped around */
    if ( needed < buf->size )
        return;

    tidyBufCheckAlloc( buf, needed, 0 );

    /* a successful check leaves room for needed bytes plus the trailing
       null, i.e. allocated > needed */
    if ( buf->bp == NULL || buf->allocated <= needed )
        return;

    buf->bp[ buf->size++ ] = bv;
}
/* Remove the last byte of the buffer contents and return it.
   Returns EOF when the buffer holds no bytes. */
int TIDY_CALL tidyBufPopByte( TidyBuffer* buf )
{
    assert( buf != NULL );

    if ( buf->size > 0 )
        return buf->bp[ --buf->size ];

    return EOF;
}
/**************
INPUT
**************/
/* Read the byte at the current read position and advance past it.
   Returns EOF once tidyBufEndOfInput reports the end of the contents. */
int TIDY_CALL tidyBufGetByte( TidyBuffer* buf )
{
    if ( tidyBufEndOfInput(buf) )
        return EOF;

    return buf->bp[ buf->next++ ];
}
/* True when the read position has reached (or passed) the end of the
   buffer contents, i.e. there is nothing left to read. */
Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf )
{
    return ( buf->size <= buf->next );
}
/* Step the read position back by one byte.  Nothing happens when the
   position is already at the start.  The buffer contents are not
   modified; in debug builds bv is asserted to equal the byte being
   stepped back over. */
void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv )
{
    if ( !( buf->next > 0 ) )
        return;

    --buf->next;
    assert( bv == buf->bp[ buf->next ] );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1032
src/charsets.c Normal file

File diff suppressed because it is too large Load diff

14
src/charsets.h Normal file
View file

@ -0,0 +1,14 @@
#ifndef __CHARSETS_H__
#define __CHARSETS_H__
/* charsets.h -- character set information and mappings
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: charsets.h,v 1.2 2006/09/12 15:14:44 arnaud02 Exp $
*/
/* Include guard added so this header matches attrs.h, clean.h and
   config.h and can be included more than once.
   The functions below convert between three ways of naming an encoding:
   a name string, a numeric id and a numeric code page.
   NOTE(review): what each returns for an unknown input is not visible
   from this header -- see charsets.c. */
/* name -> id */
uint TY_(GetEncodingIdFromName)(ctmbstr name);
/* code page -> id */
uint TY_(GetEncodingIdFromCodePage)(uint cp);
/* name -> code page */
uint TY_(GetEncodingCodePageFromName)(ctmbstr name);
/* id -> code page */
uint TY_(GetEncodingCodePageFromId)(uint id);
/* id -> name */
ctmbstr TY_(GetEncodingNameFromId)(uint id);
/* code page -> name */
ctmbstr TY_(GetEncodingNameFromCodePage)(uint cp);
#endif /* __CHARSETS_H__ */

2674
src/clean.c Normal file

File diff suppressed because it is too large Load diff

87
src/clean.h Normal file
View file

@ -0,0 +1,87 @@
#ifndef __CLEAN_H__
#define __CLEAN_H__
/* clean.h -- clean up misuse of presentation markup
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.14 $
*/
void TY_(FixNodeLinks)(Node *node);
void TY_(FreeStyles)( TidyDocImpl* doc );
/* Add class="foo" to node
*/
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue );
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property );
void TY_(CleanDocument)( TidyDocImpl* doc );
/* simplifies <b><b> ... </b> ...</b> etc. */
void TY_(NestedEmphasis)( TidyDocImpl* doc, Node* node );
/* replace i by em and b by strong */
void TY_(EmFromI)( TidyDocImpl* doc, Node* node );
/*
Some people use dir or ul without an li
to indent the content. The pattern to
look for is a list with a single implicit
li. This is recursively replaced by an
implicit blockquote.
*/
void TY_(List2BQ)( TidyDocImpl* doc, Node* node );
/*
Replace implicit blockquote by div with an indent
taking care to reduce nested blockquotes to a single
div with the indent set to match the nesting depth
*/
void TY_(BQ2Div)( TidyDocImpl* doc, Node* node );
void TY_(DropSections)( TidyDocImpl* doc, Node* node );
/*
This is a major clean up to strip out all the extra stuff you get
when you save as web page from Word 2000. It doesn't yet know what
to do with VML tags, but these will appear as errors unless you
declare them as new tags, such as o:p which needs to be declared
as inline.
*/
void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
Bool TY_(IsWord2000)( TidyDocImpl* doc );
/* where appropriate move object elements from head to body */
void TY_(BumpObject)( TidyDocImpl* doc, Node *html );
/* This is disabled due to http://tidy.sf.net/bug/681116 */
#if 0
void TY_(FixBrakes)( TidyDocImpl* pDoc, Node *pParent );
#endif
void TY_(VerifyHTTPEquiv)( TidyDocImpl* pDoc, Node *pParent );
void TY_(DropComments)(TidyDocImpl* doc, Node* node);
void TY_(DropFontElements)(TidyDocImpl* doc, Node* node, Node **pnode);
void TY_(WbrToSpace)(TidyDocImpl* doc, Node* node);
void TY_(DowngradeTypography)(TidyDocImpl* doc, Node* node);
void TY_(ReplacePreformattedSpaces)(TidyDocImpl* doc, Node* node);
void TY_(NormalizeSpaces)(Lexer *lexer, Node *node);
void TY_(ConvertCDATANodes)(TidyDocImpl* doc, Node* node);
void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId);
void TY_(FixXhtmlNamespace)(TidyDocImpl* doc, Bool wantXmlns);
void TY_(FixLanguageInformation)(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang);
#endif /* __CLEAN_H__ */

1746
src/config.c Normal file

File diff suppressed because it is too large Load diff

153
src/config.h Normal file
View file

@ -0,0 +1,153 @@
#ifndef __CONFIG_H__
#define __CONFIG_H__
/* config.h -- read config file and manage config properties
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/29 16:31:08 $
$Revision: 1.14 $
config files associate a property name with a value.
// comments can start at the beginning of a line
# comments can start at the beginning of a line
name: short values fit onto one line
name: a really long value that
continues on the next line
property names are case insensitive and should be less than
60 characters in length and must start at the begining of
the line, as whitespace at the start of a line signifies a
line continuation.
*/
#include "forward.h"
#include "tidy.h"
#include "streamio.h"
/* Forward declaration, so that ParseProperty can refer to the option
   type before struct _tidy_option is defined. */
struct _tidy_option;
typedef struct _tidy_option TidyOptionImpl;
/* Function type of an option parser: it receives the document and the
   description of the option being parsed, and returns a Bool. */
typedef Bool (ParseProperty)( TidyDocImpl* doc, const TidyOptionImpl* opt );
/* Static description of one configuration option. */
struct _tidy_option
{
TidyOptionId id;
TidyConfigCategory category; /* put 'em in groups */
ctmbstr name; /* property name */
TidyOptionType type; /* string, int or bool */
ulong dflt; /* default for TidyInteger and TidyBoolean */
ParseProperty* parser; /* parsing method, read-only if NULL */
const ctmbstr* pickList; /* pick list */
ctmbstr pdflt; /* default for TidyString */
};
/* Storage for the value of one option.  This is a union, so the two
** members share storage and only the one matching the option's type
** is meaningful.
*/
typedef union
{
ulong v; /* Value for TidyInteger and TidyBoolean */
char *p; /* Value for TidyString */
} TidyOptionValue;
/* Configuration state: the current option values, a saved snapshot of
** them, and the state used while reading a config stream.
** NOTE(review): presumably this is the type of the `config` member
** that the cfg*() macros in this header access -- confirm in tidy-int.h.
*/
typedef struct _tidy_config
{
TidyOptionValue value[ N_TIDY_OPTIONS + 1 ]; /* current config values */
TidyOptionValue snapshot[ N_TIDY_OPTIONS + 1 ]; /* Snapshot of values to be restored later */
/* track what tags user has defined to eliminate unnecessary searches */
uint defined_tags;
uint c; /* current char in input stream */
StreamIn* cfgIn; /* current input source */
} TidyConfigImpl;
/** Documentation record for one option: its identifier, descriptive
    text, and a list of related options. */
typedef struct {
TidyOptionId opt; /**< Identifier. */
ctmbstr doc; /**< HTML text */
TidyOptionId const *links; /**< Cross references.
Last element must be 'TidyUnknownOption'. */
} TidyOptionDoc;
/* NOTE(review): the functions declared below are implemented in
** config.c, which is not visible from this header.  The grouping
** comments are inferred from names and signatures only; confirm the
** details against the implementation.
*/
/* Option descriptors: lookup by name or id, and iteration. */
const TidyOptionImpl* TY_(lookupOption)( ctmbstr optnam );
const TidyOptionImpl* TY_(getOption)( TidyOptionId optId );
TidyIterator TY_(getOptionList)( TidyDocImpl* doc );
const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* doc, TidyIterator* iter );
TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option );
ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option, TidyIterator* iter );
const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId );
/* Lifetime of a document's configuration. */
void TY_(InitConfig)( TidyDocImpl* doc );
void TY_(FreeConfig)( TidyDocImpl* doc );
/* Setting, resetting, snapshotting and copying option values. */
/* Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val ); */
Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val );
Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val );
Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId );
void TY_(ResetConfigToDefault)( TidyDocImpl* doc );
void TY_(TakeConfigSnapshot)( TidyDocImpl* doc );
void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc );
void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom );
/* Reading and writing configuration files / sinks. */
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
int TY_(ParseConfigFileEnc)( TidyDocImpl* doc,
ctmbstr cfgfil, ctmbstr charenc );
int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink );
/* returns false if unknown option, missing parameter, or
option doesn't use parameter
*/
Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optVal );
Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optVal );
/* ensure that char encodings are self consistent */
Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding );
/* Comparison of the current values with the defaults / the snapshot. */
Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc );
Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc );
/* Conversions between character-encoding ids and names. */
int TY_(CharEncodingId)( TidyDocImpl* doc, ctmbstr charenc );
ctmbstr TY_(CharEncodingName)( int encoding );
ctmbstr TY_(CharEncodingOptName)( int encoding );
/* void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename ); */
/* Option value accessors: cfg (integer), cfgBool, cfgAutoBool and
** cfgStr.  Both build flavours expose the same four macro names, so
** callers are written identically for debug and release builds.
*/
#ifdef _DEBUG
/* Debug lookup functions will be type-safe and assert option type match */
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId );
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId );
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId );
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId );
/* In debug builds every accessor is a real function call. */
#define cfg(doc, id) TY_(_cfgGet)( (doc), (id) )
#define cfgBool(doc, id) TY_(_cfgGetBool)( (doc), (id) )
#define cfgAutoBool(doc, id) TY_(_cfgGetAutoBool)( (doc), (id) )
#define cfgStr(doc, id) TY_(_cfgGetString)( (doc), (id) )
#else
/* Release build macros for speed */
/* cfg reads the integer member `v` of (doc)->config.value[id] directly;
** cfgBool and cfgAutoBool are plain casts of that integer; cfgStr reads
** the pointer member `p`.  No checking of the option type is done here.
*/
#define cfg(doc, id) ((doc)->config.value[ (id) ].v)
#define cfgBool(doc, id) ((Bool) cfg(doc, id))
#define cfgAutoBool(doc, id) ((TidyTriState) cfg(doc, id))
#define cfgStr(doc, id) ((ctmbstr) (doc)->config.value[ (id) ].p)
#endif /* _DEBUG */
#endif /* __CONFIG_H__ */

419
src/entities.c Normal file
View file

@ -0,0 +1,419 @@
/* entities.c -- recognize HTML ISO entities
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: hoehrmann $
$Date: 2008/08/09 11:55:27 $
$Revision: 1.19 $
Entity handling can be static because there are no config or
document-specific values. Lookup table is 100% defined at
compile time.
*/
#include <stdio.h>
#include "entities.h"
#include "tidy-int.h"
#include "tmbstr.h"
/* One row of the static entities[] table defined below. */
struct _entity;
typedef struct _entity entity;
struct _entity
{
ctmbstr name; /* entity name without the leading '&'; NULL in the sentinel row */
uint versions; /* VERS_* bit mask; tested with '&' in EntityName() */
uint code; /* numeric character code the entity stands for */
};
/* Table of the named character entities known to this file.
** The final row has a NULL name and acts as the sentinel that ends the
** scans in entitiesLookup() and EntityName().  EntityName() stops at
** the first row whose code matches, so row order would matter if a
** code ever appeared more than once.
*/
static const entity entities[] =
{
/*
** Markup pre-defined character entities
*/
{ "quot", VERS_ALL|VERS_XML, 34 },
{ "amp", VERS_ALL|VERS_XML, 38 },
{ "apos", VERS_FROM40|VERS_XML, 39 },
{ "lt", VERS_ALL|VERS_XML, 60 },
{ "gt", VERS_ALL|VERS_XML, 62 },
/*
** Latin-1 character entities
*/
{ "nbsp", VERS_ALL, 160 },
{ "iexcl", VERS_ALL, 161 },
{ "cent", VERS_ALL, 162 },
{ "pound", VERS_ALL, 163 },
{ "curren", VERS_ALL, 164 },
{ "yen", VERS_ALL, 165 },
{ "brvbar", VERS_ALL, 166 },
{ "sect", VERS_ALL, 167 },
{ "uml", VERS_ALL, 168 },
{ "copy", VERS_ALL, 169 },
{ "ordf", VERS_ALL, 170 },
{ "laquo", VERS_ALL, 171 },
{ "not", VERS_ALL, 172 },
{ "shy", VERS_ALL, 173 },
{ "reg", VERS_ALL, 174 },
{ "macr", VERS_ALL, 175 },
{ "deg", VERS_ALL, 176 },
{ "plusmn", VERS_ALL, 177 },
{ "sup2", VERS_ALL, 178 },
{ "sup3", VERS_ALL, 179 },
{ "acute", VERS_ALL, 180 },
{ "micro", VERS_ALL, 181 },
{ "para", VERS_ALL, 182 },
{ "middot", VERS_ALL, 183 },
{ "cedil", VERS_ALL, 184 },
{ "sup1", VERS_ALL, 185 },
{ "ordm", VERS_ALL, 186 },
{ "raquo", VERS_ALL, 187 },
{ "frac14", VERS_ALL, 188 },
{ "frac12", VERS_ALL, 189 },
{ "frac34", VERS_ALL, 190 },
{ "iquest", VERS_ALL, 191 },
{ "Agrave", VERS_ALL, 192 },
{ "Aacute", VERS_ALL, 193 },
{ "Acirc", VERS_ALL, 194 },
{ "Atilde", VERS_ALL, 195 },
{ "Auml", VERS_ALL, 196 },
{ "Aring", VERS_ALL, 197 },
{ "AElig", VERS_ALL, 198 },
{ "Ccedil", VERS_ALL, 199 },
{ "Egrave", VERS_ALL, 200 },
{ "Eacute", VERS_ALL, 201 },
{ "Ecirc", VERS_ALL, 202 },
{ "Euml", VERS_ALL, 203 },
{ "Igrave", VERS_ALL, 204 },
{ "Iacute", VERS_ALL, 205 },
{ "Icirc", VERS_ALL, 206 },
{ "Iuml", VERS_ALL, 207 },
{ "ETH", VERS_ALL, 208 },
{ "Ntilde", VERS_ALL, 209 },
{ "Ograve", VERS_ALL, 210 },
{ "Oacute", VERS_ALL, 211 },
{ "Ocirc", VERS_ALL, 212 },
{ "Otilde", VERS_ALL, 213 },
{ "Ouml", VERS_ALL, 214 },
{ "times", VERS_ALL, 215 },
{ "Oslash", VERS_ALL, 216 },
{ "Ugrave", VERS_ALL, 217 },
{ "Uacute", VERS_ALL, 218 },
{ "Ucirc", VERS_ALL, 219 },
{ "Uuml", VERS_ALL, 220 },
{ "Yacute", VERS_ALL, 221 },
{ "THORN", VERS_ALL, 222 },
{ "szlig", VERS_ALL, 223 },
{ "agrave", VERS_ALL, 224 },
{ "aacute", VERS_ALL, 225 },
{ "acirc", VERS_ALL, 226 },
{ "atilde", VERS_ALL, 227 },
{ "auml", VERS_ALL, 228 },
{ "aring", VERS_ALL, 229 },
{ "aelig", VERS_ALL, 230 },
{ "ccedil", VERS_ALL, 231 },
{ "egrave", VERS_ALL, 232 },
{ "eacute", VERS_ALL, 233 },
{ "ecirc", VERS_ALL, 234 },
{ "euml", VERS_ALL, 235 },
{ "igrave", VERS_ALL, 236 },
{ "iacute", VERS_ALL, 237 },
{ "icirc", VERS_ALL, 238 },
{ "iuml", VERS_ALL, 239 },
{ "eth", VERS_ALL, 240 },
{ "ntilde", VERS_ALL, 241 },
{ "ograve", VERS_ALL, 242 },
{ "oacute", VERS_ALL, 243 },
{ "ocirc", VERS_ALL, 244 },
{ "otilde", VERS_ALL, 245 },
{ "ouml", VERS_ALL, 246 },
{ "divide", VERS_ALL, 247 },
{ "oslash", VERS_ALL, 248 },
{ "ugrave", VERS_ALL, 249 },
{ "uacute", VERS_ALL, 250 },
{ "ucirc", VERS_ALL, 251 },
{ "uuml", VERS_ALL, 252 },
{ "yacute", VERS_ALL, 253 },
{ "thorn", VERS_ALL, 254 },
{ "yuml", VERS_ALL, 255 },
/*
** Extended Entities defined in HTML 4: Symbols
*/
{ "fnof", VERS_FROM40, 402 },
{ "Alpha", VERS_FROM40, 913 },
{ "Beta", VERS_FROM40, 914 },
{ "Gamma", VERS_FROM40, 915 },
{ "Delta", VERS_FROM40, 916 },
{ "Epsilon", VERS_FROM40, 917 },
{ "Zeta", VERS_FROM40, 918 },
{ "Eta", VERS_FROM40, 919 },
{ "Theta", VERS_FROM40, 920 },
{ "Iota", VERS_FROM40, 921 },
{ "Kappa", VERS_FROM40, 922 },
{ "Lambda", VERS_FROM40, 923 },
{ "Mu", VERS_FROM40, 924 },
{ "Nu", VERS_FROM40, 925 },
{ "Xi", VERS_FROM40, 926 },
{ "Omicron", VERS_FROM40, 927 },
{ "Pi", VERS_FROM40, 928 },
{ "Rho", VERS_FROM40, 929 },
{ "Sigma", VERS_FROM40, 931 },
{ "Tau", VERS_FROM40, 932 },
{ "Upsilon", VERS_FROM40, 933 },
{ "Phi", VERS_FROM40, 934 },
{ "Chi", VERS_FROM40, 935 },
{ "Psi", VERS_FROM40, 936 },
{ "Omega", VERS_FROM40, 937 },
{ "alpha", VERS_FROM40, 945 },
{ "beta", VERS_FROM40, 946 },
{ "gamma", VERS_FROM40, 947 },
{ "delta", VERS_FROM40, 948 },
{ "epsilon", VERS_FROM40, 949 },
{ "zeta", VERS_FROM40, 950 },
{ "eta", VERS_FROM40, 951 },
{ "theta", VERS_FROM40, 952 },
{ "iota", VERS_FROM40, 953 },
{ "kappa", VERS_FROM40, 954 },
{ "lambda", VERS_FROM40, 955 },
{ "mu", VERS_FROM40, 956 },
{ "nu", VERS_FROM40, 957 },
{ "xi", VERS_FROM40, 958 },
{ "omicron", VERS_FROM40, 959 },
{ "pi", VERS_FROM40, 960 },
{ "rho", VERS_FROM40, 961 },
{ "sigmaf", VERS_FROM40, 962 },
{ "sigma", VERS_FROM40, 963 },
{ "tau", VERS_FROM40, 964 },
{ "upsilon", VERS_FROM40, 965 },
{ "phi", VERS_FROM40, 966 },
{ "chi", VERS_FROM40, 967 },
{ "psi", VERS_FROM40, 968 },
{ "omega", VERS_FROM40, 969 },
{ "thetasym", VERS_FROM40, 977 },
{ "upsih", VERS_FROM40, 978 },
{ "piv", VERS_FROM40, 982 },
{ "bull", VERS_FROM40, 8226 },
{ "hellip", VERS_FROM40, 8230 },
{ "prime", VERS_FROM40, 8242 },
{ "Prime", VERS_FROM40, 8243 },
{ "oline", VERS_FROM40, 8254 },
{ "frasl", VERS_FROM40, 8260 },
{ "weierp", VERS_FROM40, 8472 },
{ "image", VERS_FROM40, 8465 },
{ "real", VERS_FROM40, 8476 },
{ "trade", VERS_FROM40, 8482 },
{ "alefsym", VERS_FROM40, 8501 },
{ "larr", VERS_FROM40, 8592 },
{ "uarr", VERS_FROM40, 8593 },
{ "rarr", VERS_FROM40, 8594 },
{ "darr", VERS_FROM40, 8595 },
{ "harr", VERS_FROM40, 8596 },
{ "crarr", VERS_FROM40, 8629 },
{ "lArr", VERS_FROM40, 8656 },
{ "uArr", VERS_FROM40, 8657 },
{ "rArr", VERS_FROM40, 8658 },
{ "dArr", VERS_FROM40, 8659 },
{ "hArr", VERS_FROM40, 8660 },
{ "forall", VERS_FROM40, 8704 },
{ "part", VERS_FROM40, 8706 },
{ "exist", VERS_FROM40, 8707 },
{ "empty", VERS_FROM40, 8709 },
{ "nabla", VERS_FROM40, 8711 },
{ "isin", VERS_FROM40, 8712 },
{ "notin", VERS_FROM40, 8713 },
{ "ni", VERS_FROM40, 8715 },
{ "prod", VERS_FROM40, 8719 },
{ "sum", VERS_FROM40, 8721 },
{ "minus", VERS_FROM40, 8722 },
{ "lowast", VERS_FROM40, 8727 },
{ "radic", VERS_FROM40, 8730 },
{ "prop", VERS_FROM40, 8733 },
{ "infin", VERS_FROM40, 8734 },
{ "ang", VERS_FROM40, 8736 },
{ "and", VERS_FROM40, 8743 },
{ "or", VERS_FROM40, 8744 },
{ "cap", VERS_FROM40, 8745 },
{ "cup", VERS_FROM40, 8746 },
{ "int", VERS_FROM40, 8747 },
{ "there4", VERS_FROM40, 8756 },
{ "sim", VERS_FROM40, 8764 },
{ "cong", VERS_FROM40, 8773 },
{ "asymp", VERS_FROM40, 8776 },
{ "ne", VERS_FROM40, 8800 },
{ "equiv", VERS_FROM40, 8801 },
{ "le", VERS_FROM40, 8804 },
{ "ge", VERS_FROM40, 8805 },
{ "sub", VERS_FROM40, 8834 },
{ "sup", VERS_FROM40, 8835 },
{ "nsub", VERS_FROM40, 8836 },
{ "sube", VERS_FROM40, 8838 },
{ "supe", VERS_FROM40, 8839 },
{ "oplus", VERS_FROM40, 8853 },
{ "otimes", VERS_FROM40, 8855 },
{ "perp", VERS_FROM40, 8869 },
{ "sdot", VERS_FROM40, 8901 },
{ "lceil", VERS_FROM40, 8968 },
{ "rceil", VERS_FROM40, 8969 },
{ "lfloor", VERS_FROM40, 8970 },
{ "rfloor", VERS_FROM40, 8971 },
{ "lang", VERS_FROM40, 9001 },
{ "rang", VERS_FROM40, 9002 },
{ "loz", VERS_FROM40, 9674 },
{ "spades", VERS_FROM40, 9824 },
{ "clubs", VERS_FROM40, 9827 },
{ "hearts", VERS_FROM40, 9829 },
{ "diams", VERS_FROM40, 9830 },
/*
** Extended Entities defined in HTML 4: Special (less Markup at top)
*/
{ "OElig", VERS_FROM40, 338 },
{ "oelig", VERS_FROM40, 339 },
{ "Scaron", VERS_FROM40, 352 },
{ "scaron", VERS_FROM40, 353 },
{ "Yuml", VERS_FROM40, 376 },
{ "circ", VERS_FROM40, 710 },
{ "tilde", VERS_FROM40, 732 },
{ "ensp", VERS_FROM40, 8194 },
{ "emsp", VERS_FROM40, 8195 },
{ "thinsp", VERS_FROM40, 8201 },
{ "zwnj", VERS_FROM40, 8204 },
{ "zwj", VERS_FROM40, 8205 },
{ "lrm", VERS_FROM40, 8206 },
{ "rlm", VERS_FROM40, 8207 },
{ "ndash", VERS_FROM40, 8211 },
{ "mdash", VERS_FROM40, 8212 },
{ "lsquo", VERS_FROM40, 8216 },
{ "rsquo", VERS_FROM40, 8217 },
{ "sbquo", VERS_FROM40, 8218 },
{ "ldquo", VERS_FROM40, 8220 },
{ "rdquo", VERS_FROM40, 8221 },
{ "bdquo", VERS_FROM40, 8222 },
{ "dagger", VERS_FROM40, 8224 },
{ "Dagger", VERS_FROM40, 8225 },
{ "permil", VERS_FROM40, 8240 },
{ "lsaquo", VERS_FROM40, 8249 },
{ "rsaquo", VERS_FROM40, 8250 },
{ "euro", VERS_FROM40, 8364 },
{ NULL, VERS_UNKNOWN, 0 }
};
/* Find the table row for the entity name `s` (given without the
** leading '&').
**
** This is a plain linear scan of the static entities[] table, so there
** is no setup cost at all.  The first character is compared before the
** full string comparison; per the original author's note that shortcut
** is cheap enough that a hash would not pay off below roughly 500
** entries.
**
** Returns the matching row, or NULL when `s` is NULL, empty, or not
** present in the table.
*/
static const entity* entitiesLookup( ctmbstr s )
{
    const entity *row;
    tmbchar first;

    if ( s == NULL )
        return NULL;

    first = (tmbchar) *s;
    if ( first == 0 )
        return NULL;

    for ( row = entities; row->name != NULL; ++row )
    {
        if ( *row->name != first )
            continue;               /* cheap reject on the first character */
        if ( TY_(tmbstrcmp)(s, row->name) == 0 )
            return row;
    }
    return NULL;
}
/* NOTE: everything between this #if 0 and its #endif is compiled out.
** It is an older interface; TY_(EntityInfo) below does the same numeric
** and named lookups but reports the result through out-parameters.
*/
#if 0
/* entity starting with "&" returns zero on error */
uint EntityCode( ctmbstr name, uint versions )
{
const entity* np;
assert( name && name[0] == '&' );
/* numeric entity: name = "&#" followed by number */
if ( name[1] == '#' )
{
uint c = 0; /* zero on missing/bad number */
Bool isXml = ( (versions & VERS_XML) == VERS_XML );
/* 'x' prefix denotes hexadecimal number format */
if ( name[2] == 'x' || (!isXml && name[2] == 'X') )
sscanf( name+3, "%x", &c );
else
sscanf( name+2, "%u", &c );
return (uint) c;
}
/* Named entity: name ="&" followed by a name */
if ( NULL != (np = entitiesLookup(name+1)) )
{
/* Only recognize entity name if version supports it. */
if ( np->versions & versions )
return np->code;
}
return 0; /* zero signifies unknown entity name */
}
#endif
/* Resolve an entity reference to its character code and version mask.
**
**   name      entity text starting with '&' (asserted)
**   isXml     when true, only a lower-case 'x' introduces a hex number
**   code      out: character code, 0 if unknown or unparsable
**   versions  out: VERS_* mask for the entity
**
** Numeric references ("&#...") always return yes with *versions set to
** VERS_ALL; a missing or malformed number yields code 0.  Named
** references return yes with the table row's values when found;
** otherwise no, with code 0 and *versions set to VERS_XML (isXml) or
** VERS_PROPRIETARY.
*/
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions )
{
    assert( name && name[0] == '&' );
    assert( code != NULL );
    assert( versions != NULL );

    if ( name[1] != '#' )
    {
        /* Named entity: '&' followed by a name */
        const entity* found = entitiesLookup( name+1 );

        if ( found == NULL )
        {
            *code = 0;
            *versions = ( isXml ? VERS_XML : VERS_PROPRIETARY );
            return no;
        }

        *code = found->code;
        *versions = found->versions;
        return yes;
    }
    else
    {
        /* Numeric entity: "&#" followed by a number */
        uint value = 0;   /* stays zero on a missing/bad number */

        /* an 'x' prefix selects hexadecimal; 'X' is accepted outside XML only */
        if ( name[2] == 'x' || (!isXml && name[2] == 'X') )
            sscanf( name+3, "%x", &value );
        else
            sscanf( name+2, "%u", &value );

        *code = value;
        *versions = VERS_ALL;
        return yes;
    }
}
/* Reverse lookup: return the entity name for character code `ch`.
**
** Only the first table row whose code equals `ch` is considered.  Its
** name is returned when the row's version mask overlaps `versions`;
** otherwise, or when no row has that code, the result is NULL.
*/
ctmbstr TY_(EntityName)( uint ch, uint versions )
{
    const entity *row;

    for ( row = entities; row->name != NULL; ++row )
    {
        if ( row->code != ch )
            continue;

        /* Code found: the search ends here whether or not the version fits. */
        return ( row->versions & versions ) ? row->name : NULL;
    }
    return NULL;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

24
src/entities.h Normal file
View file

@ -0,0 +1,24 @@
#ifndef __ENTITIES_H__
#define __ENTITIES_H__
/* entities.h -- recognize character entities
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.8 $
*/
#include "forward.h"
/* entity starting with "&" returns zero on error */
/* uint EntityCode( ctmbstr name, uint versions ); */
/* Name of the entity for character code charCode, or NULL if there is
** none whose version mask overlaps `versions`.
*/
ctmbstr TY_(EntityName)( uint charCode, uint versions );
/* Look up an entity reference (`name` starts with '&').  Writes the
** character code and version mask through `code` and `versions`, both
** of which must be non-NULL.  Returns no for an unknown named entity.
*/
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions );
#endif /* __ENTITIES_H__ */

106
src/fileio.c Normal file
View file

@ -0,0 +1,106 @@
/* fileio.c -- does standard I/O
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/05/30 16:47:31 $
$Revision: 1.17 $
Default implementations of Tidy input sources
and output sinks based on standard C FILE*.
*/
#include <stdio.h>
#include "forward.h"
#include "fileio.h"
#include "tidy.h"
/* Private state behind a FILE*-based TidyInputSource; a pointer to it
** is stored in the source's sourceData by TY_(initFileSource).
*/
typedef struct _fp_input_source
{
FILE* fp; /* stdio stream the bytes are read from */
TidyBuffer unget; /* pushed-back bytes; served before reading from fp */
} FileSource;
/* getByte callback: return the next byte of the source.
** Pushed-back bytes are handed out first; once the unget buffer is
** empty the byte comes from fgetc() on the underlying stream.
*/
static int TIDY_CALL filesrc_getByte( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size > 0 )
        return tidyBufPopByte( &src->unget );

    return fgetc( src->fp );
}
/* eof callback: the source is at end-of-file only when no pushed-back
** bytes remain AND feof() reports end-of-file on the stream.
*/
static Bool TIDY_CALL filesrc_eof( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    /* Anything still waiting in the unget buffer means "not at EOF". */
    if ( !(src->unget.size == 0) )
        return no;

    return feof( src->fp ) != 0;
}
/* ungetByte callback: push `bv` back so that a later getByte call
** returns it from the unget buffer instead of reading the stream.
*/
static void TIDY_CALL filesrc_ungetByte( void* sourceData, byte bv )
{
    TidyBuffer* pushback = &((FileSource*) sourceData)->unget;

    tidyBufPutByte( pushback, bv );
}
/* When SUPPORT_POSIX_MAPPED_FILES is enabled, the two functions defined
** below are compiled under the names initStdIOFileSource and
** freeStdIOFileSource (the names fileio.h declares for the "Standard C
** I/O" variants).
** NOTE(review): presumably the plain initFileSource/freeFileSource
** names are then provided by a mapped-file implementation elsewhere --
** confirm.
*/
#if SUPPORT_POSIX_MAPPED_FILES
#define initFileSource initStdIOFileSource
#define freeFileSource freeStdIOFileSource
#endif
/* Allocate a FileSource for `fp` and wire it into `inp`.
**
** The state block is obtained from `allocator`, zeroed, and remembers
** both the stream and the allocator (the latter through the unget
** buffer).  `inp` receives the three filesrc_* callbacks and the state
** pointer.
**
** Returns 0 on success, -1 if the allocation fails (in which case
** `inp` is left untouched).
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* state = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );

    if ( state == NULL )
        return -1;

    TidyClearMemory( state, sizeof(FileSource) );
    state->fp = fp;
    state->unget.allocator = allocator;

    inp->sourceData = state;
    inp->getByte    = filesrc_getByte;
    inp->ungetByte  = filesrc_ungetByte;
    inp->eof        = filesrc_eof;

    return 0;
}
/* Release the FileSource attached to `inp`.
**
**   inp      input source previously set up by TY_(initFileSource)
**   closeIt  when true, also fclose() the underlying stream
**
** Frees the unget buffer and then the state block itself, using the
** allocator recorded in the unget buffer.  A source whose sourceData
** is NULL (for example because initialisation failed) is ignored; the
** original code guarded only the fclose() against that and then
** dereferenced the NULL pointer in the two calls that followed.
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    if ( fin == NULL )
        return;     /* nothing was allocated, so nothing to release */

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
/* putByte callback of the FILE*-based output sink: `sinkData` is the
** FILE* registered by TY_(initFileSink); the byte is written with
** fputc() and its result is not checked.
*/
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv )
{
    fputc( bv, (FILE*) sinkData );
}
/* Turn `outp` into a sink that writes to `fp`: the stream becomes the
** sink data and TY_(filesink_putByte) becomes the byte writer.
*/
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

46
src/fileio.h Normal file
View file

@ -0,0 +1,46 @@
#ifndef __FILEIO_H__
#define __FILEIO_H__
/** @file fileio.h - does standard C I/O
Implementation of a FILE* based TidyInputSource and
TidyOutputSink.
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2007/05/30 16:47:31 $
$Revision: 1.8 $
*/
#include "buffio.h"
#ifdef __cplusplus
extern "C" {
#endif
/** Allocate and initialize file input source.
    Returns 0 on success and -1 if the state block cannot be allocated. */
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
/** Free file input source; the stream is closed too when closeIt is true */
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt );
#if SUPPORT_POSIX_MAPPED_FILES
/** Allocate and initialize file input source using Standard C I/O */
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
/** Free file input source using Standard C I/O */
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt );
#endif
/** Initialize file output sink so that bytes are written to fp */
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp );
/* Needed for internal declarations */
/** Byte writer installed by initFileSink; sinkData is the FILE* */
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv );
#ifdef __cplusplus
}
#endif
#endif /* __FILEIO_H__ */

69
src/forward.h Normal file
View file

@ -0,0 +1,69 @@
#ifndef __FORWARD_H__
#define __FORWARD_H__
/* forward.h -- Forward declarations for major Tidy structures
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/02/11 09:45:52 $
$Revision: 1.7 $
Avoids many include file circular dependencies.
Try to keep this file down to the minimum to avoid
cross-talk between modules.
Header files include this file. C files include tidy-int.h.
*/
#include "platform.h"
#include "tidy.h"
/* Internal symbols are prefixed to avoid clashes with other libraries */
/* TY_(name) expands to prvTidy##name.  The paste is done through the
** helper TYDYAPPEND so that `name` is macro-expanded first; fileio.c
** relies on this when it #defines initFileSource to another name.
*/
#define TYDYAPPEND(str1,str2) str1##str2
#define TY_(str) TYDYAPPEND(prvTidy,str)
/* Forward declarations: each struct is only named here, so headers can
** use pointers to these types without including the defining header.
*/
struct _StreamIn;
typedef struct _StreamIn StreamIn;
struct _StreamOut;
typedef struct _StreamOut StreamOut;
struct _TidyDocImpl;
typedef struct _TidyDocImpl TidyDocImpl;
struct _Dict;
typedef struct _Dict Dict;
struct _Attribute;
typedef struct _Attribute Attribute;
struct _AttVal;
typedef struct _AttVal AttVal;
struct _Node;
typedef struct _Node Node;
struct _IStack;
typedef struct _IStack IStack;
struct _Lexer;
typedef struct _Lexer Lexer;
/* Allocator object defined elsewhere in the library. */
extern TidyAllocator TY_(g_default_allocator);
/** Wrappers for easy memory allocation using an allocator */
/* Each wrapper dispatches through the allocator's vtbl and passes the
** allocator itself as the first argument.  Note that `allocator` is
** evaluated twice, so the argument must be free of side effects.
*/
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size)))
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size)))
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block)))
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg)))
/* Zero-fill `size` bytes starting at `block` (plain memset). */
#define TidyClearMemory(block, size) memset((block), 0, (size))
#endif /* __FORWARD_H__ */

105
src/iconvtc.c Normal file
View file

@ -0,0 +1,105 @@
/* iconvtc.c -- Interface to iconv transcoding routines
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: iconvtc.c,v 1.2 2008/08/09 11:55:27 hoehrmann Exp $
*/
#include "tidy.h"
#include "forward.h"
#include "streamio.h"
#ifdef TIDY_ICONV_SUPPORT
#include <iconv.h>
/* maximum number of bytes for a single character */
#define TC_INBUFSIZE 16
/* maximum number of characters per byte sequence */
#define TC_OUTBUFSIZE 16
/* Placeholder: performs no initialisation and always returns no. */
Bool IconvInitInputTranscoder(void)
{
return no;
}
/* Placeholder counterpart of IconvInitInputTranscoder: does nothing. */
void IconvUninitInputTranscoder(void)
{
return;
}
/* Decode one character from `in` through its iconv descriptor.
**
**   firstByte  first byte of the sequence, already read by the caller
**   in         input stream; in->iconvptr holds the iconv_t and
**              in->source supplies any further bytes
**   bytesRead  out: number of input bytes consumed for this character
**
** Bytes are accumulated one at a time and the whole sequence is handed
** to iconv() again after each addition until it converts.  The output
** is read as four UTF-32LE octets and returned as the code point.
**
** Returns EndOfStream if the source runs dry in the middle of a
** sequence, or if TC_INBUFSIZE bytes did not form a character.
** *bytesRead is set on every return path.
**
** Changes from the previous version:
**  - the fourth octet is shifted by 24, not 32 (a 32-bit shift is
**    undefined behaviour on a 32-bit int and never placed the octet in
**    bits 24..31);
**  - the code point is assembled in an unsigned value so that no
**    intermediate shift can overflow a signed int;
**  - *bytesRead is also written when the input buffer is exhausted;
**  - the assertion on &in->source was dropped: the address of a member
**    of a non-NULL object can never be NULL.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while(inbufsize < TC_INBUFSIZE)
    {
        /* iconv() advances these, so they are reset for every attempt */
        char * outbufptr = (char*)outbuf;
        char * inbufptr = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno;    /* capture before anything can overwrite it */

        if (result != (size_t)(-1))
        {
            uint c;

            /* create codepoint from UTF-32LE octets */
            c  = (uint)(unsigned char)outbuf[0];
            c |= (uint)(unsigned char)outbuf[1] << 8;
            c |= (uint)(unsigned char)outbuf[2] << 16;
            c |= (uint)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */
            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */
    /* todo: error message for broken stream? */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
#endif /* TIDY_ICONV_SUPPORT */

15
src/iconvtc.h Normal file
View file

@ -0,0 +1,15 @@
#ifndef __ICONVTC_H__
#define __ICONVTC_H__
#ifdef TIDY_ICONV_SUPPORT
/* iconvtc.h -- Interface to iconv transcoding routines
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: iconvtc.h,v 1.1 2003/04/28 22:59:41 hoehrmann Exp $
*/
#endif /* TIDY_ICONV_SUPPORT */
#endif /* __ICONVTC_H__ */

373
src/istack.c Normal file
View file

@ -0,0 +1,373 @@
/* istack.c -- inline stack for compatibility with Mosaic
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/29 16:31:08 $
$Revision: 1.21 $
*/
#include "tidy-int.h"
#include "lexer.h"
#include "attrs.h"
#include "streamio.h"
#include "tmbstr.h"
/* Deep-copy an attribute list.
**
** Every node of `attrs` is duplicated: the node is first copied as a
** whole, then its name and value strings are re-duplicated with the
** document allocator, its dictionary entry is looked up again, and any
** attached asp/php nodes are cloned.  The tail of the list is copied
** recursively.  Returns NULL for an empty list.
*/
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs)
{
    AttVal *copy;

    if (attrs == NULL)
        return NULL;

    copy = TY_(NewAttribute)(doc);
    *copy = *attrs;     /* shallow copy first; owned pointers are replaced below */

    copy->next = TY_(DupAttrs)( doc, attrs->next );
    copy->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute);
    copy->value = TY_(tmbstrdup)(doc->allocator, attrs->value);
    copy->dict = TY_(FindAttribute)(doc, copy);

    if (attrs->asp)
        copy->asp = TY_(CloneNode)(doc, attrs->asp);
    else
        copy->asp = NULL;

    if (attrs->php)
        copy->php = TY_(CloneNode)(doc, attrs->php);
    else
        copy->php = NULL;

    return copy;
}
/* A node may go on the inline stack only if it has a tag definition
** whose content model includes CM_INLINE and does not include
** CM_OBJECT.
*/
static Bool IsNodePushable( Node *node )
{
    if ( node->tag != NULL
         && (node->tag->model & CM_INLINE)
         && !(node->tag->model & CM_OBJECT) )
    {
        return yes;
    }
    return no;
}
/*
  Push a copy of an inline node onto the inline stack.

  Nothing is pushed when the node is implicit (i.e. was itself generated
  from the inline stack), when it is not pushable (no tag, not inline,
  or an OBJECT/APPLET-like element), or when an element with the same
  tag is already on the stack -- FONT being the one tag that may be
  stacked repeatedly.

  The "already pushed" rule exists so that

    <p><em>text
    <p><em>more text

  is not turned into

    <p><em>text</em></p>
    <p><em><em>more text</em></em>
*/
void TY_(PushInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    IStack *slot;

    if ( node->implicit || !IsNodePushable(node) )
        return;

    if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) )
        return;

    /* grow the stack when the new entry would not fit */
    if (lexer->istacksize + 1 > lexer->istacklength)
    {
        /* an empty stack starts from 6 (perhaps excessive); the
           capacity is then doubled in every case */
        lexer->istacklength =
            2 * (lexer->istacklength == 0 ? 6 : lexer->istacklength);

        lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack,
                            sizeof(IStack)*(lexer->istacklength));
    }

    slot = lexer->istack + lexer->istacksize;
    slot->tag = node->tag;
    slot->element = TY_(tmbstrdup)(doc->allocator, node->element);
    slot->attributes = TY_(DupAttrs)( doc, node->attributes );

    lexer->istacksize += 1;
}
/* Remove the top entry of the inline stack, freeing its attribute list
** and its element name.  The caller must make sure the stack is not
** empty; no check is made here.
*/
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;
    AttVal *attr;
    AttVal *following;

    lexer->istacksize -= 1;
    top = lexer->istack + lexer->istacksize;

    for (attr = top->attributes; attr != NULL; attr = following)
    {
        following = attr->next;
        top->attributes = following;    /* unlink before freeing */
        TY_(FreeAttribute)( doc, attr );
    }

    TidyDocFree(doc, top->element);
}
/* Pop entries off the inline stack until an entry whose tag id equals
** `tid` has been popped, or the stack is empty.  The matching entry
** itself is removed as well.
*/
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;

    while (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* after the pop, istacksize indexes the slot just vacated;
           its tag pointer is still readable */
        if ( lexer->istack[lexer->istacksize].tag->id == tid )
            return;
    }
}
/* Pop the inline stack in response to `node` (which may be NULL).
**
** A non-NULL node that is not pushable leaves the stack untouched.
** For an <a> element the stack is unwound down to and including the
** nearest <a> entry.  In every other case a single entry is popped,
** if there is one.
*/
void TY_(PopInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;

    if (node != NULL)
    {
        if ( !IsNodePushable(node) )
            return;

        /* if node is </a> then pop until we find an <a> */
        if ( nodeIsA(node) )
        {
            PopIStackUntil( doc, TidyTag_A );
            return;
        }
    }

    if ( !(lexer->istacksize > 0) )
        return;

    PopIStack( doc );

    /* #427822 - fix by Randy Waki 7 Aug 00 */
    /* drop the insertion pointer if it now points past the stack top */
    if (lexer->insert >= lexer->istack + lexer->istacksize)
        lexer->insert = NULL;
}
/* Report whether any entry of the inline stack has the same tag as
** `node`.  The stack is scanned from the top downwards.
*/
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize - 1;

    while (idx >= 0)
    {
        if (lexer->istack[idx].tag == node->tag)
            return yes;
        --idx;
    }
    return no;
}
/*
  Test whether the entry on top of the inline stack has the same tag
  as "node".  When "element" is given and is not a pushable node the
  answer is always no.
*/
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node )
{
    Lexer* lexer = doc->lexer;

    if ( element && !IsNodePushable(element) )
        return no;

    if ( lexer->istacksize > 0
         && lexer->istack[lexer->istacksize - 1].tag == node->tag )
    {
        return yes;
    }

    return no;
}
/*
  Arrange for the "missing" inline elements to be re-inserted around
  the contents of block-level elements such as P, TD, TH, DIV, PRE etc.
  It is called at the start of ParseBlock; when the inline stack is not
  empty, as in

    <i><h1>italic heading</h1></i>

  the input is then treated as equivalent to

    <h1><i>italic heading</i></h1>

  This works by putting the lexer into a mode where tokens come from
  the inline stack instead of the input stream: lexer->insert is
  pointed at the first entry above the stack base and `node` is
  remembered in lexer->inode.

  Returns the number of entries above the stack base; the lexer is left
  untouched when that number is not positive.
*/
int TY_(InlineDup)( TidyDocImpl* doc, Node* node )
{
    Lexer* lexer = doc->lexer;
    int pending = lexer->istacksize - lexer->istackbase;

    if (pending > 0)
    {
        lexer->insert = &(lexer->istack[lexer->istackbase]);
        lexer->inode = node;
    }

    return pending;
}
/*
  Cancel any pending insertion from the inline stack.  Used when
  entering a table or another element in which the inlines should not
  be duplicated: both the insertion pointer and the remembered node
  are cleared.
*/
void TY_(DeferDup)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;

    lexer->inode = NULL;
    lexer->insert = NULL;
}
/* Deliver the next token while the lexer is in "insert from the inline
** stack" mode.
**
** If no stack entry is pending, the remembered node (lexer->inode) is
** handed back and forgotten.  Otherwise a new implicit start tag is
** built as a copy of the entry lexer->insert points at, and the
** insertion pointer moves on to the next entry, or to NULL once the
** top of the stack has been passed.
*/
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *token;
    IStack *entry;
    uint next;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        token = lexer->inode;
        lexer->inode = NULL;
        return token;
    }

    /*
      Only for the "latest" node is the position refreshed from the
      input stream; otherwise the current values are kept.
    */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    token = TY_(NewNode)(doc->allocator, lexer);
    token->type = StartTag;
    token->implicit = yes;
    token->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    token->end = lexer->txtend; /* was : lexer->txtstart; */

    entry = lexer->insert;

#if 0 && defined(_DEBUG)
    if ( lexer->istacksize == 0 )
        fprintf( stderr, "0-size istack!\n" );
#endif

    token->element = TY_(tmbstrdup)(doc->allocator, entry->element);
    token->tag = entry->tag;
    token->attributes = TY_(DupAttrs)( doc, entry->attributes );

    /* index of the entry that follows the one just copied */
    next = (uint)(lexer->insert - &(lexer->istack[0])) + 1;

    /* keep going while entries remain, else leave insert mode */
    lexer->insert = (next < lexer->istacksize) ? &(lexer->istack[next]) : NULL;

    return token;
}
/*
We have two CM_INLINE elements pushed ... the first is closing,
but, like the browser, the second should be retained ...
Like <b>bold <i>bold and italics</b> italics only</i>
This function switches the tag positions on the stack,
returning 'yes' if both were found in the expected order.

The search runs downwards from the top: first an entry with
element's tag, then, below it, an entry with node's tag.  The two
entries are exchanged as whole IStack structs.

NOTE(review): the loop index starts at (istacksize - istackbase - 1)
but is used as an absolute index into lexer->istack, so with a
non-zero istackbase the entries scanned are [0 .. size-base-1] rather
than [base .. size-1].  Confirm whether that is intended.
*/
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node )
{
Lexer* lexer = doc->lexer;
if ( lexer
&& element && element->tag
&& node && node->tag
&& TY_(IsPushed)( doc, element )
&& TY_(IsPushed)( doc, node )
&& ((lexer->istacksize - lexer->istackbase) >= 2) )
{
/* we have a chance of succeeding ... */
int i;
for (i = (lexer->istacksize - lexer->istackbase - 1); i >= 0; --i)
{
if (lexer->istack[i].tag == element->tag) {
/* found the element tag - phew */
IStack *istack1 = &lexer->istack[i];
IStack *istack2 = NULL;
--i; /* back one more, and continue */
/* the inner loop shares `i` with the outer one, so whatever it
   scans is not visited again by the outer loop */
for ( ; i >= 0; --i)
{
if (lexer->istack[i].tag == node->tag)
{
/* found the node tag - phew */
istack2 = &lexer->istack[i];
break;
}
}
if ( istack2 )
{
/* perform the swap */
IStack tmp_istack = *istack2;
*istack2 = *istack1;
*istack1 = tmp_istack;
return yes;
}
}
}
}
return no;
}
/*
  We want to push one specific element from the stack, which may not
  be the last one (the case InlineDup() handles).  The stack is
  searched from the top for an entry with element's tag; when one is
  found the lexer is set to insert from that entry, with `node`
  remembered in lexer->inode.  Returns yes if found and set up, no
  otherwise.
*/
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element )
{
    Lexer* lexer = doc->lexer;
    int count, idx;

    if ( element == NULL || element->tag == NULL )
        return no;

    count = lexer->istacksize - lexer->istackbase;
    if ( count <= 0 )
        return no;

    idx = count;
    while ( idx-- > 0 )
    {
        if (lexer->istack[idx].tag != element->tag)
            continue;

        /* found our element tag - insert it */
        lexer->insert = &(lexer->istack[idx]);
        lexer->inode = node;
        return yes;
    }

    return no;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

3832
src/lexer.c Normal file

File diff suppressed because it is too large Load diff

621
src/lexer.h Normal file
View file

@ -0,0 +1,621 @@
#ifndef __LEXER_H__
#define __LEXER_H__
/* lexer.h -- Lexer for html parser
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2008/03/22 21:06:11 $
$Revision: 1.41 $
*/
/*
Given an input source, it returns a sequence of tokens.
GetToken(source) gets the next token
UngetToken(source) provides one level undo
The tags include an attribute list:
- linked list of attribute/value nodes
- each node has 2 NULL-terminated strings.
- entities are replaced in attribute values
white space is compacted if not in preformatted mode
If not in preformatted mode then leading white space
is discarded and subsequent white space sequences
compacted to single space characters.
If XmlTags is no then Tag names are folded to upper
case and attribute names to lower case.
Not yet done:
- Doctype subset and marked sections
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "forward.h"
/* lexer character types
** Each class is a distinct single bit (1u .. 128u), so several classes
** can be combined in one unsigned value and tested with '&'.
*/
#define digit 1u
#define letter 2u
#define namechar 4u
#define white 8u
#define newline 16u
#define lowercase 32u
#define uppercase 64u
#define digithex 128u
/* node->type is one of these values
**
** Classifies a parse tree node (see the "type" member of struct _Node).
** No explicit values are given, so the enumerators are numbered
** 0 (RootNode) through 13 (XmlDecl) in the order listed.
*/
typedef enum
{
RootNode,
DocTypeTag,
CommentTag,
ProcInsTag,
TextNode,
StartTag,
EndTag,
StartEndTag,
CDATATag,
SectionTag,
AspTag,
JsteTag,
PhpTag,
XmlDecl
} NodeType;
/* lexer GetToken states
**
** Values of the "state" member of struct _Lexer, which that struct
** describes as the state of the lexer's finite state machine.
*/
typedef enum
{
LEX_CONTENT,
LEX_GT,
LEX_ENDTAG,
LEX_STARTTAG,
LEX_COMMENT,
LEX_DOCTYPE,
LEX_PROCINSTR,
LEX_CDATA,
LEX_SECTION,
LEX_ASP,
LEX_JSTE,
LEX_PHP,
LEX_XMLDECL
} LexerState;
/* ParseDocTypeDecl state constants
**
** NOTE(review): the names suggest the phases of scanning a DOCTYPE
** declaration (name, PUBLIC/SYSTEM keyword, quoted string, internal
** subset); the consumer is not part of this header -- confirm there.
*/
typedef enum
{
DT_INTERMEDIATE,
DT_DOCTYPENAME,
DT_PUBLICSYSTEM,
DT_QUOTEDSTRING,
DT_INTSUBSET
} ParseDocTypeDeclState;
/* content model shortcut encoding
Descriptions are tentative.
*/
#define CM_UNKNOWN 0
/* Elements with no content. Map to HTML specification. */
#define CM_EMPTY (1 << 0)
/* Elements that appear outside of "BODY". */
#define CM_HTML (1 << 1)
/* Elements that can appear within HEAD. */
#define CM_HEAD (1 << 2)
/* HTML "block" elements. */
#define CM_BLOCK (1 << 3)
/* HTML "inline" elements. */
#define CM_INLINE (1 << 4)
/* Elements that mark list item ("LI"). */
#define CM_LIST (1 << 5)
/* Elements that mark definition list item ("DL", "DT"). */
#define CM_DEFLIST (1 << 6)
/* Elements that can appear inside TABLE. */
#define CM_TABLE (1 << 7)
/* Used for "THEAD", "TFOOT" or "TBODY". */
#define CM_ROWGRP (1 << 8)
/* Used for "TD", "TH" */
#define CM_ROW (1 << 9)
/* Elements whose content must be protected against white space movement.
Includes some elements that can be found in forms. */
#define CM_FIELD (1 << 10)
/* Used to avoid propagating inline emphasis inside some elements
such as OBJECT or APPLET. */
#define CM_OBJECT (1 << 11)
/* Elements that allow "PARAM". */
#define CM_PARAM (1 << 12)
/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
#define CM_FRAMES (1 << 13)
/* Heading elements (h1, h2, ...). */
#define CM_HEADING (1 << 14)
/* Elements with an optional end tag. */
#define CM_OPT (1 << 15)
/* Elements that use "align" attribute for vertical position. */
#define CM_IMG (1 << 16)
/* Elements with inline and block model. Used to avoid calling InlineDup. */
#define CM_MIXED (1 << 17)
/* Elements whose content needs to be indented only if containing one
CM_BLOCK element. */
#define CM_NO_INDENT (1 << 18)
/* Elements that are obsolete (such as "dir", "menu"). */
#define CM_OBSOLETE (1 << 19)
/* User defined elements. Used to determine how attributes without value
should be printed. */
#define CM_NEW (1 << 20)
/* Elements that cannot be omitted. */
#define CM_OMITST (1 << 21)
/* If the document uses just HTML 2.0 tags and attributes describe
** it as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
** If there are proprietary tags and attributes then describe it as
** HTML Proprietary. If it includes the xml-lang or xmlns attributes
** but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
** flavors of Voyager (strict, loose or frameset).
*/
/* unknown */
#define xxxx 0u
/* W3C defined HTML/XHTML family document types */
#define HT20 1u
#define HT32 2u
#define H40S 4u
#define H40T 8u
#define H40F 16u
#define H41S 32u
#define H41T 64u
#define H41F 128u
#define X10S 256u
#define X10T 512u
#define X10F 1024u
#define XH11 2048u
#define XB10 4096u
/* proprietary stuff */
#define VERS_SUN 8192u
#define VERS_NETSCAPE 16384u
#define VERS_MICROSOFT 32768u
/* special flag */
#define VERS_XML 65536u
/* "HTML5" */
#define HT50 131072u
#define XH50 262144u
/* compatibility symbols */
#define VERS_UNKNOWN (xxxx)
#define VERS_HTML20 (HT20)
#define VERS_HTML32 (HT32)
#define VERS_HTML40_STRICT (H40S|H41S|X10S)
#define VERS_HTML40_LOOSE (H40T|H41T|X10T)
#define VERS_FRAMESET (H40F|H41F|X10F)
#define VERS_XHTML11 (XH11)
#define VERS_BASIC (XB10)
/* meta symbols */
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET)
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME)
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11)
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40)
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC)
#define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50)
/* all W3C defined document types */
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
/* all proprietary types */
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
/* Linked list of class names and styles
**
** One list entry per generated style rule; the list head is the
** "styles" member of struct _Lexer.
*/
struct _Style;
typedef struct _Style TagStyle;
struct _Style
{
tmbstr tag; /* presumably the element name the rule applies to -- TODO confirm */
tmbstr tag_class; /* presumably the class name for that rule -- TODO confirm */
tmbstr properties; /* presumably the style property text -- TODO confirm */
TagStyle *next; /* next entry in the list */
};
/* Linked list of style properties
**
** Each entry holds one name/value pair.
*/
struct _StyleProp;
typedef struct _StyleProp StyleProp;
struct _StyleProp
{
tmbstr name; /* property name */
tmbstr value; /* property value */
StyleProp *next; /* next entry in the list */
};
/* Attribute/Value linked list node
**
** A node's attributes are a singly linked list of these (see the
** "attributes" member of struct _Node).
*/
struct _AttVal
{
AttVal* next; /* next attribute in the list */
const Attribute* dict; /* NOTE(review): presumably the dictionary entry for this attribute -- confirm */
Node* asp; /* NOTE(review): presumably an ASP node found in the attribute list -- confirm */
Node* php; /* NOTE(review): presumably a PHP node found in the attribute list -- confirm */
int delim; /* NOTE(review): presumably the quote character delimiting the value -- confirm */
tmbstr attribute; /* attribute name string */
tmbstr value; /* attribute value string */
};
/*
Mosaic handles inlines via a separate stack from other elements
We duplicate this to recover from inline markup errors such as:
<i>italic text
<p>more italic text</b> normal text
which for compatibility with Mosaic is mapped to:
<i>italic text</i>
<p><i>more italic text</i> normal text
Note that any inline end tag pops the effect of the current
inline start tag, so that </b> pops <i> in the above example.

The lexer keeps these entries in an array (the "istack" member of
struct _Lexer) that is indexed directly.
*/
struct _IStack
{
IStack* next; /* NOTE(review): link to another entry; its use is not visible here -- confirm */
const Dict* tag; /* tag's dictionary definition */
tmbstr element; /* name (NULL for text nodes) */
AttVal* attributes; /* attribute list kept with the stacked inline */
};
/* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl,
** etc. etc.
**
** One node of the parse tree.  Text belonging to the node is not
** stored in the node itself: "start" and "end" are offsets into the
** lexer's character buffer (lexbuf).
*/
struct _Node
{
Node* parent; /* tree structure */
Node* prev; /* presumably the previous sibling -- TODO confirm */
Node* next; /* next peer node (see the FreeNode() comment) */
Node* content; /* children of this node; presumably the first child -- TODO confirm */
Node* last; /* presumably the last child -- TODO confirm */
AttVal* attributes; /* linked list of attribute/value pairs */
const Dict* was; /* old tag when it was changed */
const Dict* tag; /* tag's dictionary definition */
tmbstr element; /* name (NULL for text nodes) */
uint start; /* start of span onto text array */
uint end; /* end of span onto text array */
NodeType type; /* TextNode, StartTag, EndTag etc. */
uint line; /* current line of document */
uint column; /* current column of document */
Bool closed; /* true if closed by explicit end tag */
Bool implicit; /* true if inferred */
Bool linebreak; /* true if followed by a line break */
#ifdef TIDY_STORE_ORIGINAL_TEXT
tmbstr otext; /* only present when TIDY_STORE_ORIGINAL_TEXT is defined */
#endif
};
/*
The following are private to the lexer
Use NewLexer() to create a lexer, and
FreeLexer() to free it.

The members inside the "#if 0" sections are compiled out; the first
section is marked as having moved to TidyDocImpl.
*/
struct _Lexer
{
#if 0 /* Move to TidyDocImpl */
StreamIn* in; /* document content input */
StreamOut* errout; /* error output stream */
uint badAccess; /* for accessibility errors */
uint badLayout; /* for bad style errors */
uint badChars; /* for bad character encodings */
uint badForm; /* for mismatched/mispositioned form tags */
uint warnings; /* count of warnings in this document */
uint errors; /* count of errors */
#endif
uint lines; /* lines seen */
uint columns; /* at start of current token */
Bool waswhite; /* used to collapse contiguous white space */
Bool pushed; /* true after token has been pushed back */
Bool insertspace; /* when space is moved after end tag */
Bool excludeBlocks; /* Netscape compatibility */
Bool exiled; /* true if moved out of table */
Bool isvoyager; /* true if xmlns attribute on html element */
uint versions; /* bit vector of HTML versions */
uint doctype; /* version as given by doctype (if any) */
uint versionEmitted; /* version of doctype emitted */
Bool bad_doctype; /* e.g. if html or PUBLIC is missing */
uint txtstart; /* start of current node */
uint txtend; /* end of current node */
LexerState state; /* state of lexer's finite state machine */
Node* token; /* last token returned by GetToken() */
Node* itoken; /* last duplicate inline returned by GetToken() */
Node* root; /* remember root node of the document */
Node* parent; /* remember parent node for CDATA elements */
Bool seenEndBody; /* true if a </body> tag has been encountered */
Bool seenEndHtml; /* true if a </html> tag has been encountered */
/*
Lexer character buffer
Parse tree nodes span onto this buffer
which contains the concatenated text
contents of all of the elements.
lexsize must be reset for each file.
*/
tmbstr lexbuf; /* MB character buffer */
uint lexlength; /* allocated */
uint lexsize; /* used */
/* Inline stack for compatibility with Mosaic */
Node* inode; /* for deferring text node */
IStack* insert; /* for inferring inline tags */
IStack* istack; /* array of stacked inlines, indexed directly */
uint istacklength; /* allocated */
uint istacksize; /* used */
uint istackbase; /* start of frame */
TagStyle *styles; /* used for cleaning up presentation markup */
TidyAllocator* allocator; /* allocator */
#if 0
TidyDocImpl* doc; /* Pointer back to doc for error reporting */
#endif
};
/* Lexer Functions
*/
/* choose what version to use for new doctype */
int TY_(HTMLVersion)( TidyDocImpl* doc );
/* everything is allowed in proprietary version of HTML */
/* this is handled here rather than in the tag/attr dicts */
void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
Bool TY_(IsWhite)(uint c);
Bool TY_(IsDigit)(uint c);
Bool TY_(IsLetter)(uint c);
Bool TY_(IsNewline)(uint c);
Bool TY_(IsNamechar)(uint c);
Bool TY_(IsXMLLetter)(uint c);
Bool TY_(IsXMLNamechar)(uint c);
/* Bool IsLower(uint c); */
Bool TY_(IsUpper)(uint c);
uint TY_(ToLower)(uint c);
uint TY_(ToUpper)(uint c);
Lexer* TY_(NewLexer)( TidyDocImpl* doc );
void TY_(FreeLexer)( TidyDocImpl* doc );
/* store character c as UTF-8 encoded byte stream */
void TY_(AddCharToLexer)( Lexer *lexer, uint c );
/*
Used for elements and text nodes
element name is NULL for text nodes
start and end are offsets into lexbuf
which contains the textual content of
all elements in the parse tree.
parent and content allow traversal
of the parse tree in any direction.
attributes are represented as a linked
list of AttVal nodes which hold the
strings for attribute/value pairs.
*/
Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
/* used to clone heading nodes when split by an <HR> */
Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
/* free node's attributes */
void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
/* doesn't repair attribute list linkage */
void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
/* detach attribute from node */
void TY_(DetachAttribute)( Node *node, AttVal *attr );
/* detach attribute from node then free it
*/
void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
/*
Free document nodes by iterating through peers and recursing
through children. Set next to NULL before calling FreeNode()
to avoid freeing peer nodes. Doesn't patch up prev/next links.
*/
void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
Node* TY_(TextToken)( Lexer *lexer );
/* used for creating preformatted text from Word2000 */
Node* TY_(NewLineNode)( Lexer *lexer );
/* used for adding a &nbsp; for Word2000 */
Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
/* void AddStringLiteralLen( Lexer* lexer, ctmbstr str, int len ); */
/* find element */
Node* TY_(FindDocType)( TidyDocImpl* doc );
Node* TY_(FindHTML)( TidyDocImpl* doc );
Node* TY_(FindHEAD)( TidyDocImpl* doc );
Node* TY_(FindTITLE)(TidyDocImpl* doc);
Node* TY_(FindBody)( TidyDocImpl* doc );
Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
/* Returns containing block element, if any */
Node* TY_(FindContainer)( Node* node );
/* add meta element for Tidy */
Bool TY_(AddGenerator)( TidyDocImpl* doc );
uint TY_(ApparentVersion)( TidyDocImpl* doc );
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml );
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
/* fixup doctype if missing */
Bool TY_(FixDocType)( TidyDocImpl* doc );
/* ensure XML document starts with <?xml version="1.0"?> */
/* add encoding attribute if not using ASCII or UTF-8 output */
Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
void TY_(UngetToken)( TidyDocImpl* doc );
/*
modes for GetToken()
MixedContent -- for elements which don't accept PCDATA
Preformatted -- white space preserved as is
IgnoreMarkup -- for CDATA elements such as script, style

NOTE(review): IgnoreWhitespace and CdataContent are not described
here; their meaning has to be taken from the GetToken()
implementation.
*/
typedef enum
{
IgnoreWhitespace,
MixedContent,
Preformatted,
IgnoreMarkup,
CdataContent
} GetTokenMode;
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
void TY_(InitMap)(void);
/* create a new attribute */
AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
/* create a new attribute with given name and value */
AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
int delim );
/* insert attribute at the end of attribute list of a node */
void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
/* insert attribute at the start of attribute list of a node */
void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
/*************************************
In-line Stack functions
*************************************/
/* duplicate attributes */
AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
/*
push a copy of an inline node onto stack
but don't push if implicit or OBJECT or APPLET
(implicit tags are ones generated from the istack)
One issue arises with pushing inlines when
the tag is already pushed. For instance:
<p><em>text
<p><em>more text
Shouldn't be mapped to
<p><em>text</em></p>
<p><em><em>more text</em></em>
*/
void TY_(PushInline)( TidyDocImpl* doc, Node* node );
/* pop inline stack */
void TY_(PopInline)( TidyDocImpl* doc, Node* node );
Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
/*
This has the effect of inserting "missing" inline
elements around the contents of blocklevel elements
such as P, TD, TH, DIV, PRE etc. This procedure is
called at the start of ParseBlock, when the inline
stack is not empty, as will be the case in:
<i><h1>italic heading</h1></i>
which is then treated as equivalent to
<h1><i>italic heading</i></h1>
This is implemented by setting the lexer into a mode
where it gets tokens from the inline stack rather than
from the input stream.
*/
int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
/*
defer duplicates when entering a table or other
element where the inlines shouldn't be duplicated
*/
void TY_(DeferDup)( TidyDocImpl* doc );
Node* TY_(InsertedToken)( TidyDocImpl* doc );
/* stack manipulation for inline elements */
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
#ifdef __cplusplus
}
#endif
#endif /* __LEXER_H__ */

1882
src/localize.c Normal file

File diff suppressed because it is too large Load diff

329
src/mappedio.c Executable file
View file

@ -0,0 +1,329 @@
/* Interface to mmap style I/O
(c) 2006-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
Originally contributed by Cory Nelson and Nuno Lopes
$Id: mappedio.c,v 1.14 2008/03/18 20:19:35 arnaud02 Exp $
*/
/* keep these here to keep file non-empty */
#include "forward.h"
#include "mappedio.h"
#if SUPPORT_POSIX_MAPPED_FILES
#include "fileio.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>
/* State of an input source that reads from an mmap()ed file.
** Created by initFileSource() and released by freeFileSource().
*/
typedef struct
{
TidyAllocator *allocator; /* allocator used to free this struct */
const byte *base; /* address returned by mmap() */
size_t pos, size; /* pos: index of the next byte to read; size: mapped length (st_size) */
} MappedFileSource;
/* getByte callback: return the byte at the current position of the
** mapping and advance the position by one.
**
** NOTE(review): no bounds check is made here; the caller appears to be
** expected to consult the eof callback first -- confirm.
*/
static int TIDY_CALL mapped_getByte( void* sourceData )
{
    MappedFileSource* src = (MappedFileSource*) sourceData;
    size_t at = src->pos;

    src->pos = at + 1;
    return src->base[at];
}
/* eof callback: true once the read position has reached (or passed)
** the size of the mapping.
*/
static Bool TIDY_CALL mapped_eof( void* sourceData )
{
    MappedFileSource* src = (MappedFileSource*) sourceData;

    return ( src->size <= src->pos );
}
/* ungetByte callback: step the read position back by one byte.
**
** The byte value itself is ignored because the mapping is read-only
** and still holds the original data.  A request to unget at the very
** start of the mapping is ignored: "pos" is a size_t, so decrementing
** it at 0 would wrap around to SIZE_MAX and make the next read index
** far outside the mapped region.
*/
static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;

    if ( fin->pos > 0 )
        fin->pos--;
}
/* Set up "inp" to read the whole of "fp" through a read-only memory
** mapping.
**
** Returns -1 if the bookkeeping struct cannot be allocated.  If the
** file cannot be stat'ed, is empty, or cannot be mapped, the struct is
** released again and the result of TY_(initStdIOFileSource)() is
** returned instead.  On success "fp" is closed here, the mapped
** callbacks are installed in "inp", and 0 is returned.
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    MappedFileSource* src;
    struct stat info;
    int fd;
    int mapped = 0;

    src = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( src == NULL )
        return -1;

    fd = fileno(fp);
    if ( fstat(fd, &info) != -1 && info.st_size != 0 )
    {
        src->size = info.st_size;
        src->base = mmap( 0, src->size, PROT_READ, MAP_SHARED, fd, 0 );
        mapped = ( src->base != MAP_FAILED );
    }

    if ( !mapped )
    {
        TidyFree( allocator, src );
        /* Fallback on standard I/O */
        return TY_(initStdIOFileSource)( allocator, inp, fp );
    }

    src->pos = 0;
    src->allocator = allocator;

    /* the stream is closed here; reads go through the mapping from now on */
    fclose(fp);

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;
    inp->sourceData = src;

    return 0;
}
/* Release an input source set up by TY_(initFileSource)().
**
** A source whose getByte callback is the mapped reader is unmapped and
** its bookkeeping struct freed ("closeIt" is not consulted on that
** path).  Any other source is handed to TY_(freeStdIOFileSource)()
** together with "closeIt".
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* src;

    if ( inp->getByte != mapped_getByte )
    {
        TY_(freeStdIOFileSource)( inp, closeIt );
        return;
    }

    src = (MappedFileSource*) inp->sourceData;
    munmap( (void*)src->base, src->size );
    TidyFree( src->allocator, src );
}
#endif
#if defined(_WIN32)
#include "streamio.h"
#include "tidy-int.h"
#include "message.h"
#include <errno.h>
#if _MSC_VER < 1300 /* less than msvc++ 7.0 */
#pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
#endif
#include <windows.h>
/* State of an input source that reads a file through a sliding
** Win32 file-mapping view.  Only one window of the file is mapped at a
** time; mapped_openView() maps the window that starts at "pos".
*/
typedef struct _fp_input_mapped_source
{
TidyAllocator *allocator; /* allocator used to free this struct */
LONGLONG size, pos; /* size: file size in bytes; pos: file offset of the current view */
HANDLE file, map; /* file handle and the file-mapping object created from it */
byte *view, *iter, *end; /* view: start of the mapped window; iter: next byte to read; end: one past the window */
unsigned int gran; /* window step, taken from dwAllocationGranularity */
} MappedFileSource;
/* Map the window of the file that begins at data->pos, replacing any
** window that is currently mapped.  The window is data->gran bytes
** long, or shorter when less than that remains before the end of the
** file.  On success iter/end are set to span the new window and 0 is
** returned; -1 is returned when the view cannot be mapped.
*/
static int mapped_openView( MappedFileSource *data )
{
    LONGLONG remaining = data->size - data->pos;
    DWORD span;

    if ( remaining > data->gran )
        span = data->gran;
    else
        span = (DWORD)remaining;

    /* drop the previous window before mapping the next one */
    if ( data->view )
        UnmapViewOfFile( data->view );

    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, span );
    if ( data->view == NULL )
        return -1;

    data->iter = data->view;
    data->end  = data->view + span;
    return 0;
}
/* getByte callback: return the next byte of the file.
**
** When no window is mapped or the current one is used up, the file
** offset is advanced by one window step and the next window is mapped.
** EndOfStream is returned when that offset reaches the file size or
** the window cannot be mapped.
*/
static int TIDY_CALL mapped_getByte( void *sourceData )
{
    MappedFileSource *src = (MappedFileSource*) sourceData;
    byte value;

    if ( src->view == NULL || src->iter >= src->end )
    {
        src->pos += src->gran;
        if ( src->pos >= src->size )
            return EndOfStream;
        if ( mapped_openView(src) != 0 )
            return EndOfStream;
    }

    value = *src->iter;
    src->iter++;
    return value;
}
/* eof callback: true once the offset of the current window has
** reached (or passed) the size of the file.
*/
static Bool TIDY_CALL mapped_eof( void *sourceData )
{
    MappedFileSource *src = (MappedFileSource*) sourceData;

    return ( src->size <= src->pos );
}
/* ungetByte callback: step the read position back by one byte.
**
** The byte value itself is ignored because the view is read-only and
** still holds the original data.
**
** Inside the current window the iterator is simply moved back.  When
** the iterator already sits on the first byte of the window, it must
** not be decremented (that would point before the mapped view);
** instead the previous window is mapped and the iterator is placed on
** its last byte, which is the byte being pushed back.  Ungetting at
** the very start of the file is a caller error and trips the assert.
*/
static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = sourceData;

    /* strictly greater: iter == view means there is nothing left to
       step back over in this window */
    if ( data->view && data->iter > data->view )
    {
        --data->iter;
        return;
    }

    if ( data->pos < data->gran )
    {
        assert(0);
        return;
    }

    data->pos -= data->gran;
    /* mapped_openView() leaves iter at the start of the window; the
       byte to re-read is the last one of the previous window */
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
/* Set up "inp" to read the file behind the Win32 handle "fp" through a
** sliding file-mapping view.
**
** Returns 0 on success and -1 when the bookkeeping struct cannot be
** allocated, the file size cannot be obtained or is not positive, the
** file-mapping object cannot be created, or the first view cannot be
** mapped.  Note that the callbacks in "inp" are installed before any
** of these checks, so they remain set on the failure paths while
** inp->sourceData is only assigned on success.
*/
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
MappedFileSource* fin = NULL;
inp->getByte = mapped_getByte;
inp->eof = mapped_eof;
inp->ungetByte = mapped_ungetByte;
fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
if ( !fin )
return -1;
/* Obtain the 64-bit file size; older compilers use GetFileSize() and
   fill the two halves of fin->size separately. */
#if _MSC_VER < 1300 /* less than msvc++ 7.0 */
{
LARGE_INTEGER* pli = (LARGE_INTEGER *)&fin->size;
(DWORD)pli->LowPart = GetFileSize( fp, (DWORD *)&pli->HighPart );
if ( GetLastError() != NO_ERROR || fin->size <= 0 )
{
TidyFree(allocator, fin);
return -1;
}
}
#else
if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size )
|| fin->size <= 0 )
{
TidyFree(allocator, fin);
return -1;
}
#endif
/* read-only mapping object covering the whole file */
fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );
if ( !fin->map )
{
TidyFree(allocator, fin);
return -1;
}
/* views are opened in steps of the system allocation granularity */
{
SYSTEM_INFO info;
GetSystemInfo( &info );
fin->gran = info.dwAllocationGranularity;
}
fin->allocator = allocator;
fin->pos = 0;
fin->view = NULL;
fin->iter = NULL;
fin->end = NULL;
/* map the first window, starting at offset 0 */
if ( mapped_openView( fin ) != 0 )
{
CloseHandle( fin->map );
TidyFree( allocator, fin );
return -1;
}
fin->file = fp;
inp->sourceData = fin;
return 0;
}
/* Release an input source set up by initMappedFileSource().
**
** The mapped view and the file-mapping object belong to the source
** and are always released, since the struct that tracks them is freed
** here.  The underlying file handle is closed only when "closeIt" is
** set and the handle is valid.  A source without bookkeeping data
** (inp->sourceData == NULL) is left alone.
*/
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    /* nothing was set up, so there is nothing to release or free */
    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );
    if ( fin->map )
        CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    TidyFree( fin->allocator, fin );
}
/* Create an input stream for "doc" that reads the file behind the
** Win32 handle "fp" through a mapped view, decoding it with the given
** encoding.  Returns NULL (after freeing the stream) when the mapped
** source cannot be initialised.
*/
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );

    if ( initMappedFileSource( doc->allocator, &stream->source, fp ) == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    TY_(freeStreamIn)( stream );
    return NULL;
}
/* Parse the file named "filnam" into "doc", reading it through a
** Win32 file mapping.
**
** Returns the status of TY_(DocParseStream)() on success, -ENOENT
** (after reporting a file error) when the file cannot be opened, and
** -ENOMEM when the mapped input stream cannot be created.
*/
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
int status = -ENOENT;
HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
OPEN_EXISTING, 0, NULL );
#if PRESERVE_FILE_TIMES
LONGLONG actime, modtime;
TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
/* Remember the file's access and modification times when the
   TidyKeepFileTimes option is set. */
if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
{
/* TY_I64 appends the 64-bit integer literal suffix understood by the
   compiler in use. */
#define TY_I64(str) TYDYAPPEND(str,LL)
#if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */
# undef TY_I64
# define TY_I64(str) TYDYAPPEND(str,i64)
#endif
/* FILETIME counts 100-nanosecond intervals since 1601-01-01; subtract
   the offset to 1970-01-01 and divide by 10^7 to obtain time_t seconds. */
doc->filetimes.actime =
(time_t)( ( actime - TY_I64(116444736000000000)) / 10000000 );
doc->filetimes.modtime =
(time_t)( ( modtime - TY_I64(116444736000000000)) / 10000000 );
}
#endif
if ( fin != INVALID_HANDLE_VALUE )
{
StreamIn* in = MappedFileInput( doc, fin,
cfg( doc, TidyInCharEncoding ) );
if ( !in )
{
/* the stream never took ownership of the handle, so close it here */
CloseHandle( fin );
return -ENOMEM;
}
status = TY_(DocParseStream)( doc, in );
/* releases the mapping and closes the file handle */
freeMappedFileSource( &in->source, yes );
TY_(freeStreamIn)( in );
}
else /* Error message! */
TY_(FileError)( doc, filnam, TidyError );
return status;
}
#endif
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

16
src/mappedio.h Executable file
View file

@ -0,0 +1,16 @@
#ifndef __TIDY_MAPPED_IO_H__
#define __TIDY_MAPPED_IO_H__
/* Interface to mmap style I/O
(c) 2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: mappedio.h,v 1.2 2006/09/15 16:50:37 arnaud02 Exp $
*/
#if defined(_WIN32)
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam );
#endif
#endif /* __TIDY_MAPPED_IO_H__ */

207
src/message.h Normal file
View file

@ -0,0 +1,207 @@
#ifndef __MESSAGE_H__
#define __MESSAGE_H__
/* message.h -- general message writing routines
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/05/30 16:47:31 $
$Revision: 1.29 $
*/
#include "forward.h"
#include "tidy.h" /* For TidyReportLevel */
/* General message writing routines.
** Each message is a single warning, error, etc.
**
** This routine will keep track of counts and,
** if the caller has set a filter, it will be
** called. The new preferred way of handling
** Tidy diagnostics output is either a) define
** a new output sink or b) install a message
** filter routine.
**
** Keeps track of ShowWarnings, ShowErrors, etc.
*/
ctmbstr TY_(ReleaseDate)(void);
/* void TY_(ShowVersion)( TidyDocImpl* doc ); */
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option );
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc );
/* void TY_(HelloMessage)( TidyDocImpl* doc, ctmbstr date, ctmbstr filename ); */
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
void TY_(ReportNumWarnings)( TidyDocImpl* doc );
void TY_(GeneralInfo)( TidyDocImpl* doc );
/* void TY_(UnknownOption)( TidyDocImpl* doc, char c ); */
/* void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); */
void TY_(FileError)( TidyDocImpl* doc, ctmbstr file, TidyReportLevel level );
void TY_(ErrorSummary)( TidyDocImpl* doc );
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
void TY_(ReportAttrError)( TidyDocImpl* doc, Node* node, AttVal* av, uint code );
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
#if SUPPORT_ACCESSIBILITY_CHECKS
void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code );
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code );
#endif
void TY_(ReportNotice)(TidyDocImpl* doc, Node *element, Node *node, uint code);
void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code);
void TY_(ReportError)(TidyDocImpl* doc, Node* element, Node* node, uint code);
void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
/* error codes for entities/numeric character references */
#define MISSING_SEMICOLON 1
#define MISSING_SEMICOLON_NCR 2
#define UNKNOWN_ENTITY 3
#define UNESCAPED_AMPERSAND 4
#define APOS_UNDEFINED 5
/* error codes for element messages */
#define MISSING_ENDTAG_FOR 6
#define MISSING_ENDTAG_BEFORE 7
#define DISCARDING_UNEXPECTED 8
#define NESTED_EMPHASIS 9
#define NON_MATCHING_ENDTAG 10
#define TAG_NOT_ALLOWED_IN 11
#define MISSING_STARTTAG 12
#define UNEXPECTED_ENDTAG 13
#define USING_BR_INPLACE_OF 14
#define INSERTING_TAG 15
#define SUSPECTED_MISSING_QUOTE 16
#define MISSING_TITLE_ELEMENT 17
#define DUPLICATE_FRAMESET 18
#define CANT_BE_NESTED 19
#define OBSOLETE_ELEMENT 20
#define PROPRIETARY_ELEMENT 21
#define UNKNOWN_ELEMENT 22
#define TRIM_EMPTY_ELEMENT 23
#define COERCE_TO_ENDTAG 24
#define ILLEGAL_NESTING 25
#define NOFRAMES_CONTENT 26
#define CONTENT_AFTER_BODY 27
#define INCONSISTENT_VERSION 28
#define MALFORMED_COMMENT 29
#define BAD_COMMENT_CHARS 30
#define BAD_XML_COMMENT 31
#define BAD_CDATA_CONTENT 32
#define INCONSISTENT_NAMESPACE 33
#define DOCTYPE_AFTER_TAGS 34
#define MALFORMED_DOCTYPE 35
#define UNEXPECTED_END_OF_FILE 36
#define DTYPE_NOT_UPPER_CASE 37
#define TOO_MANY_ELEMENTS 38
#define UNESCAPED_ELEMENT 39
#define NESTED_QUOTATION 40
#define ELEMENT_NOT_EMPTY 41
#define ENCODING_IO_CONFLICT 42
#define MIXED_CONTENT_IN_BLOCK 43
#define MISSING_DOCTYPE 44
#define SPACE_PRECEDING_XMLDECL 45
#define TOO_MANY_ELEMENTS_IN 46
#define UNEXPECTED_ENDTAG_IN 47
#define REPLACING_ELEMENT 83
#define REPLACING_UNEX_ELEMENT 84
#define COERCE_TO_ENDTAG_WARN 85
/* error codes used for attribute messages */
#define UNKNOWN_ATTRIBUTE 48
#define INSERTING_ATTRIBUTE 49
#define MISSING_ATTR_VALUE 50
#define BAD_ATTRIBUTE_VALUE 51
#define UNEXPECTED_GT 52
#define PROPRIETARY_ATTRIBUTE 53
#define PROPRIETARY_ATTR_VALUE 54
#define REPEATED_ATTRIBUTE 55
#define MISSING_IMAGEMAP 56
#define XML_ATTRIBUTE_VALUE 57
#define UNEXPECTED_QUOTEMARK 58
#define MISSING_QUOTEMARK 59
#define ID_NAME_MISMATCH 60
#define BACKSLASH_IN_URI 61
#define FIXED_BACKSLASH 62
#define ILLEGAL_URI_REFERENCE 63
#define ESCAPED_ILLEGAL_URI 64
#define NEWLINE_IN_URI 65
#define ANCHOR_NOT_UNIQUE 66
#define JOINING_ATTRIBUTE 68
#define UNEXPECTED_EQUALSIGN 69
#define ATTR_VALUE_NOT_LCASE 70
#define XML_ID_SYNTAX 71
#define INVALID_ATTRIBUTE 72
#define BAD_ATTRIBUTE_VALUE_REPLACED 73
#define INVALID_XML_ID 74
#define UNEXPECTED_END_OF_FILE_ATTR 75
#define MISSING_ATTRIBUTE 86
#define WHITE_IN_URI 87
#define PREVIOUS_LOCATION 88 /* last */
/* character encoding errors */
#define VENDOR_SPECIFIC_CHARS 76
#define INVALID_SGML_CHARS 77
#define INVALID_UTF8 78
#define INVALID_UTF16 79
#define ENCODING_MISMATCH 80
#define INVALID_URI 81
#define INVALID_NCR 82
/* accessibility flaws
**
** Bit flags; each value occupies its own bit so that several can be
** combined in one unsigned word.
*/
#define BA_MISSING_IMAGE_ALT 1
#define BA_MISSING_LINK_ALT 2
#define BA_MISSING_SUMMARY 4
#define BA_MISSING_IMAGE_MAP 8
#define BA_USING_FRAMES 16
#define BA_USING_NOFRAMES 32
#define BA_INVALID_LINK_NOFRAMES 64 /* WAI [6.5.1.4] */
/* Top bit of a 32-bit word.  The operand is unsigned because shifting
** a signed 1 into the sign bit of an int is undefined behaviour. */
#define BA_WAI (1u << 31)
/* presentation flaws */
#define USING_SPACER 1
#define USING_LAYER 2
#define USING_NOBR 4
#define USING_FONT 8
#define USING_BODY 16
#define REPLACED_CHAR 0
#define DISCARDED_CHAR 1
/* badchar bit field */
#define BC_VENDOR_SPECIFIC_CHARS 1
#define BC_INVALID_SGML_CHARS 2
#define BC_INVALID_UTF8 4
#define BC_INVALID_UTF16 8
#define BC_ENCODING_MISMATCH 16 /* fatal error */
#define BC_INVALID_URI 32
#define BC_INVALID_NCR 64
#endif /* __MESSAGE_H__ */

4408
src/parser.c Normal file

File diff suppressed because it is too large Load diff

76
src/parser.h Normal file
View file

@ -0,0 +1,76 @@
#ifndef __PARSER_H__
#define __PARSER_H__
/* parser.h -- HTML Parser
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/05/30 16:47:31 $
$Revision: 1.14 $
*/
#include "forward.h"
Bool TY_(CheckNodeIntegrity)(Node *node);
Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
/*
used to determine how attributes
without values should be printed
this was introduced to deal with
user defined tags e.g. Cold Fusion
*/
Bool TY_(IsNewNode)(Node *node);
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
/* extract a node and its children from a markup tree */
Node *TY_(RemoveNode)(Node *node);
/* remove node from markup tree and discard it */
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
/* insert node into markup tree as the first element
of content of element */
void TY_(InsertNodeAtStart)(Node *element, Node *node);
/* insert node into markup tree as the last element
of content of "element" */
void TY_(InsertNodeAtEnd)(Node *element, Node *node);
/* insert node into markup tree before element */
void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
/* insert node into markup tree after element */
void TY_(InsertNodeAfterElement)(Node *element, Node *node);
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
/* assumes node is a text node */
Bool TY_(IsBlank)(Lexer *lexer, Node *node);
Bool TY_(IsJavaScript)(Node *node);
/*
HTML is the top level element
*/
void TY_(ParseDocument)( TidyDocImpl* doc );
/*
XML documents
*/
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
void TY_(ParseXMLDocument)( TidyDocImpl* doc );
#endif /* __PARSER_H__ */

2276
src/pprint.c Normal file

File diff suppressed because it is too large Load diff

93
src/pprint.h Normal file
View file

@ -0,0 +1,93 @@
#ifndef __PPRINT_H__
#define __PPRINT_H__
/* pprint.h -- pretty print parse tree
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info:
$Author: arnaud02 $
$Date: 2007/02/11 09:45:08 $
$Revision: 1.9 $
*/
#include "forward.h"
/*
Block-level and unknown elements are printed on
new lines and their contents indented 2 spaces
Inline elements are printed inline.
Inline content is wrapped on spaces (except in
attribute values or preformatted text, after
start tags and before end tags)
*/
#define NORMAL 0u
#define PREFORMATTED 1u
#define COMMENT 2u
#define ATTRIBVALUE 4u
#define NOWRAP 8u
#define CDATA 16u
/* The pretty printer keeps at most two lines of text in the
** buffer before flushing output. We need to capture the
** indent state (indent level) at the _beginning_ of _each_
** line, not the end of just the second line.
**
** We must also keep track "In Attribute" and "In String"
** states at the _end_ of each line,
*/
typedef struct _TidyIndent
{
int spaces;
int attrValStart;
int attrStringStart;
} TidyIndent;
/* State of the pretty printer's line buffer. */
typedef struct _TidyPrintImpl
{
TidyAllocator *allocator; /* Allocator */
uint *linebuf; /* buffered characters, one uint per character */
uint lbufsize; /* presumably the allocated size of linebuf -- TODO confirm */
uint linelen; /* presumably the number of characters in use -- TODO confirm */
uint wraphere; /* presumably the position at which the line may be wrapped -- TODO confirm */
uint ixInd; /* presumably the index into indent[] in use -- TODO confirm */
TidyIndent indent[2]; /* Two lines worth of indent state */
} TidyPrintImpl;
#if 0 && SUPPORT_ASIAN_ENCODINGS
/* #431953 - start RJ Wraplen adjusted for smooth international ride */
uint CWrapLen( TidyDocImpl* doc, uint ind );
#endif
void TY_(InitPrintBuf)( TidyDocImpl* doc );
void TY_(FreePrintBuf)( TidyDocImpl* doc );
void TY_(PFlushLine)( TidyDocImpl* doc, uint indent );
/* print just the content of the body element.
** useful when you want to reuse material from
** other documents.
**
** -- Sebastiano Vigna <vigna@dsi.unimi.it>
*/
void TY_(PrintBody)( TidyDocImpl* doc ); /* you can print an entire document */
/* node as body using PPrintTree() */
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
#endif /* __PPRINT_H__ */

1407
src/streamio.c Normal file

File diff suppressed because it is too large Load diff

222
src/streamio.h Normal file
View file

@ -0,0 +1,222 @@
#ifndef __STREAMIO_H__
#define __STREAMIO_H__
/* streamio.h -- handles character stream I/O
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/07/22 09:33:26 $
$Revision: 1.21 $
Wrapper around Tidy input source and output sink
that calls appropriate interfaces, and applies
necessary char encoding transformations: to/from
ISO-10646 and/or UTF-8.
*/
#include "forward.h"
#include "buffio.h"
#include "fileio.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* Kind of backing store behind a stream; stored in the `iotype` member of
** StreamIn and StreamOut.
** NOTE(review): presumably set by the File / Buffer / User constructors
** declared in this header -- confirm in streamio.c.
*/
typedef enum
{
FileIO,
BufferIO,
UserIO
} IOType;
/* states for ISO 2022
A document in ISO-2022 based encoding uses some ESC sequences called
"designator" to switch character sets. The designators defined and
used in ISO-2022-JP are:
"ESC" + "(" + ? for ISO646 variants
"ESC" + "$" + ? and
"ESC" + "$" + "(" + ? for multibyte character sets
*/
/* State of the ISO 2022 escape-sequence recogniser kept in each stream.
** NOTE(review): the names appear to encode the designator prefix seen so
** far (ESC, then D = "$", P = "("), with FSM_ASCII / FSM_NONASCII as the
** two resting states -- confirm against the state machine in streamio.c.
*/
typedef enum
{
FSM_ASCII,
FSM_ESC,
FSM_ESCD,
FSM_ESCDP,
FSM_ESCP,
FSM_NONASCII
} ISO2022State;
/************************
** Source
************************/
/* Sizes used by struct _StreamIn. LASTPOS_SIZE is the length of its
** `lastcols` array.
** NOTE(review): CHARBUF_SIZE is presumably the initial size of `charbuf`
** -- confirm in streamio.c.
*/
enum
{
CHARBUF_SIZE=5,
LASTPOS_SIZE=64
};
/* Input stream state. Non-raw input is cleaned up. */
struct _StreamIn
{
ISO2022State state; /* FSM for ISO2022 */
/* NOTE(review): pushed / charbuf / bufpos / bufsize look like the
** push-back buffer behind UngetChar() -- confirm in streamio.c.
*/
Bool pushed;
TidyAllocator *allocator;
tchar* charbuf;
uint bufpos;
uint bufsize;
int tabs;
int lastcols[LASTPOS_SIZE];
unsigned short curlastpos; /* current last position in lastcols */
unsigned short firstlastpos; /* first valid last position in lastcols */
int curcol;
int curline;
int encoding; /* NOTE(review): presumably one of the encoding ids #defined in this header (RAW, ASCII, ...) -- confirm */
IOType iotype;
TidyInputSource source;
/* Present only in builds with Win32 MLang support. */
#ifdef TIDY_WIN32_MLANG_SUPPORT
void* mlang;
#endif
/* Present only when original-text storage is compiled in. */
#ifdef TIDY_STORE_ORIGINAL_TEXT
tmbstr otextbuf;
size_t otextsize;
uint otextlen;
#endif
/* Pointer back to document for error reporting */
TidyDocImpl* doc;
};
/* Input stream construction and destruction. `encoding` is an int id;
** NOTE(review): presumably one of the encoding ids #defined in this header.
*/
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
void TY_(freeStreamIn)(StreamIn* in);
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
/* Reading. Characters are exchanged as `uint` values. */
int TY_(ReadBOMEncoding)(StreamIn *in);
uint TY_(ReadChar)( StreamIn* in );
void TY_(UngetChar)( uint c, StreamIn* in );
Bool TY_(IsEOF)( StreamIn* in );
/************************
** Sink
************************/
/* Output stream state. */
struct _StreamOut
{
int encoding; /* NOTE(review): presumably one of the encoding ids #defined in this header -- confirm */
ISO2022State state; /* for ISO 2022 */
uint nl; /* NOTE(review): presumably the newline convention passed as `newln` to the *Output constructors -- confirm */
/* Present only in builds with Win32 MLang support. */
#ifdef TIDY_WIN32_MLANG_SUPPORT
void* mlang;
#endif
IOType iotype;
TidyOutputSink sink;
};
/* Output stream construction. Each constructor takes the destination,
** an encoding id and a newline setting.
*/
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
StreamOut* TY_(StdErrOutput)(void);
/* StreamOut* StdOutOutput(void); */
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
/* Writing. */
void TY_(WriteChar)( uint c, StreamOut* out );
void TY_(outBOM)( StreamOut *out );
/* Mapping between encoding ids and their names. */
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
/************************
** Misc
************************/
/* character encodings
**
** Ids 0..8 are always defined. The UTF-16 ids exist only when
** SUPPORT_UTF16_ENCODINGS is non-zero, and the numeric values of BIG5 and
** SHIFTJIS depend on that setting (12/13 with UTF-16 support, 9/10
** without), so these ids are not stable across build configurations.
*/
#define RAW 0
#define ASCII 1
#define LATIN0 2
#define LATIN1 3
#define UTF8 4
#define ISO2022 5
#define MACROMAN 6
#define WIN1252 7
#define IBM858 8
#if SUPPORT_UTF16_ENCODINGS
#define UTF16LE 9
#define UTF16BE 10
#define UTF16 11
#endif
/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
** (i.e., to Unicode) before being recoded into UTF-8. This may be
** confusing: usually UTF-8 implies ISO10646 codepoints.
*/
#if SUPPORT_ASIAN_ENCODINGS
#if SUPPORT_UTF16_ENCODINGS
#define BIG5 12
#define SHIFTJIS 13
#else
#define BIG5 9
#define SHIFTJIS 10
#endif
#endif
#ifdef TIDY_WIN32_MLANG_SUPPORT
/* hack: windows code page numbers start at 37 */
#define WIN32MLANG 36
#endif
/* Char encoding used when replacing illegal SGML chars,
** regardless of specified encoding. Set at compile time
** to either Windows or Mac.
*/
extern const int TY_(ReplacementCharEncoding);
/* Function for conversion from Windows-1252 to Unicode */
uint TY_(DecodeWin1252)(uint c);
/* Function to convert from MacRoman to Unicode */
uint TY_(DecodeMacRoman)(uint c);
#ifdef __cplusplus
}
#endif
/* Use numeric constants as opposed to escape chars (\r, \n)
** to avoid conflicts with Mac compilers that may re-define these.
*/
#define CR 0xD
#define LF 0xA
/* Default newline setting for the build platform: CR on classic Mac OS,
** CRLF on Win32 and OS/2, LF everywhere else.
*/
#if defined(MAC_OS_CLASSIC)
#define DEFAULT_NL_CONFIG TidyCR
#elif defined(_WIN32) || defined(OS2_OS)
#define DEFAULT_NL_CONFIG TidyCRLF
#else
#define DEFAULT_NL_CONFIG TidyLF
#endif
#endif /* __STREAMIO_H__ */

286
src/tagask.c Normal file
View file

@ -0,0 +1,286 @@
/* tagask.c -- Interrogate node type
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.6 $
*/
#include "tidy-int.h"
#include "tags.h"
#include "tidy.h"
/* Node-type queries that forward to the internal TY_() predicates.
** Each wrapper converts the opaque TidyNode handle to the internal Node*
** with tidyNodeToImpl() and returns the predicate's result unchanged.
*/
Bool TIDY_CALL tidyNodeIsText( TidyNode tnod )
{ return TY_(nodeIsText)( tidyNodeToImpl(tnod) );
}
/* The three content-model queries below are declared locally rather than
** in a public header and carry no TIDY_CALL decoration.
*/
Bool tidyNodeCMIsBlock( TidyNode tnod ); /* not exported yet */
Bool tidyNodeCMIsBlock( TidyNode tnod )
{ return TY_(nodeCMIsBlock)( tidyNodeToImpl(tnod) );
}
Bool tidyNodeCMIsInline( TidyNode tnod ); /* not exported yet */
Bool tidyNodeCMIsInline( TidyNode tnod )
{ return TY_(nodeCMIsInline)( tidyNodeToImpl(tnod) );
}
Bool tidyNodeCMIsEmpty( TidyNode tnod ); /* not exported yet */
Bool tidyNodeCMIsEmpty( TidyNode tnod )
{ return TY_(nodeCMIsEmpty)( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsHeader( TidyNode tnod )
{ return TY_(nodeIsHeader)( tidyNodeToImpl(tnod) );
}
/* Per-element queries: document structure and frames.
** Each function converts the handle with tidyNodeToImpl() and applies the
** matching nodeIsXXX() test.
*/
Bool TIDY_CALL tidyNodeIsHTML( TidyNode tnod )
{ return nodeIsHTML( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsHEAD( TidyNode tnod )
{ return nodeIsHEAD( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsTITLE( TidyNode tnod )
{ return nodeIsTITLE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBASE( TidyNode tnod )
{ return nodeIsBASE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsMETA( TidyNode tnod )
{ return nodeIsMETA( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBODY( TidyNode tnod )
{ return nodeIsBODY( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsFRAMESET( TidyNode tnod )
{ return nodeIsFRAMESET( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsFRAME( TidyNode tnod )
{ return nodeIsFRAME( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsIFRAME( TidyNode tnod )
{ return nodeIsIFRAME( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsNOFRAMES( TidyNode tnod )
{ return nodeIsNOFRAMES( tidyNodeToImpl(tnod) );
}
/* Per-element queries: rules, headings H1/H2, preformatted text,
** paragraphs and lists. Same one-line forwarding pattern throughout.
*/
Bool TIDY_CALL tidyNodeIsHR( TidyNode tnod )
{ return nodeIsHR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsH1( TidyNode tnod )
{ return nodeIsH1( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsH2( TidyNode tnod )
{ return nodeIsH2( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsPRE( TidyNode tnod )
{ return nodeIsPRE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsLISTING( TidyNode tnod )
{ return nodeIsLISTING( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsP( TidyNode tnod )
{ return nodeIsP( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsUL( TidyNode tnod )
{ return nodeIsUL( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsOL( TidyNode tnod )
{ return nodeIsOL( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsDL( TidyNode tnod )
{ return nodeIsDL( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsDIR( TidyNode tnod )
{ return nodeIsDIR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsLI( TidyNode tnod )
{ return nodeIsLI( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsDT( TidyNode tnod )
{ return nodeIsDT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsDD( TidyNode tnod )
{ return nodeIsDD( tidyNodeToImpl(tnod) );
}
/* Per-element queries: table elements. */
Bool TIDY_CALL tidyNodeIsTABLE( TidyNode tnod )
{ return nodeIsTABLE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsCAPTION( TidyNode tnod )
{ return nodeIsCAPTION( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsTD( TidyNode tnod )
{ return nodeIsTD( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsTH( TidyNode tnod )
{ return nodeIsTH( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsTR( TidyNode tnod )
{ return nodeIsTR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsCOL( TidyNode tnod )
{ return nodeIsCOL( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsCOLGROUP( TidyNode tnod )
{ return nodeIsCOLGROUP( tidyNodeToImpl(tnod) );
}
/* Per-element queries: line breaks, links, inline emphasis, PARAM,
** OPTION/OPTGROUP, images and image maps.
*/
Bool TIDY_CALL tidyNodeIsBR( TidyNode tnod )
{ return nodeIsBR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsA( TidyNode tnod )
{ return nodeIsA( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsLINK( TidyNode tnod )
{ return nodeIsLINK( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsB( TidyNode tnod )
{ return nodeIsB( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsI( TidyNode tnod )
{ return nodeIsI( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSTRONG( TidyNode tnod )
{ return nodeIsSTRONG( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsEM( TidyNode tnod )
{ return nodeIsEM( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBIG( TidyNode tnod )
{ return nodeIsBIG( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSMALL( TidyNode tnod )
{ return nodeIsSMALL( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsPARAM( TidyNode tnod )
{ return nodeIsPARAM( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsOPTION( TidyNode tnod )
{ return nodeIsOPTION( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsOPTGROUP( TidyNode tnod )
{ return nodeIsOPTGROUP( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsIMG( TidyNode tnod )
{ return nodeIsIMG( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsMAP( TidyNode tnod )
{ return nodeIsMAP( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsAREA( TidyNode tnod )
{ return nodeIsAREA( tidyNodeToImpl(tnod) );
}
/* Per-element queries: NOBR through LABEL (presentational elements,
** STYLE/SCRIPT, forms, embedded objects, DIV/SPAN).
*/
Bool TIDY_CALL tidyNodeIsNOBR( TidyNode tnod )
{ return nodeIsNOBR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsWBR( TidyNode tnod )
{ return nodeIsWBR( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsFONT( TidyNode tnod )
{ return nodeIsFONT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsLAYER( TidyNode tnod )
{ return nodeIsLAYER( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSPACER( TidyNode tnod )
{ return nodeIsSPACER( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsCENTER( TidyNode tnod )
{ return nodeIsCENTER( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSTYLE( TidyNode tnod )
{ return nodeIsSTYLE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSCRIPT( TidyNode tnod )
{ return nodeIsSCRIPT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsNOSCRIPT( TidyNode tnod )
{ return nodeIsNOSCRIPT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsFORM( TidyNode tnod )
{ return nodeIsFORM( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsTEXTAREA( TidyNode tnod )
{ return nodeIsTEXTAREA( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBLOCKQUOTE( TidyNode tnod )
{ return nodeIsBLOCKQUOTE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsAPPLET( TidyNode tnod )
{ return nodeIsAPPLET( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsOBJECT( TidyNode tnod )
{ return nodeIsOBJECT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsDIV( TidyNode tnod )
{ return nodeIsDIV( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSPAN( TidyNode tnod )
{ return nodeIsSPAN( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsINPUT( TidyNode tnod )
{ return nodeIsINPUT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsQ( TidyNode tnod )
{ return nodeIsQ( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsLABEL( TidyNode tnod )
{ return nodeIsLABEL( tidyNodeToImpl(tnod) );
}
/* Per-element queries: headings H3..H6 and the remaining elements
** (ADDRESS through MENU).
*/
Bool TIDY_CALL tidyNodeIsH3( TidyNode tnod )
{ return nodeIsH3( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsH4( TidyNode tnod )
{ return nodeIsH4( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsH5( TidyNode tnod )
{ return nodeIsH5( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsH6( TidyNode tnod )
{ return nodeIsH6( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsADDRESS( TidyNode tnod )
{ return nodeIsADDRESS( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsXMP( TidyNode tnod )
{ return nodeIsXMP( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSELECT( TidyNode tnod )
{ return nodeIsSELECT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBLINK( TidyNode tnod )
{ return nodeIsBLINK( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsMARQUEE( TidyNode tnod )
{ return nodeIsMARQUEE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsEMBED( TidyNode tnod )
{ return nodeIsEMBED( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsBASEFONT( TidyNode tnod )
{ return nodeIsBASEFONT( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsISINDEX( TidyNode tnod )
{ return nodeIsISINDEX( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsS( TidyNode tnod )
{ return nodeIsS( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod )
{ return nodeIsSTRIKE( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsU( TidyNode tnod )
{ return nodeIsU( tidyNodeToImpl(tnod) );
}
Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
{ return nodeIsMENU( tidyNodeToImpl(tnod) );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

1004
src/tags.c Normal file

File diff suppressed because it is too large Load diff

235
src/tags.h Normal file
View file

@ -0,0 +1,235 @@
#ifndef __TAGS_H__
#define __TAGS_H__
/* tags.h -- recognize HTML tags
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/15 10:17:55 $
$Revision: 1.20 $
The HTML tags are stored as 8 bit ASCII strings.
Use lookupw() to find a tag given a wide char string.
*/
#include "forward.h"
#include "attrdict.h"
/* Function types (not pointer types) stored per tag in struct _Dict as
** `Parser*` / `CheckAttribs*`. Declaring them as function types also lets
** the header declare parsers as `Parser TY_(ParseXXX);`.
*/
typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
/*
Tag dictionary node
*/
/* types of tags that the user can define.
** The non-null values are distinct single bits; tagtype_null doubles as
** "all types" when passed to FreeDeclaredTags().
*/
typedef enum
{
tagtype_null = 0,
tagtype_empty = 1,
tagtype_inline = 2,
tagtype_block = 4,
tagtype_pre = 8
} UserTagType;
/* Tag dictionary node: one entry per known or user-declared element. */
struct _Dict
{
TidyTagId id; /* value compared by the TagId() / TagIsId() macros */
tmbstr name;
uint versions;
AttrVersion const * attrvers;
uint model; /* NOTE(review): presumably the content-model bits tested by nodeHasCM() -- confirm in tags.c */
Parser* parser;
CheckAttribs* chkattrs;
Dict* next; /* link to the next entry in a list of Dict nodes */
};
/* Hashed element lookup is enabled by default; define
** ELEMENT_HASH_LOOKUP to 0 before including this header to disable it.
*/
#if !defined(ELEMENT_HASH_LOOKUP)
#define ELEMENT_HASH_LOOKUP 1
#endif
#if ELEMENT_HASH_LOOKUP
/* Number of buckets in TidyTagImpl.hashtab. */
enum
{
ELEMENT_HASH_SIZE=178u
};
/* One bucket entry: a tag definition plus the next entry in the chain. */
struct _DictHash
{
Dict const* tag;
struct _DictHash* next;
};
typedef struct _DictHash DictHash;
#endif
/* Per-document tag state; embedded in TidyDocImpl as its `tags` member. */
struct _TidyTagImpl
{
Dict* xml_tags; /* placeholder for all xml tags */
Dict* declared_tag_list; /* User declared tags */
#if ELEMENT_HASH_LOOKUP
DictHash* hashtab[ELEMENT_HASH_SIZE];
#endif
};
typedef struct _TidyTagImpl TidyTagImpl;
/* interface for finding tag definitions: by tag id (LookupTagDef) or
** for a given node (FindTag, FindParser)
*/
const Dict* TY_(LookupTagDef)( TidyTagId tid );
Bool TY_(FindTag)( TidyDocImpl* doc, Node *node );
Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
/* user-declared tags */
void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ); /* tagtype_null to free all */
TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc );
ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
TidyIterator* iter );
/* set-up and tear-down of the per-document tag state */
void TY_(InitTags)( TidyDocImpl* doc );
void TY_(FreeTags)( TidyDocImpl* doc );
/* Parser methods for tags.
** Each line declares a function with the Parser signature:
** void f( TidyDocImpl* doc, Node *node, GetTokenMode mode ).
*/
Parser TY_(ParseHTML);
Parser TY_(ParseHead);
Parser TY_(ParseTitle);
Parser TY_(ParseScript);
Parser TY_(ParseFrameSet);
Parser TY_(ParseNoFrames);
Parser TY_(ParseBody);
Parser TY_(ParsePre);
Parser TY_(ParseList);
Parser TY_(ParseDefList);
Parser TY_(ParseBlock);
Parser TY_(ParseInline);
Parser TY_(ParseEmpty);
Parser TY_(ParseTableTag);
Parser TY_(ParseColGroup);
Parser TY_(ParseRowGroup);
Parser TY_(ParseRow);
Parser TY_(ParseSelect);
Parser TY_(ParseOptGroup);
Parser TY_(ParseText);
/* Declared the same way, using the CheckAttribs signature. */
CheckAttribs TY_(CheckAttributes);
/* 0 == TidyTag_UNKNOWN
**
** TagId(node): id of the node's tag, or TidyTag_UNKNOWN when either
** node or node->tag is null.
** TagIsId(node, tid): non-zero when node has a tag whose id equals tid.
**
** `tid` is parenthesised so that an expression argument (e.g. a ?: or a
** bitwise expression) is compared as a whole instead of being split by
** the higher precedence of `==`.
** Both macros evaluate `node` more than once; do not pass an expression
** with side effects.
*/
#define TagId(node) ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
/* Node classification predicates. */
Bool TY_(nodeIsText)( Node* node );
Bool TY_(nodeIsElement)( Node* node );
Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
#if 0
/* Compare & result to operand. If equal, then all bits
** requested are set.
*/
Bool nodeMatchCM( Node* node, uint contentModel );
#endif
/* True if any of the bits requested are set.
*/
Bool TY_(nodeHasCM)( Node* node, uint contentModel );
/* Content-model shortcuts for block, inline and empty elements. */
Bool TY_(nodeCMIsBlock)( Node* node );
Bool TY_(nodeCMIsInline)( Node* node );
Bool TY_(nodeCMIsEmpty)( Node* node );
Bool TY_(nodeIsHeader)( Node* node ); /* H1, H2, ..., H6 */
uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
/* Per-element tests. nodeIsXXX(node) expands to TagIsId(node, TidyTag_XXX),
** so each is false for a null node or a node without a tag, and each
** evaluates its argument more than once.
*/
#define nodeIsHTML( node ) TagIsId( node, TidyTag_HTML )
#define nodeIsHEAD( node ) TagIsId( node, TidyTag_HEAD )
#define nodeIsTITLE( node ) TagIsId( node, TidyTag_TITLE )
#define nodeIsBASE( node ) TagIsId( node, TidyTag_BASE )
#define nodeIsMETA( node ) TagIsId( node, TidyTag_META )
#define nodeIsBODY( node ) TagIsId( node, TidyTag_BODY )
#define nodeIsFRAMESET( node ) TagIsId( node, TidyTag_FRAMESET )
#define nodeIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
#define nodeIsIFRAME( node ) TagIsId( node, TidyTag_IFRAME )
#define nodeIsNOFRAMES( node ) TagIsId( node, TidyTag_NOFRAMES )
#define nodeIsHR( node ) TagIsId( node, TidyTag_HR )
#define nodeIsH1( node ) TagIsId( node, TidyTag_H1 )
#define nodeIsH2( node ) TagIsId( node, TidyTag_H2 )
#define nodeIsPRE( node ) TagIsId( node, TidyTag_PRE )
#define nodeIsLISTING( node ) TagIsId( node, TidyTag_LISTING )
#define nodeIsP( node ) TagIsId( node, TidyTag_P )
#define nodeIsUL( node ) TagIsId( node, TidyTag_UL )
#define nodeIsOL( node ) TagIsId( node, TidyTag_OL )
#define nodeIsDL( node ) TagIsId( node, TidyTag_DL )
#define nodeIsDIR( node ) TagIsId( node, TidyTag_DIR )
#define nodeIsLI( node ) TagIsId( node, TidyTag_LI )
#define nodeIsDT( node ) TagIsId( node, TidyTag_DT )
#define nodeIsDD( node ) TagIsId( node, TidyTag_DD )
#define nodeIsTABLE( node ) TagIsId( node, TidyTag_TABLE )
#define nodeIsCAPTION( node ) TagIsId( node, TidyTag_CAPTION )
#define nodeIsTD( node ) TagIsId( node, TidyTag_TD )
#define nodeIsTH( node ) TagIsId( node, TidyTag_TH )
#define nodeIsTR( node ) TagIsId( node, TidyTag_TR )
#define nodeIsCOL( node ) TagIsId( node, TidyTag_COL )
#define nodeIsCOLGROUP( node ) TagIsId( node, TidyTag_COLGROUP )
#define nodeIsBR( node ) TagIsId( node, TidyTag_BR )
#define nodeIsA( node ) TagIsId( node, TidyTag_A )
#define nodeIsLINK( node ) TagIsId( node, TidyTag_LINK )
#define nodeIsB( node ) TagIsId( node, TidyTag_B )
#define nodeIsI( node ) TagIsId( node, TidyTag_I )
#define nodeIsSTRONG( node ) TagIsId( node, TidyTag_STRONG )
#define nodeIsEM( node ) TagIsId( node, TidyTag_EM )
#define nodeIsBIG( node ) TagIsId( node, TidyTag_BIG )
#define nodeIsSMALL( node ) TagIsId( node, TidyTag_SMALL )
#define nodeIsPARAM( node ) TagIsId( node, TidyTag_PARAM )
#define nodeIsOPTION( node ) TagIsId( node, TidyTag_OPTION )
#define nodeIsOPTGROUP( node ) TagIsId( node, TidyTag_OPTGROUP )
#define nodeIsIMG( node ) TagIsId( node, TidyTag_IMG )
#define nodeIsMAP( node ) TagIsId( node, TidyTag_MAP )
#define nodeIsAREA( node ) TagIsId( node, TidyTag_AREA )
#define nodeIsNOBR( node ) TagIsId( node, TidyTag_NOBR )
#define nodeIsWBR( node ) TagIsId( node, TidyTag_WBR )
#define nodeIsFONT( node ) TagIsId( node, TidyTag_FONT )
#define nodeIsLAYER( node ) TagIsId( node, TidyTag_LAYER )
#define nodeIsSPACER( node ) TagIsId( node, TidyTag_SPACER )
#define nodeIsCENTER( node ) TagIsId( node, TidyTag_CENTER )
#define nodeIsSTYLE( node ) TagIsId( node, TidyTag_STYLE )
#define nodeIsSCRIPT( node ) TagIsId( node, TidyTag_SCRIPT )
#define nodeIsNOSCRIPT( node ) TagIsId( node, TidyTag_NOSCRIPT )
#define nodeIsFORM( node ) TagIsId( node, TidyTag_FORM )
#define nodeIsTEXTAREA( node ) TagIsId( node, TidyTag_TEXTAREA )
#define nodeIsBLOCKQUOTE( node ) TagIsId( node, TidyTag_BLOCKQUOTE )
#define nodeIsAPPLET( node ) TagIsId( node, TidyTag_APPLET )
#define nodeIsOBJECT( node ) TagIsId( node, TidyTag_OBJECT )
#define nodeIsDIV( node ) TagIsId( node, TidyTag_DIV )
#define nodeIsSPAN( node ) TagIsId( node, TidyTag_SPAN )
#define nodeIsINPUT( node ) TagIsId( node, TidyTag_INPUT )
#define nodeIsQ( node ) TagIsId( node, TidyTag_Q )
#define nodeIsLABEL( node ) TagIsId( node, TidyTag_LABEL )
#define nodeIsH3( node ) TagIsId( node, TidyTag_H3 )
#define nodeIsH4( node ) TagIsId( node, TidyTag_H4 )
#define nodeIsH5( node ) TagIsId( node, TidyTag_H5 )
#define nodeIsH6( node ) TagIsId( node, TidyTag_H6 )
#define nodeIsADDRESS( node ) TagIsId( node, TidyTag_ADDRESS )
#define nodeIsXMP( node ) TagIsId( node, TidyTag_XMP )
#define nodeIsSELECT( node ) TagIsId( node, TidyTag_SELECT )
#define nodeIsBLINK( node ) TagIsId( node, TidyTag_BLINK )
#define nodeIsMARQUEE( node ) TagIsId( node, TidyTag_MARQUEE )
#define nodeIsEMBED( node ) TagIsId( node, TidyTag_EMBED )
#define nodeIsBASEFONT( node ) TagIsId( node, TidyTag_BASEFONT )
#define nodeIsISINDEX( node ) TagIsId( node, TidyTag_ISINDEX )
#define nodeIsS( node ) TagIsId( node, TidyTag_S )
#define nodeIsSTRIKE( node ) TagIsId( node, TidyTag_STRIKE )
#define nodeIsSUB( node ) TagIsId( node, TidyTag_SUB )
#define nodeIsSUP( node ) TagIsId( node, TidyTag_SUP )
#define nodeIsU( node ) TagIsId( node, TidyTag_U )
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
#endif /* __TAGS_H__ */

129
src/tidy-int.h Normal file
View file

@ -0,0 +1,129 @@
#ifndef __TIDY_INT_H__
#define __TIDY_INT_H__
/* tidy-int.h -- internal library declarations
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/02/11 09:45:52 $
$Revision: 1.13 $
*/
#include "tidy.h"
#include "config.h"
#include "lexer.h"
#include "tags.h"
#include "attrs.h"
#include "pprint.h"
#include "access.h"
/* Fallback MAX/MIN, defined only when the platform has not already
** provided them. Each argument may be evaluated twice, so avoid
** arguments with side effects.
*/
#ifndef MAX
#define MAX(a,b) (((a) > (b))?(a):(b))
#endif
#ifndef MIN
#define MIN(a,b) (((a) < (b))?(a):(b))
#endif
/* Internal document object behind the opaque TidyDoc handle (see the
** tidyDocToImpl / tidyImplToDoc casts below in this header).
*/
struct _TidyDocImpl
{
/* The Document Tree (and backing store buffer) */
Node root; /* This MUST remain the first declared
variable in this structure */
Lexer* lexer;
/* Config + Markup Declarations */
TidyConfigImpl config;
TidyTagImpl tags;
TidyAttribImpl attribs;
#if SUPPORT_ACCESSIBILITY_CHECKS
/* Accessibility Checks state */
TidyAccessImpl access;
#endif
/* The Pretty Print buffer */
TidyPrintImpl pprint;
/* I/O */
StreamIn* docIn;
StreamOut* docOut;
StreamOut* errout;
TidyReportFilter mssgFilt;
TidyOptCallback pOptCallback;
/* Parse + Repair Results */
uint optionErrors;
uint errors;
uint warnings;
uint accessErrors;
uint infoMessages;
uint docErrors;
int parseStatus;
uint badAccess; /* for accessibility errors */
uint badLayout; /* for bad style errors */
uint badChars; /* for bad char encodings */
uint badForm; /* for badly placed form tags */
/* Memory allocator; used by the TidyDocAlloc/Realloc/Free/Panic macros */
TidyAllocator* allocator;
/* Miscellaneous */
void* appData;
uint nClassId;
Bool inputHadBOM;
/* Present only when original-text storage is compiled in. */
#ifdef TIDY_STORE_ORIGINAL_TEXT
Bool storeText;
#endif
/* Present only when PRESERVE_FILE_TIMES is non-zero. */
#if PRESERVE_FILE_TIMES
struct utimbuf filetimes;
#endif
tmbstr givenDoctype;
};
/* Twizzle internal/external types.
** The conversions are plain casts between the opaque public handles and
** the internal pointer types. The function prototypes in the NEVER branch
** are compiled only if NEVER is defined; otherwise the macro forms are
** used.
*/
#ifdef NEVER
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc );
TidyDoc tidyImplToDoc( TidyDocImpl* impl );
Node* tidyNodeToImpl( TidyNode tnod );
TidyNode tidyImplToNode( Node* node );
AttVal* tidyAttrToImpl( TidyAttr tattr );
TidyAttr tidyImplToAttr( AttVal* attval );
const TidyOptionImpl* tidyOptionToImpl( TidyOption topt );
TidyOption tidyImplToOption( const TidyOptionImpl* option );
#else
#define tidyDocToImpl( tdoc ) ((TidyDocImpl*)(tdoc))
#define tidyImplToDoc( doc ) ((TidyDoc)(doc))
#define tidyNodeToImpl( tnod ) ((Node*)(tnod))
#define tidyImplToNode( node ) ((TidyNode)(node))
#define tidyAttrToImpl( tattr ) ((AttVal*)(tattr))
#define tidyImplToAttr( attval ) ((TidyAttr)(attval))
#define tidyOptionToImpl( topt ) ((const TidyOptionImpl*)(topt))
#define tidyImplToOption( option ) ((TidyOption)(option))
#endif
/** Wrappers for easy memory allocation using the document's allocator.
** Each macro forwards to the matching TidyAlloc / TidyRealloc / TidyFree /
** TidyPanic call with (doc)->allocator as the first argument.
*/
#define TidyDocAlloc(doc, size) TidyAlloc((doc)->allocator, size)
#define TidyDocRealloc(doc, block, size) TidyRealloc((doc)->allocator, block, size)
#define TidyDocFree(doc, block) TidyFree((doc)->allocator, block)
#define TidyDocPanic(doc, msg) TidyPanic((doc)->allocator, msg)
/* Parse the given input stream into the document; returns an int status. */
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in );
#endif /* __TIDY_INT_H__ */

1751
src/tidylib.c Normal file

File diff suppressed because it is too large Load diff

306
src/tmbstr.c Normal file
View file

@ -0,0 +1,306 @@
/* tmbstr.c -- Tidy string utility functions
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/29 16:31:08 $
$Revision: 1.13 $
*/
#include "forward.h"
#include "tmbstr.h"
#include "lexer.h"
/* Duplicate a NUL-terminated string into storage obtained from
** `allocator`. Returns NULL when `str` is NULL; otherwise a fresh copy of
** tmbstrlen(str) + 1 bytes.
*/
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
{
    tmbstr copy;
    uint len, ix;

    if ( str == NULL )
        return NULL;

    len = TY_(tmbstrlen)( str );
    copy = (tmbstr) TidyAlloc( allocator, 1+len );

    /* "<=" so that the terminator at str[len] is copied as well */
    for ( ix = 0; ix <= len; ++ix )
        copy[ix] = str[ix];

    return copy;
}
/* Like strndup, but using an allocator: copies at most `len` characters of
** `str` into a new buffer of len + 1 bytes and terminates it.
** Unlike strndup, returns NULL (no allocation) when `str` is NULL or
** `len` is 0.
*/
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
{
    tmbstr copy;
    uint ix = 0;

    if ( str == NULL || len == 0 )
        return NULL;

    copy = (tmbstr) TidyAlloc( allocator, 1+len );
    while ( ix < len )
    {
        tmbchar ch = str[ix];
        copy[ix++] = ch;
        if ( ch == 0 )      /* source ended early; its terminator was copied */
            break;
    }
    copy[ix] = 0;
    return copy;
}
/* Bounded string copy. NOT the same as strncpy:
**  - at most size-1 characters are copied and the result is always
**    NUL-terminated (strncpy may leave it unterminated);
**  - the rest of the buffer is not zero-padded;
**  - the return value is `size` minus the number of characters copied,
**    i.e. 0 means the source was truncated (or size was 0).
** Nothing is written when s1 or s2 is NULL (size is returned unchanged)
** or when size is 0.
*/
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
{
    /* size == 0 must be rejected up front: the pre-decrement below would
    ** otherwise wrap to UINT_MAX and copy without any bound.
    */
    if ( s1 != NULL && s2 != NULL && size > 0 )
    {
        tmbstr cp = s1;
        while ( *s2 && --size )  /* Predecrement: reserve byte */
            *cp++ = *s2++;       /* for NULL terminator. */
        *cp = 0;
    }
    return size;
}
/* Copy s2 (including its terminator) to s1 and return the number of
** characters copied, not counting the terminator. This allows expressions
** like: cp += tmbstrcpy( cp, "joebob" );
** Neither argument is checked for NULL.
*/
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
{
    uint copied;

    for ( copied = 0; s2[copied] != 0; ++copied )
        s1[copied] = s2[copied];
    s1[copied] = 0;

    return copied;
}
/* Append s2 (including its terminator) to the end of s1 and return the
** number of characters appended, not the total length. This allows
** expressions like: cp += tmbstrcat( cp, "joebob" );
** Neither argument is checked for NULL.
*/
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
{
    tmbstr tail = s1;
    uint appended = 0;

    /* find the existing terminator */
    while ( *tail != 0 )
        ++tail;

    while ( (tail[appended] = s2[appended]) != 0 )
        ++appended;

    return appended;
}
/* strcmp-style comparison; the result is exactly -1, 0 or 1.
** NOTE(review): characters are compared as plain tmbchar values, so if
** tmbchar is a signed type the ordering of bytes >= 0x80 differs from
** strcmp (which compares as unsigned char) -- confirm before relying on
** the sign for non-ASCII data.
*/
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
{
    for ( ; *s1 == *s2; ++s1, ++s2 )
    {
        if ( *s1 == '\0' )
            return 0;
    }
    return ( *s1 > *s2 ? 1 : -1 );
}
/* Length of str in bytes (not characters), excluding the terminator.
** A NULL pointer yields 0.
*/
uint TY_(tmbstrlen)( ctmbstr str )
{
    ctmbstr end = str;

    if ( str == NULL )
        return 0;

    while ( *end != 0 )
        ++end;

    return (uint)( end - str );
}
/*
 Case-insensitive string comparison (MS C 4.2 doesn't include
 strcasecmp). Case is folded with ToLower(); note that tolower and
 toupper won't work on chars > 127, and neither does ToLower()!
 Returns 0 when the strings are equal ignoring case, otherwise 1 or -1
 according to the first pair of characters whose folded values differ
 (the unfolded characters decide the sign).
*/
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
{
    for ( ;; )
    {
        uint c = (uint)(*s1);

        if ( TY_(ToLower)(c) != TY_(ToLower)((uint)(*s2)) )
            break;
        if ( c == '\0' )
            return 0;
        ++s1;
        ++s2;
    }
    return ( *s1 > *s2 ? 1 : -1 );
}
/* Compare at most n characters of s1 and s2.
** Returns 0 when the first n characters match (or both strings end
** earlier at the same place), otherwise 1 or -1 for the first differing
** position.
**
** The remaining count is checked BEFORE each pair of characters is read,
** so exactly n characters at most are examined. This matters for callers
** such as tmbsubstrn() that pass a length-bounded buffer which need not be
** NUL-terminated.
*/
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        if ( (byte)*s1 != (byte)*s2 )
            return ( *s1 > *s2 ? 1 : -1 );
        if ( *s1 == '\0' )
            return 0;
    }
    return 0;
}
/* Case-insensitive comparison of at most n characters of s1 and s2, with
** case folded by ToLower().
** Returns 0 when the first n characters match ignoring case (or both
** strings end earlier at the same place), otherwise 1 or -1 for the first
** differing position.
**
** The remaining count is checked BEFORE each pair of characters is read,
** so no character beyond the first n is ever dereferenced.
*/
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        uint c = (uint)(*s1);

        if ( TY_(ToLower)(c) != TY_(ToLower)((uint)(*s2)) )
            return ( *s1 > *s2 ? 1 : -1 );
        if ( c == '\0' )
            return 0;
    }
    return 0;
}
/* Disabled: compiled out by the surrounding #if 0. */
#if 0
/* return offset of cc from beginning of s1,
** -1 if not found.
** Examines exactly maxlen bytes; it does not stop at a NUL terminator.
*/
int TY_(tmbstrnchr)( ctmbstr s1, uint maxlen, tmbchar cc )
{
int i;
ctmbstr cp = s1;
for ( i = 0; (uint)i < maxlen; ++i, ++cp )
{
if ( *cp == cc )
return i;
}
return -1;
}
#endif
/* Find the first case-sensitive occurrence of the NUL-terminated string
** s2 within the first len1 bytes of s1.
** Returns a pointer into s1, or NULL when s2 does not occur (including
** when s2 is longer than len1). An empty s2 matches at s1.
**
** The search range is computed in unsigned arithmetic with an explicit
** "needle longer than haystack" check, rather than by converting an
** unsigned difference to int.
*/
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    uint ix, last;

    if ( len2 > len1 )
        return NULL;

    last = len1 - len2;     /* last offset at which s2 can still fit */
    for ( ix = 0; ix <= last; ++ix )
    {
        if ( TY_(tmbstrncmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
    }
    return NULL;
}
/* Disabled: compiled out by the surrounding #if 0.
** Case-insensitive counterpart of tmbsubstrn().
*/
#if 0
ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 )
{
uint len2 = TY_(tmbstrlen)(s2);
int ix, diff = len1 - len2;
for ( ix = 0; ix <= diff; ++ix )
{
if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
return (ctmbstr) s1+ix;
}
return NULL;
}
#endif
/* Find the first occurrence of s2 within s1, IGNORING CASE (the match is
** made with tmbstrncasecmp, despite the strstr-like name).
** Returns a pointer into s1, or NULL when s2 does not occur (including
** when s2 is longer than s1). An empty s2 matches at s1.
**
** The search range is computed in unsigned arithmetic with an explicit
** "needle longer than haystack" check, rather than by converting an
** unsigned difference to int.
*/
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
{
    uint len1 = TY_(tmbstrlen)(s1), len2 = TY_(tmbstrlen)(s2);
    uint ix, last;

    if ( len2 > len1 )
        return NULL;

    last = len1 - len2;     /* last offset at which s2 can still fit */
    for ( ix = 0; ix <= last; ++ix )
    {
        if ( TY_(tmbstrncasecmp)(s1+ix, s2, len2) == 0 )
            return (ctmbstr) s1+ix;
    }
    return NULL;
}
/* Lower-case the string in place using ToLower() on every character up to
** the terminator; returns its argument. `s` must not be NULL.
*/
tmbstr TY_(tmbstrtolower)( tmbstr s )
{
    tmbstr cur = s;

    while ( *cur != 0 )
    {
        *cur = (tmbchar) TY_(ToLower)( *cur );
        ++cur;
    }
    return s;
}
/* Upper-case the string in place using ToUpper() on every character up to
** the terminator; returns its argument. `s` must not be NULL.
*/
tmbstr TY_(tmbstrtoupper)(tmbstr s)
{
    tmbstr cur = s;

    while ( *cur != 0 )
    {
        *cur = (tmbchar)TY_(ToUpper)(*cur);
        ++cur;
    }
    return s;
}
/* Disabled: compiled out by the surrounding #if 0.
** Compares two file names as strings, case-sensitively or not depending
** on FILENAMES_CASE_SENSITIVE; no path normalisation is performed.
*/
#if 0
Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
{
#if FILENAMES_CASE_SENSITIVE
return ( TY_(tmbstrcmp)( filename1, filename2 ) == 0 );
#else
return ( TY_(tmbstrcasecmp)( filename1, filename2 ) == 0 );
#endif
}
#endif
/* Format into `buffer`, which holds `count` bytes, from a va_list.
**
** When HAS_VSNPRINTF is set the output is limited to count-1 bytes and
** buffer[count-1] is always set to 0, so the result is terminated even if
** the platform's vsnprintf does not terminate on truncation. Without
** HAS_VSNPRINTF the call falls back to vsprintf and `count` is NOT
** enforced.
**
** Returns whatever the underlying formatter returns, or 0 without
** touching `buffer` when count is 0 (a zero-sized buffer cannot hold even
** the terminator, and count-1 would wrap to SIZE_MAX).
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{
    int retval;

    if ( count == 0 )
        return 0;

#if HAS_VSNPRINTF
    retval = vsnprintf(buffer, count - 1, format, args);
    /* todo: conditionally null-terminate the string? */
    buffer[count - 1] = 0;
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    return retval;
}
/* Variadic front end to tmbvsnprintf(): format into `buffer`, which holds
** `count` bytes.
**
** Returns the formatter's result, or 0 without touching `buffer` when
** count is 0 (a zero-sized buffer cannot hold even the terminator, and
** count-1 would otherwise wrap to SIZE_MAX).
*/
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
{
    int retval;
    va_list args;

    if ( count == 0 )
        return 0;

    va_start(args, format);
    /* share the bounded-format logic instead of duplicating it */
    retval = TY_(tmbvsnprintf)(buffer, count, format, args);
    va_end(args);
    return retval;
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

92
src/tmbstr.h Normal file
View file

@ -0,0 +1,92 @@
#ifndef __TMBSTR_H__
#define __TMBSTR_H__
/* tmbstr.h - Tidy string utility functions
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/12/29 16:31:09 $
$Revision: 1.11 $
*/
#include "platform.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* like strdup but using an allocator; returns NULL for a NULL str */
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
/* like strndup but using an allocator; returns NULL for a NULL str or
** when len is 0
*/
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
/* bounded copy; unlike strncpy it always NUL-terminates s1 (copying at
** most size-1 characters), does not pad, and returns size minus the
** number of characters copied
*/
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
/* copy / append; both return the number of characters written,
** excluding the terminator
*/
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
/* strcmp-style comparison; returns exactly -1, 0 or 1 */
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
/* returns byte count, not char count */
uint TY_(tmbstrlen)( ctmbstr str );
/*
MS C 4.2 doesn't include strcasecmp.
Note that tolower and toupper won't
work on chars > 127.
Neither do Lexer.ToLower() or Lexer.ToUpper()!
We get away with this because, except for XML tags,
we are always comparing to ascii element and
attribute names defined by HTML specs.
*/
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
/* return offset of cc from beginning of s1,
** -1 if not found.
*/
/* int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
/* find s2 within the first len1 bytes of s1 (case-sensitive) */
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
/* ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
/* find s2 within s1, ignoring case */
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
/* Transform ASCII chars in string to lower case */
tmbstr TY_(tmbstrtolower)( tmbstr s );
/* Transform ASCII chars in string to upper case */
tmbstr TY_(tmbstrtoupper)( tmbstr s );
/* Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
/* printf-style formatting into a buffer of `count` bytes; under GCC the
** format string is checked against the arguments
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
#ifdef __GNUC__
__attribute__((format(printf, 3, 0)))
#endif
;
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
#ifdef __GNUC__
__attribute__((format(printf, 3, 4)))
#endif
;
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* __TMBSTR_H__ */

539
src/utf8.c Normal file
View file

@ -0,0 +1,539 @@
/* utf8.c -- convert characters to/from UTF-8
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2007/05/30 16:47:31 $
$Revision: 1.10 $
Uses public interfaces to abstract input source and output
sink, which may be user supplied or either FILE* or memory
based Tidy implementations. Encoding support is uniform
regardless of I/O mechanism.
Note, UTF-8 encoding, by itself, does not affect the actual
"codepoints" of the underlying character encoding. In the
cases of ASCII, Latin1, Unicode (16-bit, BMP), these all
refer to ISO-10646 "codepoints". For anything else, they
refer to some other "codepoint" set.
Put another way, UTF-8 is a variable length method to
represent any non-negative integer value. The glyph
that an integer value represents is unchanged and defined
externally (e.g. by ISO-10646, Big5, Win1252, MacRoman,
Latin2-9, and so on).
Put still another way, UTF-8 is more of a _transfer_ encoding
than a _character_ encoding, per se.
*/
#include "tidy.h"
#include "forward.h"
#include "utf8.h"
/*
UTF-8 encoding/decoding functions
Return # of bytes in UTF-8 sequence; result < 0 if illegal sequence
Also see below for UTF-16 encoding/decoding functions
References :
1) UCS Transformation Format 8 (UTF-8):
ISO/IEC 10646-1:1996 Amendment 2 or ISO/IEC 10646-1:2000 Annex D
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335>
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html>
Table 4 - Mapping from UCS-4 to UTF-8
2) Unicode standards:
<http://www.unicode.org/unicode/standard/standard.html>
3) Legal UTF-8 byte sequences:
<http://www.unicode.org/unicode/uni2errata/UTF-8_Corrigendum.html>
Code point 1st byte 2nd byte 3rd byte 4th byte
---------- -------- -------- -------- --------
U+0000..U+007F 00..7F
U+0080..U+07FF C2..DF 80..BF
U+0800..U+0FFF E0 A0..BF 80..BF
U+1000..U+FFFF E1..EF 80..BF 80..BF
U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also
allows for the use of five- and six-byte sequences to encode
characters that are outside the range of the Unicode character
set; those five- and six-byte sequences are illegal for the use
of UTF-8 as a transformation of Unicode characters. ISO/IEC 10646
does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF
(but it does allow other noncharacters).
4) RFC 2279: UTF-8, a transformation format of ISO 10646:
<http://www.ietf.org/rfc/rfc2279.txt>
5) UTF-8 and Unicode FAQ:
<http://www.cl.cam.ac.uk/~mgk25/unicode.html>
6) Markus Kuhn's UTF-8 decoder stress test file:
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
7) UTF-8 Demo:
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt>
8) UTF-8 Sampler:
<http://www.columbia.edu/kermit/utf8.html>
9) Transformation Format for 16 Planes of Group 00 (UTF-16):
ISO/IEC 10646-1:1996 Amendment 1 or ISO/IEC 10646-1:2000 Annex C
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n2005/n2005.pdf>
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-16.html>
10) RFC 2781: UTF-16, an encoding of ISO 10646:
<http://www.ietf.org/rfc/rfc2781.txt>
11) UTF-16 invalid surrogate pairs:
<http://www.unicode.org/unicode/faq/utf_bom.html#16>
UTF-16 UTF-8 UCS-4
D83F DFF* F0 9F BF B* 0001FFF*
D87F DFF* F0 AF BF B* 0002FFF*
D8BF DFF* F0 BF BF B* 0003FFF*
D8FF DFF* F1 8F BF B* 0004FFF*
D93F DFF* F1 9F BF B* 0005FFF*
D97F DFF* F1 AF BF B* 0006FFF*
...
DBBF DFF* F3 BF BF B* 000FFFF*
DBFF DFF* F4 8F BF B* 0010FFF*
* = E or F
1010 A
1011 B
1100 C
1101 D
1110 E
1111 F
*/
/* number of rows in the validUTF8 table below */
#define kNumUTF8Sequences 7
/* longest sequence length covered by the offsetUTF8Sequences table */
#define kMaxUTF8Bytes 4
/* U+FFFE and U+FFFF are rejected by both the decoder and the encoder */
#define kUTF8ByteSwapNotAChar 0xFFFE
#define kUTF8NotAChar 0xFFFF
/* largest value accepted as a legal UTF-8 encoded character */
#define kMaxUTF8FromUCS4 0x10FFFF
/* first value that is represented in UTF-16 as a surrogate pair */
#define kUTF16SurrogatesBegin 0x10000
#define kMaxUTF16FromUCS4 0x10FFFF
/* UTF-16 surrogate pair areas */
/* Note on naming: in this file "low" surrogate means the range
** 0xD800..0xDBFF and "high" surrogate means 0xDC00..0xDFFF. The Unicode
** Standard uses the opposite words for these ranges (0xD800..0xDBFF is
** its high/leading range, 0xDC00..0xDFFF its low/trailing range).
*/
#define kUTF16LowSurrogateBegin 0xD800
#define kUTF16LowSurrogateEnd 0xDBFF
#define kUTF16HighSurrogateBegin 0xDC00
#define kUTF16HighSurrogateEnd 0xDFFF
/* offsets into validUTF8 table below */
/* Entry [k-1] is the index of the first validUTF8 row describing
** k-byte sequences; entry [k] minus one is the index of the last such
** row. The final entry is a sentinel equal to the total row count.
*/
static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
{
0, /* 1 byte */
1, /* 2 bytes */
2, /* 3 bytes */
4, /* 4 bytes */
kNumUTF8Sequences /* must be last */
};
/* Table of legal UTF-8 sequences, one row per code point range.
** lowChar/highChar bound the code points covered by the row, numBytes
** is the sequence length, and validBytes holds an inclusive
** (minimum, maximum) pair for each byte position: entries [2*k] and
** [2*k + 1] bound byte k of the sequence. Unused positions are zero.
*/
static const struct validUTF8Sequence
{
uint lowChar;
uint highChar;
int numBytes;
byte validBytes[8];
} validUTF8[kNumUTF8Sequences] =
{
/* low high #bytes byte 1 byte 2 byte 3 byte 4 */
{0x0000, 0x007F, 1, {0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
{0x0080, 0x07FF, 2, {0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}},
{0x0800, 0x0FFF, 3, {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
{0x1000, 0xFFFF, 3, {0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
{0x10000, 0x3FFFF, 4, {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
{0x40000, 0xFFFFF, 4, {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
{0x100000, 0x10FFFF, 4, {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}}
};
/* Decode one UTF-8 byte sequence into a character value.
**
**   c              - receives the decoded value (written on error too)
**   firstByte      - first byte of the sequence, or EndOfStream
**   successorBytes - optional buffer holding the bytes that follow
**                    firstByte; takes precedence over inp
**   inp            - optional input source the successor bytes are read
**                    from when successorBytes is NULL
**   count          - receives the number of bytes of the sequence that
**                    were actually examined/consumed, firstByte included
**
** Returns 0 on success and -1 if the sequence is illegal (bad lead
** byte, bad or missing successor byte, five/six byte form, overlong
** form, U+FFFE/U+FFFF, or value above kMaxUTF8FromUCS4).
*/
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
                                TidyInputSource* inp, int* count )
{
    byte tempbuf[10];
    byte *buf = &tempbuf[0];
    uint ch = 0, n = 0;
    int i, bytes = 0;
    Bool hasError = no;

    if ( successorBytes )
        buf = (byte*) successorBytes;

    /* special check if we have been passed an EOF char */
    if ( firstByte == EndOfStream )
    {
        /* at present */
        *c = firstByte;
        *count = 1;
        return 0;
    }

    ch = firstByte; /* first byte is passed in separately */

    if (ch <= 0x7F) /* 0XXX XXXX one byte */
    {
        n = ch;
        bytes = 1;
    }
    else if ((ch & 0xE0) == 0xC0)  /* 110X XXXX  two bytes */
    {
        n = ch & 31;
        bytes = 2;
    }
    else if ((ch & 0xF0) == 0xE0)  /* 1110 XXXX  three bytes */
    {
        n = ch & 15;
        bytes = 3;
    }
    else if ((ch & 0xF8) == 0xF0)  /* 1111 0XXX  four bytes */
    {
        n = ch & 7;
        bytes = 4;
    }
    else if ((ch & 0xFC) == 0xF8)  /* 1111 10XX  five bytes */
    {
        n = ch & 3;
        bytes = 5;
        hasError = yes;
    }
    else if ((ch & 0xFE) == 0xFC)  /* 1111 110X  six bytes */
    {
        n = ch & 1;
        bytes = 6;
        hasError = yes;
    }
    else
    {
        /* not a valid first byte of a UTF-8 sequence */
        n = ch;
        bytes = 1;
        hasError = yes;
    }

    /* successor bytes should have the form 10XX XXXX */

    /* If caller supplied buffer, use it.  Else see if caller
    ** supplied an input source, use that.
    */
    if ( successorBytes )
    {
        for ( i=0; i < bytes-1; ++i )
        {
            /* a NUL or a byte that is not 10XX XXXX ends the sequence */
            if ( !buf[i] || (buf[i] & 0xC0) != 0x80 )
            {
                hasError = yes;
                bytes = i+1;
                break;
            }
            n = (n << 6) | (buf[i] & 0x3F);
        }
    }
    else if ( inp )
    {
        for ( i=0; i < bytes-1 && !inp->eof(inp->sourceData); ++i )
        {
            int b = inp->getByte( inp->sourceData );
            buf[i] = (tmbchar) b;

            /* End of data or illegal successor byte value */
            if ( b == EOF || (buf[i] & 0xC0) != 0x80 )
            {
                hasError = yes;
                bytes = i+1;
                /* leave the offending byte for the next read */
                if ( b != EOF )
                    inp->ungetByte( inp->sourceData, buf[i] );
                break;
            }
            n = (n << 6) | (buf[i] & 0x3F);
        }

        /* The source reported eof() before all successor bytes were
        ** read: the sequence is truncated. Flag it and report only
        ** the bytes that were really consumed.
        */
        if ( i < bytes-1 )
        {
            hasError = yes;
            bytes = i+1;
        }
    }
    else if ( bytes > 1 )
    {
        /* multi-byte sequence but nowhere to get the successors from */
        hasError = yes;
        bytes = 1;
    }

    if (!hasError && ((n == kUTF8ByteSwapNotAChar) || (n == kUTF8NotAChar)))
        hasError = yes;

    if (!hasError && (n > kMaxUTF8FromUCS4))
        hasError = yes;

#if 0 /* Breaks Big5 D8 - DF */
    if (!hasError && (n >= kUTF16LowSurrogateBegin) && (n <= kUTF16HighSurrogateEnd))
        /* unpaired surrogates not allowed */
        hasError = yes;
#endif

    if (!hasError)
    {
        /* bytes is in 1..4 here: every other length has set hasError */
        int lo, hi;

        lo = offsetUTF8Sequences[bytes - 1];
        hi = offsetUTF8Sequences[bytes] - 1;

        /* check for overlong sequences */
        if ((n < validUTF8[lo].lowChar) || (n > validUTF8[hi].highChar))
            hasError = yes;
        else
        {
            hasError = yes; /* assume error until proven otherwise */

            /* the sequence is legal if EVERY byte lies within the
            ** ranges of at least one table row for this length
            */
            for (i = lo; hasError && i <= hi; i++)
            {
                int tempCount;
                Bool rowMatches = yes;

                for (tempCount = 0; tempCount < bytes; tempCount++)
                {
                    byte theByte;

                    if (!tempCount)
                        theByte = (byte) firstByte;
                    else
                        theByte = buf[tempCount - 1];

                    if ( theByte < validUTF8[i].validBytes[(tempCount * 2)] ||
                         theByte > validUTF8[i].validBytes[(tempCount * 2) + 1] )
                    {
                        rowMatches = no;
                        break;
                    }
                }

                if (rowMatches)
                    hasError = no;
            }
        }
    }

#if 1 && defined(_DEBUG)
    if ( hasError )
    {
        /* debug */
        fprintf( stderr, "UTF-8 decoding error of %d bytes : ", bytes );
        fprintf( stderr, "0x%02x ", firstByte );
        for (i = 1; i < bytes; i++)
            fprintf( stderr, "0x%02x ", buf[i - 1] );
        fprintf( stderr, " = U+%04x\n", n );
    }
#endif

    *count = bytes;
    *c = n;
    if ( hasError )
        return -1;
    return 0;
}
/* Encode the character value c as a UTF-8 byte sequence.
**
**   c         - value to encode
**   encodebuf - optional caller buffer receiving the bytes; as up to
**               six bytes may be stored it must have room for six.
**               When NULL a local scratch buffer is used.
**   outp      - optional output sink; the bytes are sent to it only
**               when the encoding is legal
**   count     - receives the number of bytes produced (0 when c is
**               above 0x7FFFFFFF)
**
** Returns 0 on success and -1 when c cannot be legally encoded
** (U+FFFE, U+FFFF, anything above kMaxUTF8FromUCS4). The five and six
** byte forms are still stored in the buffer but reported as errors.
*/
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count )
{
    byte tempbuf[10] = {0};
    byte* buf = &tempbuf[0];
    int bytes = 0;
    Bool hasError = no;

    if ( encodebuf )
        buf = (byte*) encodebuf;

    if (c <= 0x7F)  /* 0XXX XXXX  one byte */
    {
        buf[0] = (tmbchar) c;
        bytes = 1;
    }
    else if (c <= 0x7FF)  /* 110X XXXX  two bytes */
    {
        buf[0] = (tmbchar) ( 0xC0 | (c >> 6) );
        buf[1] = (tmbchar) ( 0x80 | (c & 0x3F) );
        bytes = 2;
    }
    else if (c <= 0xFFFF)  /* 1110 XXXX  three bytes */
    {
        buf[0] = (tmbchar) (0xE0 | (c >> 12));
        buf[1] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[2] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 3;
        if ( c == kUTF8ByteSwapNotAChar || c == kUTF8NotAChar )
            hasError = yes;
#if 0 /* Breaks Big5 D8 - DF */
        else if ( c >= kUTF16LowSurrogateBegin && c <= kUTF16HighSurrogateEnd )
            /* unpaired surrogates not allowed */
            hasError = yes;
#endif
    }
    else if (c <= 0x1FFFFF)  /* 1111 0XXX  four bytes */
    {
        buf[0] = (tmbchar) (0xF0 | (c >> 18));
        buf[1] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[3] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 4;
        if (c > kMaxUTF8FromUCS4)
            hasError = yes;
    }
    else if (c <= 0x3FFFFFF)  /* 1111 10XX  five bytes */
    {
        buf[0] = (tmbchar) (0xF8 | (c >> 24));
        /* keep only six payload bits, like every other successor byte */
        buf[1] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[3] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[4] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 5;
        hasError = yes;
    }
    else if (c <= 0x7FFFFFFF)  /* 1111 110X  six bytes */
    {
        buf[0] = (tmbchar) (0xFC | (c >> 30));
        buf[1] = (tmbchar) (0x80 | ((c >> 24) & 0x3F));
        buf[2] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
        buf[3] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
        buf[4] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
        buf[5] = (tmbchar) (0x80 | (c & 0x3F));
        bytes = 6;
        hasError = yes;
    }
    else
        hasError = yes;

    /* don't output invalid UTF-8 byte sequence to a stream */
    if ( !hasError && outp != NULL )
    {
        int ix;
        for ( ix=0; ix < bytes; ++ix )
            outp->putByte( outp->sinkData, buf[ix] );
    }

#if 1 && defined(_DEBUG)
    if ( hasError )
    {
        int i;
        fprintf( stderr, "UTF-8 encoding error for U+%x : ", c );
        for (i = 0; i < bytes; i++)
            fprintf( stderr, "0x%02x ", buf[i] );
        fprintf( stderr, "\n" );
    }
#endif

    *count = bytes;
    if (hasError)
        return -1;
    return 0;
}
/* Decode the UTF-8 byte sequence that str points to.
**
** The decoded character is returned in *ch; if the sequence is illegal
** *ch is set to U+FFFD (the replacement character).
** Returns one less than the number of bytes used by the UTF-8 byte
** sequence, i.e. the extra amount the caller has to advance by.
*/
uint TY_(GetUTF8)( ctmbstr str, uint *ch )
{
    uint n;
    int bytes;
    int err;

    bytes = 0;

    /* first byte "str[0]" is passed in separately from the */
    /* rest of the UTF-8 byte sequence starting at "str[1]" */
    /* The cast to byte stops a lead byte >= 0x80 from being sign */
    /* extended on its way to the uint parameter when tmbchar is a */
    /* signed type (presumably plain char - confirm in platform.h). */
    /* Without it a 0xFF byte could compare equal to an all-ones */
    /* EndOfStream value. NOTE(review): confirm EndOfStream's value. */
    err = TY_(DecodeUTF8BytesToChar)( &n, (byte) str[0], str+1, NULL, &bytes );
    if (err)
    {
#if 1 && defined(_DEBUG)
        fprintf(stderr, "pprint UTF-8 decoding error for U+%x : ", n);
#endif
        n = 0xFFFD; /* replacement char */
    }

    *ch = n;
    return bytes - 1;
}
/* Write c into buf as UTF-8 and return the position just past the
** bytes written. A value that cannot be encoded is replaced by the
** three byte encoding of U+FFFD.
*/
tmbstr TY_(PutUTF8)( tmbstr buf, uint c )
{
    int written = 0;

    if ( TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &written ) != 0 )
    {
#if 1 && defined(_DEBUG)
        fprintf(stderr, "pprint UTF-8 encoding error for U+%x : ", c);
#endif
        /* substitute the replacement char 0xFFFD, encoded as UTF-8 */
        buf[0] = (byte) 0xEF;
        buf[1] = (byte) 0xBF;
        buf[2] = (byte) 0xBD;
        written = 3;
    }

    return buf + written;
}
/* True when ucs4 does not exceed kMaxUTF16FromUCS4, the largest value
** representable in UTF-16.
*/
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 )
{
    return !( ucs4 > kMaxUTF16FromUCS4 );
}
/* True when ch lies in this file's "high" surrogate area,
** kUTF16HighSurrogateBegin..kUTF16HighSurrogateEnd inclusive.
*/
Bool TY_(IsHighSurrogate)( tchar ch )
{
    if ( ch < kUTF16HighSurrogateBegin )
        return no;
    return ( ch <= kUTF16HighSurrogateEnd );
}
/* True when ch lies in this file's "low" surrogate area,
** kUTF16LowSurrogateBegin..kUTF16LowSurrogateEnd inclusive.
*/
Bool TY_(IsLowSurrogate)( tchar ch )
{
    if ( ch < kUTF16LowSurrogateBegin )
        return no;
    return ( ch <= kUTF16LowSurrogateEnd );
}
/* Combine a surrogate pair into the single character value it stands
** for. "low" (0xD800 area) supplies the upper ten bits and "high"
** (0xDC00 area) the lower ten bits of the offset from 0x10000.
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low )
{
    tchar upperBits, lowerBits;

    assert( TY_(IsHighSurrogate)(high) && TY_(IsLowSurrogate)(low) );

    upperBits = (low - kUTF16LowSurrogateBegin) * 0x400;
    lowerBits = high - kUTF16HighSurrogateBegin;
    return upperBits + lowerBits + 0x10000;
}
/* Split utf16 into its surrogate pair. *low receives the unit from the
** 0xD800 area and *high the unit from the 0xDC00 area. Returns no, and
** stores nothing, when either pointer is NULL or utf16 is not a valid
** combined character.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high )
{
    tchar offset;

    if ( !low || !high || !TY_(IsValidCombinedChar)( utf16 ) )
        return no;

    offset = utf16 - kUTF16SurrogatesBegin;
    *low = offset / 0x400 + kUTF16LowSurrogateBegin;
    *high = offset % 0x400 + kUTF16HighSurrogateBegin;
    return yes;
}
/* True when ch is at or above kUTF16SurrogatesBegin and its lower
** sixteen bits are neither 0xFFFE nor 0xFFFF.
*/
Bool TY_(IsValidCombinedChar)( tchar ch )
{
    tchar low16 = ch & 0x0000FFFF;

    if ( ch < kUTF16SurrogatesBegin )
        return no;

    /* only 0xFFFE and 0xFFFF are excluded */
    return ( low16 < 0x0000FFFE );
}
/* True when ch is at or above kUTF16SurrogatesBegin, i.e. it takes a
** surrogate pair to represent it in UTF-16.
*/
Bool TY_(IsCombinedChar)( tchar ch )
{
    return !( ch < kUTF16SurrogatesBegin );
}
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

52
src/utf8.h Normal file
View file

@ -0,0 +1,52 @@
#ifndef __UTF8_H__
#define __UTF8_H__
/* utf8.h -- convert characters to/from UTF-8
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2006/09/12 15:14:44 $
$Revision: 1.5 $
*/
#include "platform.h"
#include "buffio.h"
/* UTF-8 encoding/decoding support
** Does not convert character "codepoints", i.e. to/from 10646.
*/

/* Decode one UTF-8 sequence. firstByte is passed separately; the
** remaining bytes come from successorBytes if non-NULL, else from inp.
** *c receives the value, *count the number of bytes used.
** Returns 0 on success, -1 for an illegal sequence.
*/
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
                                TidyInputSource* inp, int* count );

/* Encode c as UTF-8 into encodebuf (if non-NULL) and, when legal, send
** the bytes to outp (if non-NULL). *count receives the byte count.
** Returns 0 on success, -1 when c cannot be legally encoded.
*/
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
                                TidyOutputSink* outp, int* count );

/* Decode the sequence at str into *ch (U+FFFD on error); returns one
** less than the number of bytes used.
*/
uint TY_(GetUTF8)( ctmbstr str, uint *ch );

/* Store c at buf as UTF-8 (U+FFFD on error); returns the position just
** past the bytes written.
*/
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );

#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
#define UNICODE_BOM UNICODE_BOM_BE
#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */

/* UTF-16 helpers. Naming note: throughout these functions "low"
** surrogate means the 0xD800..0xDBFF range and "high" surrogate the
** 0xDC00..0xDFFF range, which is the reverse of the Unicode Standard's
** use of those words.
*/
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
Bool TY_(IsHighSurrogate)( tchar ch );
Bool TY_(IsLowSurrogate)( tchar ch );
Bool TY_(IsCombinedChar)( tchar ch );
Bool TY_(IsValidCombinedChar)( tchar ch );
tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
/* The second argument receives the "low" (0xD800 area) unit and the
** third the "high" (0xDC00 area) unit, matching the definition in
** utf8.c and the way the callers pass their pointers.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high );
#endif /* __UTF8_H__ */

14
src/version.h Executable file
View file

@ -0,0 +1,14 @@
/* version information
(c) 2007-2009 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2009/03/25 21:37:11 $
$Revision: 1.46 $
*/
/* Release date string of this source snapshot.
** NOTE(review): the CVS $Date$ keyword above (2009) is older than this
** value, so the string was presumably updated by hand - confirm.
*/
static const char TY_(release_date)[] = "16 November 2011";

14
src/version.h~ Executable file
View file

@ -0,0 +1,14 @@
/* version information
(c) 2007-2009 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
CVS Info :
$Author: arnaud02 $
$Date: 2009/03/25 21:37:11 $
$Revision: 1.46 $
*/
/* NOTE(review): this file is named "version.h~", which looks like an
** editor backup of version.h carrying an older release date; confirm
** whether it is meant to be under version control.
*/
static const char TY_(release_date)[] = "25 March 2009";

795
src/win32tc.c Normal file
View file

@ -0,0 +1,795 @@
/* win32tc.c -- Interface to Win32 transcoding routines
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: win32tc.c,v 1.12 2008/08/09 11:55:27 hoehrmann Exp $
*/
/* keep these here to keep file non-empty */
#include "tidy.h"
#include "forward.h"
#include "streamio.h"
#include "tmbstr.h"
#include "utf8.h"
#ifdef TIDY_WIN32_MLANG_SUPPORT
#define VC_EXTRALEAN
#define CINTERFACE
#define COBJMACROS
#include <windows.h>
#include <mlang.h>
#undef COBJMACROS
#undef CINTERFACE
#undef VC_EXTRALEAN
/* maximum number of bytes for a single character */
#define TC_INBUFSIZE 16
/* maximum number of characters per byte sequence */
#define TC_OUTBUFSIZE 16
/* Create an IMLangConvertCharset COM object and store its interface
** pointer in p. Expands to the CoCreateInstance call, so its value is
** that call's HRESULT. The expansion carries no trailing semicolon (the
** caller supplies it) and the argument is parenthesized, so the macro
** behaves like an ordinary function call expression.
*/
#define CreateMLangObject(p) \
    CoCreateInstance( \
        &CLSID_CMLangConvertCharset, \
        NULL, \
        CLSCTX_ALL, \
        &IID_IMLangConvertCharset, \
        (VOID **)&(p))
/* Character Set to Microsoft Windows Codepage Identifier map, */
/* from <rotor/sscli/clr/src/classlibnative/nls/encodingdata.cpp>. */
/* note: the 'safe' field indicates whether this encoding can be */
/* read/written character-by-character; this does not apply to */
/* various stateful encodings such as ISO-2022 or UTF-7, these */
/* must be read/written as a complete stream. It is possible that */
/* some 'unsafe' encodings are marked as 'safe'. */
/* todo: cleanup; Tidy should use only a single mapping table to */
/* circumvent unsupported aliases in other transcoding libraries, */
/* enable reverse lookup of encoding names and ease maintenance. */
static struct _nameWinCPMap
{
tmbstr name;
uint wincp;
Bool safe;
} const NameWinCPMap[] = {
{ "cp037", 37, yes },
{ "csibm037", 37, yes },
{ "ebcdic-cp-ca", 37, yes },
{ "ebcdic-cp-nl", 37, yes },
{ "ebcdic-cp-us", 37, yes },
{ "ebcdic-cp-wt", 37, yes },
{ "ibm037", 37, yes },
{ "cp437", 437, yes },
{ "cspc8codepage437", 437, yes },
{ "ibm437", 437, yes },
{ "cp500", 500, yes },
{ "csibm500", 500, yes },
{ "ebcdic-cp-be", 500, yes },
{ "ebcdic-cp-ch", 500, yes },
{ "ibm500", 500, yes },
{ "asmo-708", 708, yes },
{ "dos-720", 720, yes },
{ "ibm737", 737, yes },
{ "ibm775", 775, yes },
{ "cp850", 850, yes },
{ "ibm850", 850, yes },
{ "cp852", 852, yes },
{ "ibm852", 852, yes },
{ "cp855", 855, yes },
{ "ibm855", 855, yes },
{ "cp857", 857, yes },
{ "ibm857", 857, yes },
{ "ccsid00858", 858, yes },
{ "cp00858", 858, yes },
{ "cp858", 858, yes },
{ "ibm00858", 858, yes },
{ "pc-multilingual-850+euro", 858, yes },
{ "cp860", 860, yes },
{ "ibm860", 860, yes },
{ "cp861", 861, yes },
{ "ibm861", 861, yes },
{ "cp862", 862, yes },
{ "dos-862", 862, yes },
{ "ibm862", 862, yes },
{ "cp863", 863, yes },
{ "ibm863", 863, yes },
{ "cp864", 864, yes },
{ "ibm864", 864, yes },
{ "cp865", 865, yes },
{ "ibm865", 865, yes },
{ "cp866", 866, yes },
{ "ibm866", 866, yes },
{ "cp869", 869, yes },
{ "ibm869", 869, yes },
{ "cp870", 870, yes },
{ "csibm870", 870, yes },
{ "ebcdic-cp-roece", 870, yes },
{ "ebcdic-cp-yu", 870, yes },
{ "ibm870", 870, yes },
{ "dos-874", 874, yes },
{ "iso-8859-11", 874, yes },
{ "tis-620", 874, yes },
{ "windows-874", 874, yes },
{ "cp875", 875, yes },
{ "csshiftjis", 932, yes },
{ "cswindows31j", 932, yes },
{ "ms_kanji", 932, yes },
{ "shift-jis", 932, yes },
{ "shift_jis", 932, yes },
{ "sjis", 932, yes },
{ "x-ms-cp932", 932, yes },
{ "x-sjis", 932, yes },
{ "chinese", 936, yes },
{ "cn-gb", 936, yes },
{ "csgb2312", 936, yes },
{ "csgb231280", 936, yes },
{ "csiso58gb231280", 936, yes },
{ "gb2312", 936, yes },
{ "gb2312-80", 936, yes },
{ "gb231280", 936, yes },
{ "gb_2312-80", 936, yes },
{ "gbk", 936, yes },
{ "iso-ir-58", 936, yes },
{ "csksc56011987", 949, yes },
{ "iso-ir-149", 949, yes },
{ "korean", 949, yes },
{ "ks-c-5601", 949, yes },
{ "ks-c5601", 949, yes },
{ "ks_c_5601", 949, yes },
{ "ks_c_5601-1987", 949, yes },
{ "ks_c_5601-1989", 949, yes },
{ "ks_c_5601_1987", 949, yes },
{ "ksc5601", 949, yes },
{ "ksc_5601", 949, yes },
{ "big5", 950, yes },
{ "big5-hkscs", 950, yes },
{ "cn-big5", 950, yes },
{ "csbig5", 950, yes },
{ "x-x-big5", 950, yes },
{ "cp1026", 1026, yes },
{ "csibm1026", 1026, yes },
{ "ibm1026", 1026, yes },
{ "ibm01047", 1047, yes },
{ "ccsid01140", 1140, yes },
{ "cp01140", 1140, yes },
{ "ebcdic-us-37+euro", 1140, yes },
{ "ibm01140", 1140, yes },
{ "ccsid01141", 1141, yes },
{ "cp01141", 1141, yes },
{ "ebcdic-de-273+euro", 1141, yes },
{ "ibm01141", 1141, yes },
{ "ccsid01142", 1142, yes },
{ "cp01142", 1142, yes },
{ "ebcdic-dk-277+euro", 1142, yes },
{ "ebcdic-no-277+euro", 1142, yes },
{ "ibm01142", 1142, yes },
{ "ccsid01143", 1143, yes },
{ "cp01143", 1143, yes },
{ "ebcdic-fi-278+euro", 1143, yes },
{ "ebcdic-se-278+euro", 1143, yes },
{ "ibm01143", 1143, yes },
{ "ccsid01144", 1144, yes },
{ "cp01144", 1144, yes },
{ "ebcdic-it-280+euro", 1144, yes },
{ "ibm01144", 1144, yes },
{ "ccsid01145", 1145, yes },
{ "cp01145", 1145, yes },
{ "ebcdic-es-284+euro", 1145, yes },
{ "ibm01145", 1145, yes },
{ "ccsid01146", 1146, yes },
{ "cp01146", 1146, yes },
{ "ebcdic-gb-285+euro", 1146, yes },
{ "ibm01146", 1146, yes },
{ "ccsid01147", 1147, yes },
{ "cp01147", 1147, yes },
{ "ebcdic-fr-297+euro", 1147, yes },
{ "ibm01147", 1147, yes },
{ "ccsid01148", 1148, yes },
{ "cp01148", 1148, yes },
{ "ebcdic-international-500+euro", 1148, yes },
{ "ibm01148", 1148, yes },
{ "ccsid01149", 1149, yes },
{ "cp01149", 1149, yes },
{ "ebcdic-is-871+euro", 1149, yes },
{ "ibm01149", 1149, yes },
{ "iso-10646-ucs-2", 1200, yes },
{ "ucs-2", 1200, yes },
{ "unicode", 1200, yes },
{ "utf-16", 1200, yes },
{ "utf-16le", 1200, yes },
{ "unicodefffe", 1201, yes },
{ "utf-16be", 1201, yes },
{ "windows-1250", 1250, yes },
{ "x-cp1250", 1250, yes },
{ "windows-1251", 1251, yes },
{ "x-cp1251", 1251, yes },
{ "windows-1252", 1252, yes },
{ "x-ansi", 1252, yes },
{ "windows-1253", 1253, yes },
{ "windows-1254", 1254, yes },
{ "windows-1255", 1255, yes },
{ "cp1256", 1256, yes },
{ "windows-1256", 1256, yes },
{ "windows-1257", 1257, yes },
{ "windows-1258", 1258, yes },
{ "johab", 1361, yes },
{ "macintosh", 10000, yes },
{ "x-mac-japanese", 10001, yes },
{ "x-mac-chinesetrad", 10002, yes },
{ "x-mac-korean", 10003, yes },
{ "x-mac-arabic", 10004, yes },
{ "x-mac-hebrew", 10005, yes },
{ "x-mac-greek", 10006, yes },
{ "x-mac-cyrillic", 10007, yes },
{ "x-mac-chinesesimp", 10008, yes },
{ "x-mac-romanian", 10010, yes },
{ "x-mac-ukrainian", 10017, yes },
{ "x-mac-thai", 10021, yes },
{ "x-mac-ce", 10029, yes },
{ "x-mac-icelandic", 10079, yes },
{ "x-mac-turkish", 10081, yes },
{ "x-mac-croatian", 10082, yes },
{ "x-chinese-cns", 20000, yes },
{ "x-cp20001", 20001, yes },
{ "x-chinese-eten", 20002, yes },
{ "x-cp20003", 20003, yes },
{ "x-cp20004", 20004, yes },
{ "x-cp20005", 20005, yes },
{ "irv", 20105, yes },
{ "x-ia5", 20105, yes },
{ "din_66003", 20106, yes },
{ "german", 20106, yes },
{ "x-ia5-german", 20106, yes },
{ "sen_850200_b", 20107, yes },
{ "swedish", 20107, yes },
{ "x-ia5-swedish", 20107, yes },
{ "norwegian", 20108, yes },
{ "ns_4551-1", 20108, yes },
{ "x-ia5-norwegian", 20108, yes },
{ "ansi_x3.4-1968", 20127, yes },
{ "ansi_x3.4-1986", 20127, yes },
{ "ascii", 20127, yes },
{ "cp367", 20127, yes },
{ "csascii", 20127, yes },
{ "ibm367", 20127, yes },
{ "iso-ir-6", 20127, yes },
{ "iso646-us", 20127, yes },
{ "iso_646.irv:1991", 20127, yes },
{ "us", 20127, yes },
{ "us-ascii", 20127, yes },
{ "x-cp20261", 20261, yes },
{ "x-cp20269", 20269, yes },
{ "cp273", 20273, yes },
{ "csibm273", 20273, yes },
{ "ibm273", 20273, yes },
{ "csibm277", 20277, yes },
{ "ebcdic-cp-dk", 20277, yes },
{ "ebcdic-cp-no", 20277, yes },
{ "ibm277", 20277, yes },
{ "cp278", 20278, yes },
{ "csibm278", 20278, yes },
{ "ebcdic-cp-fi", 20278, yes },
{ "ebcdic-cp-se", 20278, yes },
{ "ibm278", 20278, yes },
{ "cp280", 20280, yes },
{ "csibm280", 20280, yes },
{ "ebcdic-cp-it", 20280, yes },
{ "ibm280", 20280, yes },
{ "cp284", 20284, yes },
{ "csibm284", 20284, yes },
{ "ebcdic-cp-es", 20284, yes },
{ "ibm284", 20284, yes },
{ "cp285", 20285, yes },
{ "csibm285", 20285, yes },
{ "ebcdic-cp-gb", 20285, yes },
{ "ibm285", 20285, yes },
{ "cp290", 20290, yes },
{ "csibm290", 20290, yes },
{ "ebcdic-jp-kana", 20290, yes },
{ "ibm290", 20290, yes },
{ "cp297", 20297, yes },
{ "csibm297", 20297, yes },
{ "ebcdic-cp-fr", 20297, yes },
{ "ibm297", 20297, yes },
{ "cp420", 20420, yes },
{ "csibm420", 20420, yes },
{ "ebcdic-cp-ar1", 20420, yes },
{ "ibm420", 20420, yes },
{ "cp423", 20423, yes },
{ "csibm423", 20423, yes },
{ "ebcdic-cp-gr", 20423, yes },
{ "ibm423", 20423, yes },
{ "cp424", 20424, yes },
{ "csibm424", 20424, yes },
{ "ebcdic-cp-he", 20424, yes },
{ "ibm424", 20424, yes },
{ "x-ebcdic-koreanextended", 20833, yes },
{ "csibmthai", 20838, yes },
{ "ibm-thai", 20838, yes },
{ "cskoi8r", 20866, yes },
{ "koi", 20866, yes },
{ "koi8", 20866, yes },
{ "koi8-r", 20866, yes },
{ "koi8r", 20866, yes },
{ "cp871", 20871, yes },
{ "csibm871", 20871, yes },
{ "ebcdic-cp-is", 20871, yes },
{ "ibm871", 20871, yes },
{ "cp880", 20880, yes },
{ "csibm880", 20880, yes },
{ "ebcdic-cyrillic", 20880, yes },
{ "ibm880", 20880, yes },
{ "cp905", 20905, yes },
{ "csibm905", 20905, yes },
{ "ebcdic-cp-tr", 20905, yes },
{ "ibm905", 20905, yes },
{ "ccsid00924", 20924, yes },
{ "cp00924", 20924, yes },
{ "ebcdic-latin9--euro", 20924, yes },
{ "ibm00924", 20924, yes },
{ "x-cp20936", 20936, yes },
{ "x-cp20949", 20949, yes },
{ "cp1025", 21025, yes },
{ "x-cp21027", 21027, yes },
{ "koi8-ru", 21866, yes },
{ "koi8-u", 21866, yes },
{ "cp819", 28591, yes },
{ "csisolatin1", 28591, yes },
{ "ibm819", 28591, yes },
{ "iso-8859-1", 28591, yes },
{ "iso-ir-100", 28591, yes },
{ "iso8859-1", 28591, yes },
{ "iso_8859-1", 28591, yes },
{ "iso_8859-1:1987", 28591, yes },
{ "l1", 28591, yes },
{ "latin1", 28591, yes },
{ "csisolatin2", 28592, yes },
{ "iso-8859-2", 28592, yes },
{ "iso-ir-101", 28592, yes },
{ "iso8859-2", 28592, yes },
{ "iso_8859-2", 28592, yes },
{ "iso_8859-2:1987", 28592, yes },
{ "l2", 28592, yes },
{ "latin2", 28592, yes },
{ "csisolatin3", 28593, yes },
{ "iso-8859-3", 28593, yes },
{ "iso-ir-109", 28593, yes },
{ "iso_8859-3", 28593, yes },
{ "iso_8859-3:1988", 28593, yes },
{ "l3", 28593, yes },
{ "latin3", 28593, yes },
{ "csisolatin4", 28594, yes },
{ "iso-8859-4", 28594, yes },
{ "iso-ir-110", 28594, yes },
{ "iso_8859-4", 28594, yes },
{ "iso_8859-4:1988", 28594, yes },
{ "l4", 28594, yes },
{ "latin4", 28594, yes },
{ "csisolatincyrillic", 28595, yes },
{ "cyrillic", 28595, yes },
{ "iso-8859-5", 28595, yes },
{ "iso-ir-144", 28595, yes },
{ "iso_8859-5", 28595, yes },
{ "iso_8859-5:1988", 28595, yes },
{ "arabic", 28596, yes },
{ "csisolatinarabic", 28596, yes },
{ "ecma-114", 28596, yes },
{ "iso-8859-6", 28596, yes },
{ "iso-ir-127", 28596, yes },
{ "iso_8859-6", 28596, yes },
{ "iso_8859-6:1987", 28596, yes },
{ "csisolatingreek", 28597, yes },
{ "ecma-118", 28597, yes },
{ "elot_928", 28597, yes },
{ "greek", 28597, yes },
{ "greek8", 28597, yes },
{ "iso-8859-7", 28597, yes },
{ "iso-ir-126", 28597, yes },
{ "iso_8859-7", 28597, yes },
{ "iso_8859-7:1987", 28597, yes },
{ "csisolatinhebrew", 28598, yes },
{ "hebrew", 28598, yes },
{ "iso-8859-8", 28598, yes },
{ "iso-ir-138", 28598, yes },
{ "iso_8859-8", 28598, yes },
{ "iso_8859-8:1988", 28598, yes },
{ "logical", 28598, yes },
{ "visual", 28598, yes },
{ "csisolatin5", 28599, yes },
{ "iso-8859-9", 28599, yes },
{ "iso-ir-148", 28599, yes },
{ "iso_8859-9", 28599, yes },
{ "iso_8859-9:1989", 28599, yes },
{ "l5", 28599, yes },
{ "latin5", 28599, yes },
{ "iso-8859-13", 28603, yes },
{ "csisolatin9", 28605, yes },
{ "iso-8859-15", 28605, yes },
{ "iso_8859-15", 28605, yes },
{ "l9", 28605, yes },
{ "latin9", 28605, yes },
{ "x-europa", 29001, yes },
{ "iso-8859-8-i", 38598, yes },
{ "iso-2022-jp", 50220, no },
{ "csiso2022jp", 50221, no },
{ "csiso2022kr", 50225, no },
{ "iso-2022-kr", 50225, no },
{ "iso-2022-kr-7", 50225, no },
{ "iso-2022-kr-7bit", 50225, no },
{ "cp50227", 50227, no },
{ "x-cp50227", 50227, no },
{ "cp930", 50930, yes },
{ "x-ebcdic-japaneseanduscanada", 50931, yes },
{ "cp933", 50933, yes },
{ "cp935", 50935, yes },
{ "cp937", 50937, yes },
{ "cp939", 50939, yes },
{ "cseucpkdfmtjapanese", 51932, yes },
{ "euc-jp", 51932, yes },
{ "extended_unix_code_packed_format_for_japanese", 51932, yes },
{ "iso-2022-jpeuc", 51932, yes },
{ "x-euc", 51932, yes },
{ "x-euc-jp", 51932, yes },
{ "euc-cn", 51936, yes },
{ "x-euc-cn", 51936, yes },
{ "cseuckr", 51949, yes },
{ "euc-kr", 51949, yes },
{ "iso-2022-kr-8", 51949, yes },
{ "iso-2022-kr-8bit", 51949, yes },
{ "hz-gb-2312", 52936, no },
{ "gb18030", 54936, yes },
{ "x-iscii-de", 57002, yes },
{ "x-iscii-be", 57003, yes },
{ "x-iscii-ta", 57004, yes },
{ "x-iscii-te", 57005, yes },
{ "x-iscii-as", 57006, yes },
{ "x-iscii-or", 57007, yes },
{ "x-iscii-ka", 57008, yes },
{ "x-iscii-ma", 57009, yes },
{ "x-iscii-gu", 57010, yes },
{ "x-iscii-pa", 57011, yes },
{ "csunicode11utf7", 65000, no },
{ "unicode-1-1-utf-7", 65000, no },
{ "unicode-2-0-utf-7", 65000, no },
{ "utf-7", 65000, no },
{ "x-unicode-1-1-utf-7", 65000, no },
{ "x-unicode-2-0-utf-7", 65000, no },
{ "unicode-1-1-utf-8", 65001, yes },
{ "unicode-2-0-utf-8", 65001, yes },
{ "utf-8", 65001, yes },
{ "x-unicode-1-1-utf-8", 65001, yes },
{ "x-unicode-2-0-utf-8", 65001, yes },
/* final entry */
{ NULL, 0, no }
};
/* Look up the Windows code page for an encoding name.
**
** The name is compared in lower case against NameWinCPMap. Returns the
** code page when the name is known, marked 'safe', and MLang can set up
** a conversion from that code page to 1200; returns 0 otherwise.
*/
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator, ctmbstr encoding)
{
    IMLangConvertCharset * conv = NULL;
    const struct _nameWinCPMap * entry;
    tmbstr lowered;
    uint codepage;
    HRESULT hr;

    /* ensure name is in lower case */
    lowered = TY_(tmbstrtolower)(TY_(tmbstrdup)(allocator, encoding));

    /* stop at the first matching row or at the terminating NULL row */
    for (entry = NameWinCPMap; entry->name; ++entry)
    {
        if (TY_(tmbstrcmp)(entry->name, lowered) == 0)
            break;
    }

    /* the copy is no longer needed whether or not a row matched */
    TidyFree(allocator, lowered);

    if (!entry->name)
        return 0;

    /* currently no support for unsafe encodings */
    if (!entry->safe)
        return 0;

    codepage = entry->wincp;

    /* hack for config.c */
    CoInitialize(NULL);

    /* probe whether MLang can really convert from this code page */
    hr = CreateMLangObject(conv);

    if (hr == S_OK && conv)
    {
        hr = IMLangConvertCharset_Initialize(conv, codepage, 1200, 0);

        if (hr != S_OK)
            codepage = 0;

        IMLangConvertCharset_Release(conv);
        conv = NULL;
    }
    else
    {
        codepage = 0;
    }

    CoUninitialize();

    return codepage;
}
/* Attach an MLang converter (code page wincp -> 1200) to the stream.
**
** On success the converter is stored in in->mlang and yes is returned.
** Returns no if wincp is 0, the MLang object cannot be created, or the
** conversion cannot be initialized; in->mlang is left untouched then.
**
** NOTE(review): CoInitialize() is called before any failure return and
** is not undone here on failure; the matching CoUninitialize() lives in
** Win32MLangUninitInputTranscoder. Confirm that callers invoke that
** function after a failed init as well, otherwise COM initialization
** stays unbalanced.
*/
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;

    assert( in != NULL );

    CoInitialize(NULL);

    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    hr = IMLangConvertCharset_Initialize(p, wincp, 1200, 0);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* p is not handed to the stream, so it has to be released */
        /* here or the COM object would be leaked */
        IMLangConvertCharset_Release(p);
        return no;
    }

    in->mlang = p;

    return yes;
}
/* Release the MLang converter attached to the stream, if any, clear
** in->mlang, and call CoUninitialize().
*/
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in)
{
    assert( in != NULL );

    if (in->mlang)
    {
        IMLangConvertCharset * conv = (IMLangConvertCharset *)in->mlang;

        IMLangConvertCharset_Release(conv);
        in->mlang = NULL;
    }

    CoUninitialize();
}
#if 0
/* Attach an MLang converter (1200 -> code page of 'encoding') to the
** output stream. Currently compiled out by the surrounding "#if 0".
** Returns yes and stores the converter in out->mlang on success, no
** otherwise.
*/
Bool Win32MLangInitOutputTranscoder(TidyAllocator *allocator, StreamOut * out, tmbstr encoding)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;
    uint wincp;

    assert( out != NULL );

    CoInitialize(NULL);

    wincp = TY_(Win32MLangGetCPFromName)(allocator, encoding);
    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    /* capture the result: without the assignment the check below */
    /* would only re-test the HRESULT of the object creation */
    hr = IMLangConvertCharset_Initialize(p, 1200, wincp, MLCONVCHARF_NOBESTFITCHARS);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* p is not handed to the stream, release it to avoid a leak */
        IMLangConvertCharset_Release(p);
        return no;
    }

    out->mlang = p;

    return yes;
}
/* Release the MLang converter attached to the output stream, if any,
** clear out->mlang, and call CoUninitialize(). Currently compiled out
** by the surrounding "#if 0".
*/
void Win32MLangUninitOutputTranscoder(StreamOut * out)
{
    assert( out != NULL );

    if (out->mlang)
    {
        IMLangConvertCharset * conv = (IMLangConvertCharset *)out->mlang;

        IMLangConvertCharset_Release(conv);
        out->mlang = NULL;
    }

    CoUninitialize();
}
#endif
/* Read one character from the stream through the MLang converter.
**
**   firstByte - byte already read by the caller
**   in        - stream whose source supplies further bytes and whose
**               mlang member holds the converter
**   bytesRead - receives a byte count on every return path
**
** Bytes are appended one at a time until the converter produces one
** UTF-16 unit (returned as is) or two (returned combined into a single
** value). Returns EndOfStream when the source runs dry or when no
** character results from TC_INBUFSIZE bytes.
*/
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead)
{
    IMLangConvertCharset * p;
    TidyInputSource * source;
    CHAR inbuf[TC_INBUFSIZE] = { 0 };
    WCHAR outbuf[TC_OUTBUFSIZE] = { 0 };
    HRESULT hr = S_OK;
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( &in->source != NULL );
    assert( bytesRead != NULL );
    assert( in->mlang != NULL );

    p = (IMLangConvertCharset *)in->mlang;
    source = &in->source;

    inbuf[inbufsize++] = (CHAR)firstByte;

    while(inbufsize < TC_INBUFSIZE)
    {
        UINT outbufsize = TC_OUTBUFSIZE;
        UINT readNow = (UINT)inbufsize;
        int nextByte = EndOfStream;

        /* retry the conversion with everything collected so far */
        hr = IMLangConvertCharset_DoConversionToUnicode(p, inbuf, &readNow, outbuf, &outbufsize);

        assert( hr == S_OK );
        assert( outbufsize <= 2 );

        if (outbufsize == 2)
        {
            /* U+10000-U+10FFFF are returned as a pair of surrogates */
            /* outbuf[0] is the 0xD800 area unit ("low" in utf8.c) */
            tchar m = (tchar)outbuf[0];
            tchar n = (tchar)outbuf[1];
            assert( TY_(IsHighSurrogate)(n) && TY_(IsLowSurrogate)(m) );
            *bytesRead = readNow;
            return (int)TY_(CombineSurrogatePair)(n, m);
        }

        if (outbufsize == 1)
        {
            /* we found the character   */
            /* set bytesRead and return */
            *bytesRead = readNow;
            return (int)outbuf[0];
        }

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */
            *bytesRead = readNow;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (CHAR)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    /* report the bytes taken from the stream so that the out */
    /* parameter is defined on this path as well */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}
/* Ask the output stream's MLang converter whether character c can be
** converted. Values above 0xFFFF are passed as a surrogate pair.
** Returns yes when the conversion call reports S_OK, no otherwise,
** including when c cannot be split into a surrogate pair.
*/
Bool Win32MLangIsConvertible(tchar c, StreamOut * out)
{
    IMLangConvertCharset * p;
    HRESULT hr;
    WCHAR inbuf[2] = { 0 };
    UINT inbufsize = 0;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( out != NULL );
    assert( out->mlang != NULL );

    if (c > 0xFFFF)
    {
        tchar high = 0;
        tchar low = 0;

        /* a value that cannot be split has no UTF-16 form to convert */
        if (!TY_(SplitSurrogatePair)(c, &low, &high))
            return no;

        /* the 0xD800 area unit ("low" in utf8.c) goes first */
        inbuf[inbufsize++] = (WCHAR)low;
        inbuf[inbufsize++] = (WCHAR)high;
    }
    else
        inbuf[inbufsize++] = (WCHAR)c;

    p = (IMLangConvertCharset *)out->mlang;
    /* no destination buffer is passed: only the result code is used */
    hr = IMLangConvertCharset_DoConversionFromUnicode(p, inbuf, &inbufsize, NULL, NULL);

    return hr == S_OK ? yes : no;
}
/* Win32MLangPutChar -- converts the Unicode character c with the MLang
** converter attached to out->mlang and passes the resulting bytes, one
** at a time, to out->sink.
**
** c             Unicode code point; must be non-zero and <= 0x10FFFF.
** out           output stream providing the converter and the sink.
** bytesWritten  receives the number of bytes handed to the sink; set
**               to 0 when the conversion did not succeed.
*/
void Win32MLangPutChar(tchar c, StreamOut * out, uint * bytesWritten)
{
    IMLangConvertCharset * p;
    TidyOutputSink * sink;
    CHAR outbuf[TC_OUTBUFSIZE] = { 0 };
    UINT outbufsize = TC_OUTBUFSIZE;
    HRESULT hr = S_OK;
    WCHAR inbuf[2] = { 0 };
    UINT inbufsize = 0;
    uint i;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( bytesWritten != NULL );
    assert( out != NULL );
    assert( out->mlang != NULL );

    p = (IMLangConvertCharset *)out->mlang;
    sink = &out->sink;

    if (c > 0xFFFF)
    {
        /* Characters beyond U+FFFF are handed to MLang as two UTF-16
        ** units. "low" is stored first: the decoding path in this file
        ** likewise treats the first unit of a pair as the "low"
        ** surrogate.
        ** NOTE(review): tidy's low/high naming looks reversed relative
        ** to the Unicode standard's -- confirm before reordering. */
        tchar high = 0;
        tchar low = 0;

        TY_(SplitSurrogatePair)(c, &low, &high);

        inbuf[inbufsize++] = (WCHAR)low;
        inbuf[inbufsize++] = (WCHAR)high;
    }
    else
        inbuf[inbufsize++] = (WCHAR)c;

    hr = IMLangConvertCharset_DoConversionFromUnicode(p, inbuf, &inbufsize, outbuf, &outbufsize);

    assert( hr == S_OK );
    assert( outbufsize > 0 );
    assert( inbufsize == 1 || inbufsize == 2 );

    /* The asserts above disappear under NDEBUG. Without an explicit
    ** check a failed conversion would push whatever outbuf and
    ** outbufsize happen to hold into the sink, and an oversized
    ** outbufsize would read past the end of outbuf. The success test
    ** matches the one used by Win32MLangIsConvertible. */
    if (hr != S_OK || outbufsize > TC_OUTBUFSIZE)
    {
        *bytesWritten = 0;
        return;
    }

    for (i = 0; i < outbufsize; ++i)
        sink->putByte(sink->sinkData, (byte)(outbuf[i]));

    *bytesWritten = outbufsize;
}
#endif /* TIDY_WIN32_MLANG_SUPPORT */
/*
* local variables:
* mode: c
* indent-tabs-mode: nil
* c-basic-offset: 4
* eval: (c-set-offset 'substatement-open 0)
* end:
*/

19
src/win32tc.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef __WIN32TC_H__
#define __WIN32TC_H__
#ifdef TIDY_WIN32_MLANG_SUPPORT
/* win32tc.h -- Interface to Win32 transcoding routines
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
See tidy.h for the copyright notice.
$Id: win32tc.h,v 1.3 2006/12/29 16:31:09 arnaud02 Exp $

Everything below is compiled only when TIDY_WIN32_MLANG_SUPPORT is
defined; otherwise this header declares nothing.

NOTE(review): win32tc.c also defines Win32MLangIsConvertible and
Win32MLangPutChar (without the TY_() wrapper); neither is declared
here -- confirm whether that is intentional.
*/
/* Presumably maps an encoding name to a Windows code page number;
   the meaning of the return value on an unknown name is not visible
   from this header -- TODO confirm in win32tc.c. */
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator,ctmbstr encoding);
/* Presumably prepares the MLang converter used by `in` for code page
   `wincp`, with the Bool result reporting success -- TODO confirm. */
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp);
/* Presumably releases what Win32MLangInitInputTranscoder set up for
   `in` -- TODO confirm. */
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in);
/* Decodes one character that starts with firstByte, fetching further
   bytes from in->source as needed. Per the implementation in
   win32tc.c it returns the decoded character (surrogate pairs are
   combined) or EndOfStream, and stores the number of input bytes
   consumed in *bytesRead. */
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead);
#endif /* TIDY_WIN32_MLANG_SUPPORT */
#endif /* __WIN32TC_H__ */

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.1</title>
</head>
<body>
<img src="noAlt.jpg">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.1</title>
</head>
<body>
<img src="hasAlt.gif" alt="0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.1</title>
</head>
<body>
<img src="gifimage.gif" alt="gifimage.gif">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.1</title>
</head>
<body>
<img src="bytesImage.gif" alt="34K bytes">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.1</title>
</head>
<body>
<img src="animage.gif" alt="{short description of image}">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.10</title>
</head>
<body>
<script><!-- do nothing --></script>
</body>
</html>

View file

@ -0,0 +1,23 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.12</title>
</head>
<body>
<pre>
% __ __ __ __ __ __ __ __ __ __ __ __ __ __
100 | * |
90 | * * |
80 | * * |
70 | @ * |
60 | @ * |
50 | * @ * |
40 | @ * |
30 | * @ @ @ * |
20 | |
10 | @ @ @ @ @ |
0 5 10 15 20 25 30 35 40 45 50 55 60 65 70
Flash frequency (Hertz)
</pre>
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.2</title>
</head>
<body>
<img src="pie-chart.jpg" alt="Pie chart of federal expenditures">
</body>
</html>

View file

@ -0,0 +1,9 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>bobby/g13</title>
</head>
<body>
<img src="pie-chart.jpg" longdesc="pie-chart.html" alt="Pie chart of federal expenditures">
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.2</title>
</head>
<body>
<img src="pie-chart.jpg" alt="Pie chart of federal expenditures">
<a href="pie-chart.html">D</a>
</body>
</html>

View file

@ -0,0 +1,11 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en">
<head>
<title>aert1.0/1.1.3</title>
</head>
<body>
<form action="">
<input type="image" name="submit" src="submit.jpg">
</form>
</body>
</html>

Some files were not shown because too many files have changed in this diff Show more