new
This commit is contained in:
commit
b92d7aab88
236
build/gmake/Makefile
Normal file
236
build/gmake/Makefile
Normal file
|
@ -0,0 +1,236 @@
|
|||
# Makefile - for tidy - HTML parser and pretty printer
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2008/03/22 21:13:38 $
|
||||
# $Revision: 1.37 $
|
||||
#
|
||||
# Copyright (c) 1998-2008 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Contributing Author(s):
|
||||
#
|
||||
# Dave Raggett <dsr@w3.org>
|
||||
# Terry Teague <terry_teague@users.sourceforge.net>
|
||||
# Pradeep Padala<ppadala@users.sourceforge.net>
|
||||
#
|
||||
# The contributing author(s) would like to thank all those who
|
||||
# helped with testing, bug fixes, and patience. This wouldn't
|
||||
# have been possible without all of you.
|
||||
#
|
||||
# COPYRIGHT NOTICE:
|
||||
#
|
||||
# This software and documentation is provided "as is," and
|
||||
# the copyright holders and contributing author(s) make no
|
||||
# representations or warranties, express or implied, including
|
||||
# but not limited to, warranties of merchantability or fitness
|
||||
# for any particular purpose or that the use of the software or
|
||||
# documentation will not infringe any third party patents,
|
||||
# copyrights, trademarks or other rights.
|
||||
#
|
||||
# The copyright holders and contributing author(s) will not be
|
||||
# liable for any direct, indirect, special or consequential damages
|
||||
# arising out of any use of the software or documentation, even if
|
||||
# advised of the possibility of such damage.
|
||||
#
|
||||
# Permission is hereby granted to use, copy, modify, and distribute
|
||||
# this source code, or portions hereof, documentation and executables,
|
||||
# for any purpose, without fee, subject to the following restrictions:
|
||||
#
|
||||
# 1. The origin of this source code must not be misrepresented.
|
||||
# 2. Altered versions must be plainly marked as such and must
|
||||
# not be misrepresented as being the original source.
|
||||
# 3. This Copyright notice may not be removed or altered from any
|
||||
# source or altered source distribution.
|
||||
#
|
||||
# The copyright holders and contributing author(s) specifically
|
||||
# permit, without fee, and encourage the use of this source code
|
||||
# as a component for supporting the Hypertext Markup Language in
|
||||
# commercial products. If you use this source code in a product,
|
||||
# acknowledgment is not required but would be appreciated.
|
||||
#
|
||||
|
||||
# Shell used to run every recipe.
SHELL = /bin/sh

PROJECT = tidy

# Installation locations.  Spaces are OK here: these are only used for
# directory creation and file copy operations.
runinst_prefix = /usr/local
devinst_prefix = /usr/local

bininst = $(runinst_prefix)/bin
libinst = $(devinst_prefix)/lib
incinst = $(devinst_prefix)/include/$(PROJECT)
maninst = $(devinst_prefix)/man

# Build-tree locations.  No spaces allowed here: libtool chokes on spaces
# in directory names.
TOPDIR = ../..
INCDIR = $(TOPDIR)/include
APPDIR = $(TOPDIR)/console
SRCDIR = $(TOPDIR)/src
OBJDIR = ./obj
LIBDIR = $(TOPDIR)/lib
BINDIR = $(TOPDIR)/bin
DOCDIR = $(TOPDIR)/htmldoc
|
||||
|
||||
# Note about shared library and exported symbols:
|
||||
# With gcc, one can control the exported symbols by either using
|
||||
# "-fvisibility=hidden -DTIDY_EXPORT='__attribute__((visibility("default")))'"
|
||||
# or using a linker map (see GNU ld "--version-script").
|
||||
|
||||
# Lookup based on hash table can be disabled with
|
||||
# "-DELEMENT_HASH_LOOKUP=0 -DATTRIBUTE_HASH_LOOKUP=0"
|
||||
|
||||
# Memory mapped i/o can be disabled with -DSUPPORT_POSIX_MAPPED_FILES=0
|
||||
#
|
||||
|
||||
# CFLAGS etc..
|
||||
# For optimised builds, flags such as "-O2" should be added and -D_DEBUG=1
|
||||
# disabled.
|
||||
# Compiler and the base flags used for every compile.
CC = gcc
CFLAGS = -g -pedantic -Wall -I $(INCDIR)
# flags only supported with gcc 3.x
CFLAGS += -Wunused-parameter

# Additional flags passed alongside CFLAGS on every compile line.
OTHERCFLAGS =
OTHERCFLAGS += -D_DEBUG=1
# OTHERCFLAGS+= -fvisibility=hidden -DTIDY_EXPORT='__attribute__((visibility("default")))'

# Optional feature switches.  When one of these variables is set (on the
# make command line or in the environment) its value is forwarded to the
# compiler as a preprocessor definition of the same name.
ifdef SUPPORT_UTF16_ENCODINGS
CFLAGS += -DSUPPORT_UTF16_ENCODINGS=$(SUPPORT_UTF16_ENCODINGS)
endif
ifdef SUPPORT_ASIAN_ENCODINGS
CFLAGS += -DSUPPORT_ASIAN_ENCODINGS=$(SUPPORT_ASIAN_ENCODINGS)
endif
ifdef SUPPORT_ACCESSIBILITY_CHECKS
CFLAGS += -DSUPPORT_ACCESSIBILITY_CHECKS=$(SUPPORT_ACCESSIBILITY_CHECKS)
endif

# Extra flags and libraries appended by the "debug" target.
# The dmalloc library is only linked when DMALLOC is requested, so a plain
# "make debug" no longer requires libdmalloc to be installed.
DEBUGFLAGS = -g
DEBUGLIBS =
ifdef DMALLOC
DEBUGFLAGS += -DDMALLOC
DEBUGLIBS += -ldmalloc
endif

# Libraries appended to link lines.
LIBS =
|
||||
|
||||
# Tidy lib related variables
TIDY_MAJOR = 1
TIDY_MINOR = 0

# This will come from autoconf again
LIBPREFIX = lib
LIBSUFFIX = .a
OBJSUF = .o

# Full path of the static library, and the archiver command that fills it.
LIBRARY = $(LIBDIR)/$(LIBPREFIX)$(PROJECT)$(LIBSUFFIX)
AR = ar -r

# XSLT processor used to generate the documentation.
XSLTPROC = xsltproc

# Executables built by "all" and documents built by "doc".
EXES = $(addprefix $(BINDIR)/,$(PROJECT) tab2space)
DOCS = $(addprefix $(DOCDIR)/,quickref.html tidy.1)

# XML dumps produced by the tidy executable; inputs to the doc stylesheets.
CONFIGXML = $(DOCDIR)/tidy-config.xml
HELPXML = $(DOCDIR)/tidy-help.xml
|
||||
|
||||
# Library sources, in archive order.
CFILES = $(addprefix $(SRCDIR)/, \
    access.c attrs.c istack.c \
    parser.c tags.c entities.c \
    lexer.c pprint.c clean.c \
    localize.c config.c alloc.c \
    attrask.c attrdict.c attrget.c \
    buffio.c fileio.c streamio.c \
    tagask.c tmbstr.c utf8.c \
    tidylib.c mappedio.c)

# One object per source file, same order, placed in $(OBJDIR).
OBJFILES = $(patsubst $(SRCDIR)/%.c,$(OBJDIR)/%$(OBJSUF),$(CFILES))
|
||||
|
||||
# Public headers (installed by "installhdrs").
HFILES = $(addprefix $(INCDIR)/, \
    platform.h tidy.h tidyenum.h buffio.h)

# Library-internal headers; every object file depends on all of them.
LIBHFILES = $(addprefix $(SRCDIR)/, \
    access.h attrs.h attrdict.h \
    clean.h config.h entities.h \
    fileio.h forward.h lexer.h \
    mappedio.h message.h parser.h \
    pprint.h streamio.h tags.h \
    tmbstr.h utf8.h tidy-int.h \
    version.h)
|
||||
|
||||
|
||||
|
||||
# These targets name actions, not files.  Declaring them phony means a
# stray file called e.g. "clean" or "install" cannot make them appear
# up to date.
.PHONY: all doc debug clean install installhdrs installib installexes installmanpage

# Default target: the static library and the command-line executables.
all: $(LIBRARY) $(EXES)

# Generated documentation (quick reference and man page).
doc: $(DOCS)
|
||||
|
||||
# Archive all object files into the static library, creating the library
# directory first if it is missing.  RANLIB is run only when it is set.
$(LIBRARY): $(OBJFILES)
	[ -d $(LIBDIR) ] || mkdir $(LIBDIR)
	$(AR) $@ $^
ifdef RANLIB
	$(RANLIB) $@
endif
|
||||
|
||||
# Compile one library source file into $(OBJDIR).
# Every object depends on all public and internal headers and on this
# Makefile, so changing any of them rebuilds the whole library.
# "mkdir -p" succeeds when the directory already exists, so parallel
# compiles (make -j) cannot fail racing to create $(OBJDIR).
$(OBJDIR)/%$(OBJSUF): $(SRCDIR)/%.c $(HFILES) $(LIBHFILES) Makefile
	mkdir -p $(OBJDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ -c $<
|
||||
|
||||
# Build the tidy command-line executable from console/tidy.c and the
# static library.
# $(LIBS) is included on the link line so that libraries added by the
# "debug" target (e.g. -ldmalloc) reach this executable as they already do
# for tab2space.
$(BINDIR)/$(PROJECT): $(APPDIR)/tidy.c $(HFILES) $(LIBRARY)
	mkdir -p $(BINDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ $(APPDIR)/tidy.c -I$(INCDIR) $(LIBRARY) $(LIBS)
|
||||
|
||||
# Build the tab2space utility from its single source file; it is not
# linked against the tidy library.
$(BINDIR)/tab2space: $(APPDIR)/tab2space.c
	[ -d $(BINDIR) ] || mkdir $(BINDIR)
	$(CC) $(CFLAGS) $(OTHERCFLAGS) -o $@ $< $(LIBS)
|
||||
|
||||
# Dump tidy's command-line help as XML.
# Output goes to a temporary file and is renamed only on success, so a
# failed or interrupted run never leaves a truncated file that looks
# up to date.
$(HELPXML): $(BINDIR)/$(PROJECT)
	$(BINDIR)/$(PROJECT) -xml-help > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Dump tidy's configuration options as XML (same temp-file scheme).
$(CONFIGXML): $(BINDIR)/$(PROJECT)
	$(BINDIR)/$(PROJECT) -xml-config > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
|
||||
|
||||
# Quick reference: apply the HTML stylesheet (first prerequisite) to the
# configuration XML.
$(DOCDIR)/quickref.html: $(DOCDIR)/quickref-html.xsl $(CONFIGXML)
	$(XSLTPROC) -o $@ $< $(CONFIGXML)

# Man page: apply the man stylesheet (first prerequisite) to the help XML.
# The configuration XML is a prerequisite but is not passed on the
# command line.
$(DOCDIR)/tidy.1: $(DOCDIR)/tidy1.xsl $(HELPXML) $(CONFIGXML)
	$(XSLTPROC) -o $@ $< $(HELPXML)
|
||||
|
||||
# Re-run make on "all" with the debug flags and libraries appended to
# CFLAGS and LIBS.
debug:
	@$(MAKE) all \
	    CFLAGS='$(CFLAGS) $(DEBUGFLAGS)' \
	    LIBS='$(LIBS) $(DEBUGLIBS)'
|
||||
|
||||
# Remove everything this Makefile builds, then remove the output
# directories.  Directories are removed with rmdir, so one that still
# holds other files makes the target fail rather than being deleted.
# A directory equal to $(TOPDIR) is never removed.
clean:
	rm -f $(OBJFILES) $(EXES) $(LIBRARY) $(DOCS) $(HELPXML) $(CONFIGXML) $(OBJDIR)/*.lo
	[ ! -d $(OBJDIR)/.libs ] || rmdir $(OBJDIR)/.libs
	[ ! -d $(LIBDIR)/.libs ] || rmdir $(LIBDIR)/.libs
	for d in $(OBJDIR) $(LIBDIR) $(BINDIR); do \
	    if [ "$$d" != "$(TOPDIR)" ] && [ -d "$$d" ]; then \
	        rmdir "$$d" || exit 1; \
	    fi; \
	done
|
||||
|
||||
# Installation targets.
# Every destination is prefixed with $(DESTDIR) so packagers can stage the
# install into a scratch root (make DESTDIR=/tmp/stage install).  DESTDIR
# is empty by default, which gives the same paths as before.
# Destinations are quoted because the install prefixes may contain spaces.

# Public headers.
installhdrs: $(HFILES)
	mkdir -p "$(DESTDIR)$(incinst)"
	cp -f $(HFILES) "$(DESTDIR)$(incinst)/"

# Static library.
installib: $(LIBRARY)
	mkdir -p "$(DESTDIR)$(libinst)"
	cp -f $(LIBRARY) "$(DESTDIR)$(libinst)/"

# Executables.
installexes: $(EXES)
	mkdir -p "$(DESTDIR)$(bininst)"
	cp -f $(EXES) "$(DESTDIR)$(bininst)/"

# Man page (section 1).
installmanpage: $(DOCDIR)/tidy.1
	mkdir -p "$(DESTDIR)$(maninst)/man1"
	cp -f $(DOCDIR)/tidy.1 "$(DESTDIR)$(maninst)/man1/tidy.1"

install: installhdrs installib installexes installmanpage
|
16
build/gmake/readme.txt
Normal file
16
build/gmake/readme.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
This Makefile works on most Unix platforms. Although, by default, it
|
||||
runs gcc, setting the CC macro lets it run with many C compilers.
|
||||
|
||||
You can override the default build options by setting environment
|
||||
variables of the same name as the corresponding macro: DMALLOC,
|
||||
SUPPORT_ACCESSIBILITY_CHECKS, SUPPORT_UTF16_ENCODINGS and
|
||||
SUPPORT_ASIAN_ENCODINGS.
|
||||
|
||||
$ DMALLOC=1 gmake
|
||||
|
||||
Note that this Makefile will only run with GNU make (gmake), but you should be able
|
||||
to easily locate a pre-built executable for your platform.
|
||||
|
||||
To customize the location of output files or install locations, just
|
||||
edit the Makefile. There are variable definitions for just about
|
||||
everything, so you shouldn't have to alter the build rules.
|
63
build/gnuauto/Makefile.am
Normal file
63
build/gnuauto/Makefile.am
Normal file
|
@ -0,0 +1,63 @@
|
|||
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: creitzel $
|
||||
# $Date: 2003/03/19 18:37:37 $
|
||||
# $Revision: 1.3 $
|
||||
#
|
||||
# Copyright (c) 1998-2003 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Contributing Author(s):
|
||||
#
|
||||
# Dave Raggett <dsr@w3.org>
|
||||
# Terry Teague <terry_teague@users.sourceforge.net>
|
||||
# Pradeep Padala<ppadala@users.sourceforge.net>
|
||||
#
|
||||
# The contributing author(s) would like to thank all those who
|
||||
# helped with testing, bug fixes, and patience. This wouldn't
|
||||
# have been possible without all of you.
|
||||
#
|
||||
# COPYRIGHT NOTICE:
|
||||
#
|
||||
# This software and documentation is provided "as is," and
|
||||
# the copyright holders and contributing author(s) make no
|
||||
# representations or warranties, express or implied, including
|
||||
# but not limited to, warranties of merchantability or fitness
|
||||
# for any particular purpose or that the use of the software or
|
||||
# documentation will not infringe any third party patents,
|
||||
# copyrights, trademarks or other rights.
|
||||
#
|
||||
# The copyright holders and contributing author(s) will not be
|
||||
# liable for any direct, indirect, special or consequential damages
|
||||
# arising out of any use of the software or documentation, even if
|
||||
# advised of the possibility of such damage.
|
||||
#
|
||||
# Permission is hereby granted to use, copy, modify, and distribute
|
||||
# this source code, or portions hereof, documentation and executables,
|
||||
# for any purpose, without fee, subject to the following restrictions:
|
||||
#
|
||||
# 1. The origin of this source code must not be misrepresented.
|
||||
# 2. Altered versions must be plainly marked as such and must
|
||||
# not be misrepresented as being the original source.
|
||||
# 3. This Copyright notice may not be removed or altered from any
|
||||
# source or altered source distribution.
|
||||
#
|
||||
# The copyright holders and contributing author(s) specifically
|
||||
# permit, without fee, and encourage the use of this source code
|
||||
# as a component for supporting the Hypertext Markup Language in
|
||||
# commercial products. If you use this source code in a product,
|
||||
# acknowledgment is not required but would be appreciated.
|
||||
#
|
||||
|
||||
# Sub-directories that have their own Makefile.am: the library (src),
# the command-line programs (console) and the public headers (include).
SUBDIRS = src console include
|
||||
|
||||
#TODO: Pull man page from htmldoc
|
||||
#installmanpage:
|
||||
# if [ -f "$(TOPDIR)/htmldoc/man_page.txt" ] ; then \
|
||||
# if [ ! -d "$(maninst)/man1" ]; then mkdir -p "$(maninst)/man1"; fi; \
|
||||
# cp -f $(TOPDIR)/htmldoc/man_page.txt "$(maninst)/man1/tidy.1"; \
|
||||
# fi
|
133
build/gnuauto/configure.in
Normal file
133
build/gnuauto/configure.in
Normal file
|
@ -0,0 +1,133 @@
|
|||
# configure.in - HTML TidyLib GNU autoconf input file
|
||||
#
|
||||
# Copyright (c) 2003-2004 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2008/03/24 21:08:16 $
|
||||
# $Revision: 1.4 $
|
||||
#
|
||||
|
||||
AC_INIT([include/tidy.h])
|
||||
|
||||
# Making releases:
|
||||
#
|
||||
# TIDY_MICRO_VERSION += 1;
|
||||
# TIDY_INTERFACE_AGE += 1;
|
||||
# TIDY_BINARY_AGE += 1;
|
||||
#
|
||||
# if any functions have been added, set TIDY_INTERFACE_AGE to 0.
|
||||
# if backwards compatibility has been broken,
|
||||
# set TIDY_BINARY_AGE and TIDY_INTERFACE_AGE to 0.
|
||||
#
|
||||
TIDY_MAJOR_VERSION=0
TIDY_MINOR_VERSION=99
TIDY_MICRO_VERSION=0
TIDY_INTERFACE_AGE=0
TIDY_BINARY_AGE=0

# Dotted package version, substituted into the generated files.
LIBTIDY_VERSION=${TIDY_MAJOR_VERSION}.${TIDY_MINOR_VERSION}.${TIDY_MICRO_VERSION}
AC_SUBST([LIBTIDY_VERSION])

# libtool versioning
#
LT_RELEASE=${TIDY_MAJOR_VERSION}.${TIDY_MINOR_VERSION}
AC_SUBST([LT_RELEASE])

LT_CURRENT=`expr ${TIDY_MICRO_VERSION} - ${TIDY_INTERFACE_AGE}`
AC_SUBST([LT_CURRENT])

LT_REVISION=${TIDY_INTERFACE_AGE}
AC_SUBST([LT_REVISION])

LT_AGE=`expr ${TIDY_BINARY_AGE} - ${TIDY_INTERFACE_AGE}`
AC_SUBST([LT_AGE])

AM_INIT_AUTOMAKE(tidy,$LIBTIDY_VERSION)
|
||||
|
||||
# Checks for programs.
|
||||
|
||||
# =============================================
|
||||
# AC_PROG_CC has a habit of adding -g to CFLAGS
|
||||
#
|
||||
# Remember the CFLAGS the user supplied before the compiler check runs.
save_cflags="$CFLAGS"

AC_PROG_CC

# -Wall is only used when the compiler is gcc.
WARNING_CFLAGS=""
if test "x$GCC" = "xyes"; then
  WARNING_CFLAGS="-Wall"
fi
AC_SUBST([WARNING_CFLAGS])

# --enable-debug selects -g; otherwise -O2 is used.  Either way the flag is
# combined with the user's original CFLAGS saved above.
debug_build=no
AC_ARG_ENABLE([debug],[ --enable-debug add -g (instead of -O2) to CFLAGS],[
  case "x$enableval" in
    xyes) debug_build=yes ;;
  esac
])
case $debug_build in
  yes) CFLAGS="$save_cflags -g" ;;
  *)   CFLAGS="-O2 $save_cflags" ;;
esac
|
||||
#
|
||||
# =============================================
|
||||
|
||||
AC_PROG_CPP
|
||||
AC_PROG_CXX
|
||||
AC_PROG_INSTALL
|
||||
AC_PROG_LN_S
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PROG_MAKE_SET
|
||||
|
||||
# Accessibility checks: on by default, switched off only by an explicit
# --enable-access=no / --disable-access.
support_access=yes
AC_ARG_ENABLE([access],[ --enable-access support accessibility checks],[
  case "x$enableval" in
    xno) support_access=no ;;
  esac
])
case $support_access in
  yes) AC_DEFINE(SUPPORT_ACCESSIBILITY_CHECKS,1) ;;
  *)   AC_DEFINE(SUPPORT_ACCESSIBILITY_CHECKS,0) ;;
esac
|
||||
|
||||
# UTF-16 encodings: on by default, switched off only by an explicit
# --enable-utf16=no / --disable-utf16.
support_utf16=yes
AC_ARG_ENABLE([utf16],[ --enable-utf16 support UTF-16 encoding],[
  case "x$enableval" in
    xno) support_utf16=no ;;
  esac
])
case $support_utf16 in
  yes) AC_DEFINE(SUPPORT_UTF16_ENCODINGS,1) ;;
  *)   AC_DEFINE(SUPPORT_UTF16_ENCODINGS,0) ;;
esac
|
||||
|
||||
# Asian encodings: on by default, switched off only by an explicit
# --enable-asian=no / --disable-asian.
support_asian=yes
AC_ARG_ENABLE([asian],[ --enable-asian support asian encodings],[
  case "x$enableval" in
    xno) support_asian=no ;;
  esac
])
case $support_asian in
  yes) AC_DEFINE(SUPPORT_ASIAN_ENCODINGS,1) ;;
  *)   AC_DEFINE(SUPPORT_ASIAN_ENCODINGS,0) ;;
esac
|
||||
|
||||
# AM_WITH_DMALLOC defines "WITH_DMALLOC", but tidy expects "DMALLOC".
# When --with-dmalloc was requested, define DMALLOC as well so the
# sources see the macro they test for.
# NOTE(review): relies on AM_WITH_DMALLOC recording the option in
# $with_dmalloc (the usual AC_ARG_WITH variable) -- confirm against the
# installed automake.
#
AM_WITH_DMALLOC
if test "x$with_dmalloc" = "xyes"; then
  AC_DEFINE(DMALLOC,1)
fi
|
||||
|
||||
AC_OUTPUT([
|
||||
Makefile
|
||||
src/Makefile
|
||||
console/Makefile
|
||||
include/Makefile
|
||||
])
|
64
build/gnuauto/console/Makefile.am
Normal file
64
build/gnuauto/console/Makefile.am
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2008/03/17 12:49:40 $
|
||||
# $Revision: 1.3 $
|
||||
#
|
||||
# Copyright (c) 1998-2008 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Contributing Author(s):
|
||||
#
|
||||
# Dave Raggett <dsr@w3.org>
|
||||
# Terry Teague <terry_teague@users.sourceforge.net>
|
||||
# Pradeep Padala<ppadala@users.sourceforge.net>
|
||||
#
|
||||
# The contributing author(s) would like to thank all those who
|
||||
# helped with testing, bug fixes, and patience. This wouldn't
|
||||
# have been possible without all of you.
|
||||
#
|
||||
# COPYRIGHT NOTICE:
|
||||
#
|
||||
# This software and documentation is provided "as is," and
|
||||
# the copyright holders and contributing author(s) make no
|
||||
# representations or warranties, express or implied, including
|
||||
# but not limited to, warranties of merchantability or fitness
|
||||
# for any particular purpose or that the use of the software or
|
||||
# documentation will not infringe any third party patents,
|
||||
# copyrights, trademarks or other rights.
|
||||
#
|
||||
# The copyright holders and contributing author(s) will not be
|
||||
# liable for any direct, indirect, special or consequential damages
|
||||
# arising out of any use of the software or documentation, even if
|
||||
# advised of the possibility of such damage.
|
||||
#
|
||||
# Permission is hereby granted to use, copy, modify, and distribute
|
||||
# this source code, or portions hereof, documentation and executables,
|
||||
# for any purpose, without fee, subject to the following restrictions:
|
||||
#
|
||||
# 1. The origin of this source code must not be misrepresented.
|
||||
# 2. Altered versions must be plainly marked as such and must
|
||||
# not be misrepresented as being the original source.
|
||||
# 3. This Copyright notice may not be removed or altered from any
|
||||
# source or altered source distribution.
|
||||
#
|
||||
# The copyright holders and contributing author(s) specifically
|
||||
# permit, without fee, and encourage the use of this source code
|
||||
# as a component for supporting the Hypertext Markup Language in
|
||||
# commercial products. If you use this source code in a product,
|
||||
# acknowledgment is not required but would be appreciated.
|
||||
#
|
||||
|
||||
# Project-specific compiler flags only.  Automake already appends the
# user's $(CFLAGS) to every compile command, so repeating @CFLAGS@ here
# would pass those flags twice.
AM_CFLAGS = @WARNING_CFLAGS@

# Preprocessor flags: locate the public tidy headers.  AM_CPPFLAGS is the
# current name for what the deprecated INCLUDES variable used to hold.
AM_CPPFLAGS = -I$(top_srcdir)/include

# Installed command-line programs.
bin_PROGRAMS = tidy tab2space

# Both programs are linked against the libtool library built in src/.
tidy_LDADD = $(top_builddir)/src/libtidy.la

tab2space_LDADD = $(top_builddir)/src/libtidy.la
|
61
build/gnuauto/include/Makefile.am
Normal file
61
build/gnuauto/include/Makefile.am
Normal file
|
@ -0,0 +1,61 @@
|
|||
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2006/10/06 09:25:13 $
|
||||
# $Revision: 1.3 $
|
||||
#
|
||||
# Copyright (c) 1998-2006 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Contributing Author(s):
|
||||
#
|
||||
# Dave Raggett <dsr@w3.org>
|
||||
# Terry Teague <terry_teague@users.sourceforge.net>
|
||||
# Pradeep Padala<ppadala@users.sourceforge.net>
|
||||
#
|
||||
# The contributing author(s) would like to thank all those who
|
||||
# helped with testing, bug fixes, and patience. This wouldn't
|
||||
# have been possible without all of you.
|
||||
#
|
||||
# COPYRIGHT NOTICE:
|
||||
#
|
||||
# This software and documentation is provided "as is," and
|
||||
# the copyright holders and contributing author(s) make no
|
||||
# representations or warranties, express or implied, including
|
||||
# but not limited to, warranties of merchantability or fitness
|
||||
# for any particular purpose or that the use of the software or
|
||||
# documentation will not infringe any third party patents,
|
||||
# copyrights, trademarks or other rights.
|
||||
#
|
||||
# The copyright holders and contributing author(s) will not be
|
||||
# liable for any direct, indirect, special or consequential damages
|
||||
# arising out of any use of the software or documentation, even if
|
||||
# advised of the possibility of such damage.
|
||||
#
|
||||
# Permission is hereby granted to use, copy, modify, and distribute
|
||||
# this source code, or portions hereof, documentation and executables,
|
||||
# for any purpose, without fee, subject to the following restrictions:
|
||||
#
|
||||
# 1. The origin of this source code must not be misrepresented.
|
||||
# 2. Altered versions must be plainly marked as such and must
|
||||
# not be misrepresented as being the original source.
|
||||
# 3. This Copyright notice may not be removed or altered from any
|
||||
# source or altered source distribution.
|
||||
#
|
||||
# The copyright holders and contributing author(s) specifically
|
||||
# permit, without fee, and encourage the use of this source code
|
||||
# as a component for supporting the Hypertext Markup Language in
|
||||
# commercial products. If you use this source code in a product,
|
||||
# acknowledgment is not required but would be appreciated.
|
||||
#
|
||||
|
||||
# Public headers are installed directly into $(includedir); the
# per-project sub-directory variant is kept below, commented out.
#tidyincdir = $(includedir)/tidy
tidyincdir = $(includedir)

tidyinc_HEADERS = \
	platform.h \
	tidy.h \
	tidyenum.h \
	buffio.h
|
24
build/gnuauto/readme.txt
Normal file
24
build/gnuauto/readme.txt
Normal file
|
@ -0,0 +1,24 @@
|
|||
To use GNU "Auto" tools (AutoConf/AutoMake/LibTool), run
|
||||
/bin/sh build/gnuauto/setup.sh from the top-level Tidy
|
||||
directory. This script will copy the appropriate
|
||||
Makefile.am files into each source directory, along with
|
||||
configure.in.
|
||||
|
||||
If the script was successful you should now be able
|
||||
to build in the usual way:
|
||||
|
||||
$ ./configure --prefix=/usr
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
To get a list of configure options, type: ./configure --help
|
||||
|
||||
Alternatively, you should be able to build outside of the source
|
||||
tree. e.g.:
|
||||
|
||||
$ mkdir ../build-tidy
|
||||
$ cd ../build-tidy
|
||||
$ ../tidy/configure --prefix=/usr
|
||||
$ make
|
||||
$ make install
|
||||
|
56
build/gnuauto/setup.sh
Normal file
56
build/gnuauto/setup.sh
Normal file
|
@ -0,0 +1,56 @@
|
|||
#!/bin/sh
#
# Set up the GNU autotools build for Tidy.
#
# Must be run from the top-level source directory.  Copies everything in
# build/gnuauto into the source tree, then runs libtoolize, aclocal,
# automake and autoconf.  The tool invocations are best-effort: each command
# is announced before it runs and the user is expected to check the output.
#
# Exit status: 1 when run from the wrong directory or when no libtoolize
# can be found.

# "test ! -f" rather than "! test -f": the latter is not understood by
# some older /bin/sh implementations.
if test ! -f build/gnuauto/setup.sh; then

    echo ""
    echo "* * * Execute this script from the top source directory, e.g.:"
    echo ""
    echo " $ /bin/sh build/gnuauto/setup.sh"
    echo ""

    # Report the failure to the caller instead of exiting with status 0.
    exit 1

fi

# Accept either libtoolize or glibtoolize; when both are runnable the last
# one in the list is used.
for i in libtoolize glibtoolize
do
    ( $i --version ) < /dev/null > /dev/null 2>&1 &&
        LIBTOOLIZE=$i
done
if test -z "$LIBTOOLIZE" ; then
    echo "You need libtoolize to continue"
    exit 1
fi

top_srcdir=`pwd`
echo ""
echo "Generating the build system in $top_srcdir"
echo ""
echo "copying files into place: cd build/gnuauto && cp -R -f * $top_srcdir"
# The destination is quoted so a source tree whose path contains spaces
# is copied into correctly.
(cd build/gnuauto && cp -R -f * "$top_srcdir")
echo "running: $LIBTOOLIZE --force --copy"
$LIBTOOLIZE --force --copy
echo "running: aclocal"
aclocal
echo "running: automake -a -c --foreign"
automake -a -c --foreign
echo "running: autoconf"
autoconf
echo ""
echo "If the above commands were successful you should now be able"
echo "to build in the usual way:"
echo ""
echo " $ ./configure --prefix=/usr"
echo " $ make"
echo " $ make install"
echo ""
echo "to get a list of configure options type: ./configure --help"
echo ""
echo "Alternatively, you should be able to build outside of the source"
echo "tree. e.g.:"
echo ""
echo " $ mkdir ../build-tidy"
echo " $ cd ../build-tidy"
echo " $ ../tidy/configure --prefix=/usr"
echo " $ make"
echo " $ make install"
echo ""
|
81
build/gnuauto/src/Makefile.am
Normal file
81
build/gnuauto/src/Makefile.am
Normal file
|
@ -0,0 +1,81 @@
|
|||
# Makefile [Makefile.am] - for tidy - HTML parser and pretty printer
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2008/03/17 12:49:41 $
|
||||
# $Revision: 1.8 $
|
||||
#
|
||||
# Copyright (c) 1998-2008 World Wide Web Consortium
|
||||
# (Massachusetts Institute of Technology, European Research
|
||||
# Consortium for Informatics and Mathematics, Keio University).
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Contributing Author(s):
|
||||
#
|
||||
# Dave Raggett <dsr@w3.org>
|
||||
# Terry Teague <terry_teague@users.sourceforge.net>
|
||||
# Pradeep Padala<ppadala@users.sourceforge.net>
|
||||
#
|
||||
# The contributing author(s) would like to thank all those who
|
||||
# helped with testing, bug fixes, and patience. This wouldn't
|
||||
# have been possible without all of you.
|
||||
#
|
||||
# COPYRIGHT NOTICE:
|
||||
#
|
||||
# This software and documentation is provided "as is," and
|
||||
# the copyright holders and contributing author(s) make no
|
||||
# representations or warranties, express or implied, including
|
||||
# but not limited to, warranties of merchantability or fitness
|
||||
# for any particular purpose or that the use of the software or
|
||||
# documentation will not infringe any third party patents,
|
||||
# copyrights, trademarks or other rights.
|
||||
#
|
||||
# The copyright holders and contributing author(s) will not be
|
||||
# liable for any direct, indirect, special or consequential damages
|
||||
# arising out of any use of the software or documentation, even if
|
||||
# advised of the possibility of such damage.
|
||||
#
|
||||
# Permission is hereby granted to use, copy, modify, and distribute
|
||||
# this source code, or portions hereof, documentation and executables,
|
||||
# for any purpose, without fee, subject to the following restrictions:
|
||||
#
|
||||
# 1. The origin of this source code must not be misrepresented.
|
||||
# 2. Altered versions must be plainly marked as such and must
|
||||
# not be misrepresented as being the original source.
|
||||
# 3. This Copyright notice may not be removed or altered from any
|
||||
# source or altered source distribution.
|
||||
#
|
||||
# The copyright holders and contributing author(s) specifically
|
||||
# permit, without fee, and encourage the use of this source code
|
||||
# as a component for supporting the Hypertext Markup Language in
|
||||
# commercial products. If you use this source code in a product,
|
||||
# acknowledgment is not required but would be appreciated.
|
||||
#
|
||||
|
||||
# Project-specific compiler flags only.  Automake already appends the
# user's $(CFLAGS) to every compile command, so repeating @CFLAGS@ here
# would pass those flags twice.
AM_CFLAGS = @WARNING_CFLAGS@

# Preprocessor flags: locate the public tidy headers.  AM_CPPFLAGS is the
# current name for what the deprecated INCLUDES variable used to hold.
AM_CPPFLAGS = -I$(top_srcdir)/include
|
||||
|
||||
# The tidy library, built with libtool.
lib_LTLIBRARIES = libtidy.la

libtidy_la_SOURCES = \
	access.c \
	attrs.c \
	istack.c \
	parser.c \
	tags.c \
	entities.c \
	lexer.c \
	pprint.c \
	clean.c \
	localize.c \
	config.c \
	alloc.c \
	attrask.c \
	attrdict.c \
	attrget.c \
	buffio.c \
	fileio.c \
	streamio.c \
	tagask.c \
	tmbstr.c \
	utf8.c \
	tidylib.c \
	mappedio.c

# Version information comes from the LT_* values substituted by configure.
libtidy_la_LDFLAGS = \
	-version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) \
	-release $(LT_RELEASE) \
	-no-undefined \
	-export-dynamic

# Library-internal headers: shipped in the distribution, not installed.
HFILES = \
	access.h \
	attrdict.h \
	attrs.h \
	clean.h \
	config.h \
	entities.h \
	fileio.h \
	forward.h \
	lexer.h \
	mappedio.h \
	message.h \
	parser.h \
	pprint.h \
	streamio.h \
	tags.h \
	tmbstr.h \
	utf8.h \
	tidy-int.h \
	version.h

EXTRA_DIST = $(HFILES)
|
304
build/msvc/tidy.def
Executable file
304
build/msvc/tidy.def
Executable file
|
@ -0,0 +1,304 @@
|
|||
LIBRARY libtidy
|
||||
EXPORTS
|
||||
tidyCreate @1001
|
||||
tidyRelease @1002
|
||||
tidySetAppData @1003
|
||||
tidyGetAppData @1004
|
||||
tidyReleaseDate @1005
|
||||
tidyStatus @1006
|
||||
tidyDetectedHtmlVersion @1007
|
||||
tidyDetectedXhtml @1008
|
||||
tidyDetectedGenericXml @1009
|
||||
tidyErrorCount @1010
|
||||
tidyWarningCount @1011
|
||||
tidyAccessWarningCount @1012
|
||||
tidyConfigErrorCount @1013
|
||||
tidyLoadConfig @1014
|
||||
tidyLoadConfigEnc @1015
|
||||
tidyFileExists @1016
|
||||
tidySetCharEncoding @1017
|
||||
tidySetInCharEncoding @1018
|
||||
tidySetOutCharEncoding @1019
|
||||
tidySetOptionCallback @1020
|
||||
tidyOptGetIdForName @1021
|
||||
tidyGetOptionList @1022
|
||||
tidyGetNextOption @1023
|
||||
tidyGetOption @1024
|
||||
tidyGetOptionByName @1025
|
||||
tidyOptGetId @1026
|
||||
tidyOptGetName @1027
|
||||
tidyOptGetType @1028
|
||||
tidyOptIsReadOnly @1029
|
||||
tidyOptGetCategory @1030
|
||||
tidyOptGetDefault @1031
|
||||
tidyOptGetDefaultInt @1032
|
||||
tidyOptGetDefaultBool @1033
|
||||
tidyOptGetPickList @1034
|
||||
tidyOptGetNextPick @1035
|
||||
tidyOptGetValue @1036
|
||||
tidyOptSetValue @1037
|
||||
tidyOptParseValue @1038
|
||||
tidyOptGetInt @1039
|
||||
tidyOptSetInt @1040
|
||||
tidyOptGetBool @1041
|
||||
tidyOptSetBool @1042
|
||||
tidyOptResetToDefault @1043
|
||||
tidyOptResetAllToDefault @1044
|
||||
tidyOptSnapshot @1045
|
||||
tidyOptResetToSnapshot @1046
|
||||
tidyOptDiffThanDefault @1047
|
||||
tidyOptDiffThanSnapshot @1048
|
||||
tidyOptCopyConfig @1049
|
||||
tidyOptGetEncName @1050
|
||||
tidyOptGetCurrPick @1051
|
||||
tidyOptGetDeclTagList @1052
|
||||
tidyOptGetNextDeclTag @1053
|
||||
tidyOptGetDoc @1054
|
||||
tidyOptGetDocLinksList @1055
|
||||
tidyOptGetNextDocLinks @1056
|
||||
tidyInitSource @1057
|
||||
tidyGetByte @1058
|
||||
tidyUngetByte @1059
|
||||
tidyIsEOF @1060
|
||||
tidyInitSink @1061
|
||||
tidyPutByte @1062
|
||||
tidySetReportFilter @1063
|
||||
tidySetErrorFile @1064
|
||||
tidySetErrorBuffer @1065
|
||||
tidySetErrorSink @1066
|
||||
tidySetMallocCall @1067
|
||||
tidySetReallocCall @1068
|
||||
tidySetFreeCall @1069
|
||||
tidySetPanicCall @1070
|
||||
tidyParseFile @1071
|
||||
tidyParseStdin @1072
|
||||
tidyParseString @1073
|
||||
tidyParseBuffer @1074
|
||||
tidyParseSource @1075
|
||||
tidyCleanAndRepair @1076
|
||||
tidyRunDiagnostics @1077
|
||||
tidySaveFile @1078
|
||||
tidySaveStdout @1079
|
||||
tidySaveBuffer @1080
|
||||
tidySaveString @1081
|
||||
tidySaveSink @1082
|
||||
tidyOptSaveFile @1083
|
||||
tidyOptSaveSink @1084
|
||||
tidyErrorSummary @1085
|
||||
tidyGeneralInfo @1086
|
||||
tidyGetRoot @1087
|
||||
tidyGetHtml @1088
|
||||
tidyGetHead @1089
|
||||
tidyGetBody @1090
|
||||
tidyGetParent @1091
|
||||
tidyGetChild @1092
|
||||
tidyGetNext @1093
|
||||
tidyGetPrev @1094
|
||||
tidyAttrFirst @1095
|
||||
tidyAttrNext @1096
|
||||
tidyAttrName @1097
|
||||
tidyAttrValue @1098
|
||||
tidyNodeGetType @1099
|
||||
tidyNodeGetName @1100
|
||||
tidyNodeIsText @1101
|
||||
tidyNodeIsProp @1102
|
||||
tidyNodeIsHeader @1103
|
||||
tidyNodeHasText @1104
|
||||
tidyNodeGetText @1105
|
||||
tidyNodeGetId @1106
|
||||
tidyNodeLine @1107
|
||||
tidyNodeColumn @1108
|
||||
tidyNodeIsHTML @1109
|
||||
tidyNodeIsHEAD @1110
|
||||
tidyNodeIsTITLE @1111
|
||||
tidyNodeIsBASE @1112
|
||||
tidyNodeIsMETA @1113
|
||||
tidyNodeIsBODY @1114
|
||||
tidyNodeIsFRAMESET @1115
|
||||
tidyNodeIsFRAME @1116
|
||||
tidyNodeIsIFRAME @1117
|
||||
tidyNodeIsNOFRAMES @1118
|
||||
tidyNodeIsHR @1119
|
||||
tidyNodeIsH1 @1120
|
||||
tidyNodeIsH2 @1121
|
||||
tidyNodeIsPRE @1122
|
||||
tidyNodeIsLISTING @1123
|
||||
tidyNodeIsP @1124
|
||||
tidyNodeIsUL @1125
|
||||
tidyNodeIsOL @1126
|
||||
tidyNodeIsDL @1127
|
||||
tidyNodeIsDIR @1128
|
||||
tidyNodeIsLI @1129
|
||||
tidyNodeIsDT @1130
|
||||
tidyNodeIsDD @1131
|
||||
tidyNodeIsTABLE @1132
|
||||
tidyNodeIsCAPTION @1133
|
||||
tidyNodeIsTD @1134
|
||||
tidyNodeIsTH @1135
|
||||
tidyNodeIsTR @1136
|
||||
tidyNodeIsCOL @1137
|
||||
tidyNodeIsCOLGROUP @1138
|
||||
tidyNodeIsBR @1139
|
||||
tidyNodeIsA @1140
|
||||
tidyNodeIsLINK @1141
|
||||
tidyNodeIsB @1142
|
||||
tidyNodeIsI @1143
|
||||
tidyNodeIsSTRONG @1144
|
||||
tidyNodeIsEM @1145
|
||||
tidyNodeIsBIG @1146
|
||||
tidyNodeIsSMALL @1147
|
||||
tidyNodeIsPARAM @1148
|
||||
tidyNodeIsOPTION @1149
|
||||
tidyNodeIsOPTGROUP @1150
|
||||
tidyNodeIsIMG @1151
|
||||
tidyNodeIsMAP @1152
|
||||
tidyNodeIsAREA @1153
|
||||
tidyNodeIsNOBR @1154
|
||||
tidyNodeIsWBR @1155
|
||||
tidyNodeIsFONT @1156
|
||||
tidyNodeIsLAYER @1157
|
||||
tidyNodeIsSPACER @1158
|
||||
tidyNodeIsCENTER @1159
|
||||
tidyNodeIsSTYLE @1160
|
||||
tidyNodeIsSCRIPT @1161
|
||||
tidyNodeIsNOSCRIPT @1162
|
||||
tidyNodeIsFORM @1163
|
||||
tidyNodeIsTEXTAREA @1164
|
||||
tidyNodeIsBLOCKQUOTE @1165
|
||||
tidyNodeIsAPPLET @1166
|
||||
tidyNodeIsOBJECT @1167
|
||||
tidyNodeIsDIV @1168
|
||||
tidyNodeIsSPAN @1169
|
||||
tidyNodeIsINPUT @1170
|
||||
tidyNodeIsQ @1171
|
||||
tidyNodeIsLABEL @1172
|
||||
tidyNodeIsH3 @1173
|
||||
tidyNodeIsH4 @1174
|
||||
tidyNodeIsH5 @1175
|
||||
tidyNodeIsH6 @1176
|
||||
tidyNodeIsADDRESS @1177
|
||||
tidyNodeIsXMP @1178
|
||||
tidyNodeIsSELECT @1179
|
||||
tidyNodeIsBLINK @1180
|
||||
tidyNodeIsMARQUEE @1181
|
||||
tidyNodeIsEMBED @1182
|
||||
tidyNodeIsBASEFONT @1183
|
||||
tidyNodeIsISINDEX @1184
|
||||
tidyNodeIsS @1185
|
||||
tidyNodeIsSTRIKE @1186
|
||||
tidyNodeIsU @1187
|
||||
tidyNodeIsMENU @1188
|
||||
tidyAttrGetId @1189
|
||||
tidyAttrIsEvent @1190
|
||||
tidyAttrIsProp @1191
|
||||
tidyAttrIsHREF @1192
|
||||
tidyAttrIsSRC @1193
|
||||
tidyAttrIsID @1194
|
||||
tidyAttrIsNAME @1195
|
||||
tidyAttrIsSUMMARY @1196
|
||||
tidyAttrIsALT @1197
|
||||
tidyAttrIsLONGDESC @1198
|
||||
tidyAttrIsUSEMAP @1199
|
||||
tidyAttrIsISMAP @1200
|
||||
tidyAttrIsLANGUAGE @1201
|
||||
tidyAttrIsTYPE @1202
|
||||
tidyAttrIsVALUE @1203
|
||||
tidyAttrIsCONTENT @1204
|
||||
tidyAttrIsTITLE @1205
|
||||
tidyAttrIsXMLNS @1206
|
||||
tidyAttrIsDATAFLD @1207
|
||||
tidyAttrIsWIDTH @1208
|
||||
tidyAttrIsHEIGHT @1209
|
||||
tidyAttrIsFOR @1210
|
||||
tidyAttrIsSELECTED @1211
|
||||
tidyAttrIsCHECKED @1212
|
||||
tidyAttrIsLANG @1213
|
||||
tidyAttrIsTARGET @1214
|
||||
tidyAttrIsHTTP_EQUIV @1215
|
||||
tidyAttrIsREL @1216
|
||||
tidyAttrIsOnMOUSEMOVE @1217
|
||||
tidyAttrIsOnMOUSEDOWN @1218
|
||||
tidyAttrIsOnMOUSEUP @1219
|
||||
tidyAttrIsOnCLICK @1220
|
||||
tidyAttrIsOnMOUSEOVER @1221
|
||||
tidyAttrIsOnMOUSEOUT @1222
|
||||
tidyAttrIsOnKEYDOWN @1223
|
||||
tidyAttrIsOnKEYUP @1224
|
||||
tidyAttrIsOnKEYPRESS @1225
|
||||
tidyAttrIsOnFOCUS @1226
|
||||
tidyAttrIsOnBLUR @1227
|
||||
tidyAttrIsBGCOLOR @1228
|
||||
tidyAttrIsLINK @1229
|
||||
tidyAttrIsALINK @1230
|
||||
tidyAttrIsVLINK @1231
|
||||
tidyAttrIsTEXT @1232
|
||||
tidyAttrIsSTYLE @1233
|
||||
tidyAttrIsABBR @1234
|
||||
tidyAttrIsCOLSPAN @1235
|
||||
tidyAttrIsROWSPAN @1236
|
||||
tidyAttrGetById @1237
|
||||
tidyAttrGetHREF @1238
|
||||
tidyAttrGetSRC @1239
|
||||
tidyAttrGetID @1240
|
||||
tidyAttrGetNAME @1241
|
||||
tidyAttrGetSUMMARY @1242
|
||||
tidyAttrGetALT @1243
|
||||
tidyAttrGetLONGDESC @1244
|
||||
tidyAttrGetUSEMAP @1245
|
||||
tidyAttrGetISMAP @1246
|
||||
tidyAttrGetLANGUAGE @1247
|
||||
tidyAttrGetTYPE @1248
|
||||
tidyAttrGetVALUE @1249
|
||||
tidyAttrGetCONTENT @1250
|
||||
tidyAttrGetTITLE @1251
|
||||
tidyAttrGetXMLNS @1252
|
||||
tidyAttrGetDATAFLD @1253
|
||||
tidyAttrGetWIDTH @1254
|
||||
tidyAttrGetHEIGHT @1255
|
||||
tidyAttrGetFOR @1256
|
||||
tidyAttrGetSELECTED @1257
|
||||
tidyAttrGetCHECKED @1258
|
||||
tidyAttrGetLANG @1259
|
||||
tidyAttrGetTARGET @1260
|
||||
tidyAttrGetHTTP_EQUIV @1261
|
||||
tidyAttrGetREL @1262
|
||||
tidyAttrGetOnMOUSEMOVE @1263
|
||||
tidyAttrGetOnMOUSEDOWN @1264
|
||||
tidyAttrGetOnMOUSEUP @1265
|
||||
tidyAttrGetOnCLICK @1266
|
||||
tidyAttrGetOnMOUSEOVER @1267
|
||||
tidyAttrGetOnMOUSEOUT @1268
|
||||
tidyAttrGetOnKEYDOWN @1269
|
||||
tidyAttrGetOnKEYUP @1270
|
||||
tidyAttrGetOnKEYPRESS @1271
|
||||
tidyAttrGetOnFOCUS @1272
|
||||
tidyAttrGetOnBLUR @1273
|
||||
tidyAttrGetBGCOLOR @1274
|
||||
tidyAttrGetLINK @1275
|
||||
tidyAttrGetALINK @1276
|
||||
tidyAttrGetVLINK @1277
|
||||
tidyAttrGetTEXT @1278
|
||||
tidyAttrGetSTYLE @1279
|
||||
tidyAttrGetABBR @1280
|
||||
tidyAttrGetCOLSPAN @1281
|
||||
tidyAttrGetROWSPAN @1282
|
||||
tidyCreateWithAllocator @1283
|
||||
|
||||
tidyInitInputBuffer @2001
|
||||
tidyInitOutputBuffer @2002
|
||||
tidyBufInit @2003
|
||||
tidyBufAlloc @2004
|
||||
tidyBufCheckAlloc @2005
|
||||
tidyBufFree @2006
|
||||
tidyBufClear @2007
|
||||
tidyBufAttach @2008
|
||||
tidyBufDetach @2009
|
||||
tidyBufAppend @2010
|
||||
tidyBufPutByte @2011
|
||||
tidyBufPopByte @2012
|
||||
tidyBufGetByte @2013
|
||||
tidyBufEndOfInput @2014
|
||||
tidyBufUngetByte @2015
|
||||
tidyBufInitWithAllocator @2016
|
||||
tidyBufAllocWithAllocator @2017
|
||||
tidyNodeGetValue @2018
|
94
build/msvc/tidy.dsp
Normal file
94
build/msvc/tidy.dsp
Normal file
|
@ -0,0 +1,94 @@
|
|||
# Microsoft Developer Studio Project File - Name="tidy" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=tidy - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidy.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidy.mak" CFG="tidy - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "tidy - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "tidy - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "tidy - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /MT /Za /W3 /GX /O2 /I "..\..\include" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /D TIDYDLL_EXPORT=__declspec(dllimport) /D _CRT_SECURE_NO_DEPRECATE /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /profile /map /machine:I386
|
||||
|
||||
!ELSEIF "$(CFG)" == "tidy - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
|
||||
# ADD CPP /nologo /MTd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D "_MBCS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /D TIDYDLL_EXPORT=__declspec(dllimport) /D _CRT_SECURE_NO_DEPRECATE /YX /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "tidy - Win32 Release"
|
||||
# Name "tidy - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\console\tidy.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
56
build/msvc/tidy.dsw
Normal file
56
build/msvc/tidy.dsw
Normal file
|
@ -0,0 +1,56 @@
|
|||
Microsoft Developer Studio Workspace File, Format Version 6.00
|
||||
# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "tidy"=.\tidy.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
Begin Project Dependency
|
||||
Project_Dep_Name tidylib
|
||||
End Project Dependency
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "tidydll"=.\tidydll.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "tidylib"=.\tidylib.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Global:
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<3>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
296
build/msvc/tidydll.dsp
Normal file
296
build/msvc/tidydll.dsp
Normal file
|
@ -0,0 +1,296 @@
|
|||
# Microsoft Developer Studio Project File - Name="tidydll" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
|
||||
|
||||
CFG=tidydll - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidydll.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidydll.mak" CFG="tidydll - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "tidydll - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library")
|
||||
!MESSAGE "tidydll - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
MTL=midl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "tidydll - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "tidydll___Win32_Release"
|
||||
# PROP BASE Intermediate_Dir "tidydll___Win32_Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "ReleaseDLL"
|
||||
# PROP Intermediate_Dir "ReleaseDLL"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /YX /FD /c
|
||||
# ADD CPP /nologo /MD /W3 /GX /O2 /I "..\..\include" /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /YX /FD /c
|
||||
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
|
||||
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 /out:"ReleaseDLL/libtidy.dll"
|
||||
|
||||
!ELSEIF "$(CFG)" == "tidydll - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "tidydll___Win32_Debug"
|
||||
# PROP BASE Intermediate_Dir "tidydll___Win32_Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "DebugDLL"
|
||||
# PROP Intermediate_Dir "DebugDLL"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "TIDYDLL_EXPORTS" /YX /FD /GZ /c
|
||||
# ADD CPP /nologo /MDd /W3 /ZI /Od /I "..\..\include" /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D SUPPORT_UTF16_ENCODINGS=1 /D SUPPORT_ASIAN_ENCODINGS=1 /D SUPPORT_ACCESSIBILITY_CHECKS=1 /FD /GZ /c
|
||||
# SUBTRACT CPP /YX
|
||||
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
|
||||
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /out:"DebugDLL/libtidy.dll" /pdbtype:sept
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "tidydll - Win32 Release"
|
||||
# Name "tidydll - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\access.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\alloc.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrask.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrdict.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrget.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\buffio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\clean.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\config.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\entities.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\fileio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\istack.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\lexer.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\localize.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\mappedio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\parser.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\pprint.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\streamio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tagask.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\tidy.def
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tags.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tidylib.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tmbstr.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\utf8.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\win32tc.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\access.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrdict.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrs.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\buffio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\clean.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\config.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\entities.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\fileio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\forward.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\lexer.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\mappedio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\message.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\parser.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\platform.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\pprint.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\streamio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tags.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE="..\..\src\tidy-int.h"
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\tidy.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\tidyenum.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tmbstr.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\utf8.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\version.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\win32tc.h
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
295
build/msvc/tidylib.dsp
Normal file
295
build/msvc/tidylib.dsp
Normal file
|
@ -0,0 +1,295 @@
|
|||
# Microsoft Developer Studio Project File - Name="tidylib" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Static Library" 0x0104
|
||||
|
||||
CFG=tidylib - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidylib.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "tidylib.mak" CFG="tidylib - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "tidylib - Win32 Release" (based on "Win32 (x86) Static Library")
|
||||
!MESSAGE "tidylib - Win32 Debug" (based on "Win32 (x86) Static Library")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "tidylib - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
|
||||
# ADD CPP /nologo /MT /W4 /GX /O2 /I "../../include" /D "NDEBUG" /D "_LIB" /D "WIN32" /D "_MBCS" /D "SUPPORT_UTF16_ENCODINGS" /D "SUPPORT_ASIAN_ENCODINGS" /D "SUPPORT_ACCESSIBILITY_CHECKS" /FD /c
|
||||
# SUBTRACT CPP /YX
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LIB32=link.exe -lib
|
||||
# ADD BASE LIB32 /nologo
|
||||
# ADD LIB32 /nologo /out:"Release\libtidy.lib"
|
||||
|
||||
!ELSEIF "$(CFG)" == "tidylib - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ /c
|
||||
# ADD CPP /nologo /MTd /Za /W4 /Gm /ZI /Od /I "../../include" /D "_DEBUG" /D "_WIN32" /D "_LIB" /D "WIN32" /D "_MBCS" /D "SUPPORT_UTF16_ENCODINGS" /D "SUPPORT_ASIAN_ENCODINGS" /D "SUPPORT_ACCESSIBILITY_CHECKS" /U "WINDOWS" /FD /GZ /c
|
||||
# SUBTRACT CPP /YX
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LIB32=link.exe -lib
|
||||
# ADD BASE LIB32 /nologo
|
||||
# ADD LIB32 /nologo /out:"Debug\libtidy.lib"
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "tidylib - Win32 Release"
|
||||
# Name "tidylib - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\access.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\alloc.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrask.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrdict.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrget.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrs.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\buffio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\clean.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\config.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\entities.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\fileio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\istack.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\lexer.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\localize.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\mappedio.c
|
||||
|
||||
!IF "$(CFG)" == "tidylib - Win32 Release"
|
||||
|
||||
!ELSEIF "$(CFG)" == "tidylib - Win32 Debug"
|
||||
|
||||
# ADD CPP /Ze
|
||||
|
||||
!ENDIF
|
||||
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\parser.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\pprint.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\streamio.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tagask.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tags.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tidylib.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tmbstr.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\utf8.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\win32tc.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\access.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrdict.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\attrs.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\buffio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\clean.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\config.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\entities.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\fileio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\forward.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\lexer.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\mappedio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\message.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\parser.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\platform.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\pprint.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\streamio.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tags.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE="..\..\src\tidy-int.h"
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\tidy.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\include\tidyenum.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\tmbstr.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\utf8.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\version.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\src\win32tc.h
|
||||
# End Source File
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
38
build/readme.txt
Normal file
38
build/readme.txt
Normal file
|
@ -0,0 +1,38 @@
|
|||
Tidy Build Files
|
||||
|
||||
Each subdirectory contains input files to a selected
|
||||
build system for TidyLib and the command line driver.
|
||||
Some build systems are cross-platform (gmake, autoconf),
|
||||
others (msvc) are platform specific. For details
|
||||
on any given build system, see the readme file for
|
||||
that system.
|
||||
|
||||
Directory System Comments
|
||||
--------- -------------------- --------------------------
|
||||
gmake GNU Make Used for "official" builds
|
||||
|
||||
gnuauto GNU AutoConf Supports shared lib builds
|
||||
|
||||
msvc MS Visual C++ v6 Win32 only
|
||||
|
||||
rpm Script for packages For Linux distributions supporting rpm
|
||||
|
||||
|
||||
Common Build Options
|
||||
|
||||
There are some basic build options for TidyLib, independent
|
||||
of platform and build system. Typically, these options can
|
||||
be enabled or disabled by setting a macro value within the
|
||||
Makefile or its equivalent. An option may be disabled by
|
||||
setting its value to "0". Enable by setting to "1". Again,
|
||||
consult the directions for each build system for details
|
||||
on how to enable/disable each option.
|
||||
|
||||
Option Default Description
|
||||
---------------------------- -------- ---------------------------------
|
||||
DMALLOC Disabled Use dmalloc for memory debugging
|
||||
SUPPORT_ACCESSIBILITY_CHECKS Enabled Support W3C WAI checks
|
||||
SUPPORT_UTF16_ENCODINGS Enabled Support Unicode documents
|
||||
SUPPORT_ASIAN_ENCODINGS Enabled Support Big5 and ShiftJIS docs
|
||||
|
||||
|
40
build/rpm/readme.txt
Normal file
40
build/rpm/readme.txt
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Script for Building tidy rpm packages
|
||||
|
||||
|
||||
# To build the RPM packages for tidy on Redhat and other distros which support rpm.
|
||||
# For making Debian packages, first create rpm package and then generate
|
||||
# debian package by command "rpm2deb filename"
|
||||
|
||||
|
||||
|
||||
The steps are as follows:
|
||||
|
||||
|
||||
1. Let's suppose TIDY_VERSION you are building is 02October2003
|
||||
|
||||
|
||||
2. Unpack original source tree
|
||||
tar zxvf tidy_src.tgz
|
||||
This will extract to a directory called tidy
|
||||
|
||||
|
||||
3. mv tidy tidy-02October2003
|
||||
Edit the tidy.spec file inside directory tidy-02October2003
|
||||
and make sure the Version variable is changed to 02October2003.
|
||||
Also edit the Makefile and change the prefix variables to say exactly this:
|
||||
runinst_prefix=${RPMTMP}
|
||||
devinst_prefix=${RPMTMP}
|
||||
|
||||
|
||||
4. tar zcvf tidy-02October2003.tgz tidy-02October2003
|
||||
|
||||
|
||||
5. rpmbuild -ta tidy-02October2003.tgz
|
||||
|
||||
|
||||
6. rm tidy-02October2003.tgz
|
||||
|
||||
|
||||
7. To derive Debian package for tidy run command on created rpm packages
|
||||
rpm2deb tidy-02October2003-1.rpm
|
||||
|
155
build/rpm/tidy.spec
Normal file
155
build/rpm/tidy.spec
Normal file
|
@ -0,0 +1,155 @@
|
|||
#
|
||||
# spec file for package tidy
|
||||
#
|
||||
# (c) 2006 (W3C) MIT, ERCIM, Keio University
|
||||
# See tidy.h for the copyright notice.
|
||||
#
|
||||
# CVS Info :
|
||||
#
|
||||
# $Author: arnaud02 $
|
||||
# $Date: 2006/02/22 14:21:12 $
|
||||
# $Revision: 1.2 $
|
||||
#
|
||||
# Contributing Author(s):
|
||||
# Sierk Bornemann <bornemann@users.sourceforge.net>
|
||||
#
|
||||
# norootforbuild
|
||||
# neededforbuild doxygen libxslt libtool
|
||||
|
||||
BuildRequires: doxygen libxslt libtool
|
||||
|
||||
Name: tidy
|
||||
Version: 1.0
|
||||
Release: YYMMDD
|
||||
%define docrelease YYMMDD
|
||||
Summary: Utility to clean up and pretty print HTML/XHTML/XML
|
||||
Group: Applications/Tools
|
||||
License: W3C Software License, MIT Licence, Other License(s), see package
|
||||
Autoreqprov: on
|
||||
URL: http://tidy.sourceforge.net/
|
||||
Source0: http://sourceforge.net/cvs/?group_id=27659
|
||||
Source1: http://tidy.sourceforge.net/src/tidy_src.tgz
|
||||
Source2: http://tidy.sourceforge.net/docs/tidy_docs.tgz
|
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-build
|
||||
|
||||
|
||||
%description
|
||||
When editing HTML it's easy to make mistakes. Wouldn't it be nice if
|
||||
there was a simple way to fix these mistakes automatically and tidy up
|
||||
sloppy editing into nicely laid out markup? Well now there is! Dave
|
||||
Raggett's HTML TIDY is a free utility for doing just that. It also
|
||||
works great on the atrociously hard to read markup generated by
|
||||
specialized HTML editors and conversion tools, and can help you
|
||||
identify where you need to pay further attention on making your pages
|
||||
more accessible to people with disabilities.
|
||||
|
||||
Tidy is able to fix up a wide range of problems and to bring to your
|
||||
attention things that you need to work on yourself. Each item found is
|
||||
listed with the line number and column so that you can see where the
|
||||
problem lies in your markup. Tidy won't generate a cleaned up version
|
||||
when there are problems that it can't be sure of how to handle. These
|
||||
are logged as "errors" rather than "warnings".
|
||||
|
||||
|
||||
Authors:
|
||||
--------
|
||||
|
||||
Tidy was written by Dave Raggett <dsr@w3.org> and is now maintained
|
||||
and developed by the Tidy team at http://tidy.sourceforge.net/.
|
||||
|
||||
|
||||
%package -n libtidy
|
||||
Summary: Shared library for tidy
|
||||
Group: Development/Libraries
|
||||
Autoreqprov: on
|
||||
|
||||
%description -n libtidy
|
||||
|
||||
This package contains the library needed to run programs dynamically
|
||||
linked with tidy.
|
||||
|
||||
|
||||
%package -n libtidy-devel
|
||||
Summary: Development files for tidy
|
||||
Group: Development/Libraries
|
||||
Requires: libtidy = %{version}-%{release}
|
||||
Autoreqprov: on
|
||||
|
||||
|
||||
%description -n libtidy-devel
|
||||
|
||||
This package contains the headers, the shared libraries and the API
|
||||
documentation which programmers will need to develop applications based on
|
||||
tidy.
|
||||
|
||||
%debug_package
|
||||
%prep
|
||||
%setup -q -n %{name} -b 1
|
||||
mv htmldoc/doxygen.cfg Doxyfile
|
||||
|
||||
|
||||
%build
|
||||
export CFLAGS="$RPM_OPT_FLAGS"
|
||||
/bin/sh build/gnuauto/setup.sh
|
||||
|
||||
%configure --disable-dependency-tracking \
|
||||
--includedir=%{_includedir}/%{name}
|
||||
make %{?_smp_mflags} all
|
||||
make -C build/gmake/ doc
|
||||
doxygen
|
||||
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT _api
|
||||
make install DESTDIR=$RPM_BUILD_ROOT
|
||||
# Manpage
|
||||
install -Dpm 644 htmldoc/tidy.1 $RPM_BUILD_ROOT%{_mandir}/man1/tidy.1
|
||||
# Quick Reference
|
||||
install -Dpm 644 htmldoc/quickref.html $RPM_BUILD_ROOT%{_defaultdocdir}/%{name}/quickref.html
|
||||
# Move API directory out of the way
|
||||
mv htmldoc/api _api
|
||||
|
||||
|
||||
%clean
|
||||
if ! test -f /.buildenv; then
|
||||
rm -rf $RPM_BUILD_ROOT;
|
||||
fi
|
||||
|
||||
|
||||
%post -n lib%{name} -p /sbin/ldconfig
|
||||
|
||||
%postun -n lib%{name} -p /sbin/ldconfig
|
||||
|
||||
|
||||
%files
|
||||
%defattr(-, root, root)
|
||||
%doc htmldoc/*
|
||||
%{_bindir}/tidy
|
||||
%{_bindir}/tab2space
|
||||
%{_mandir}/man1/tidy.1*
|
||||
|
||||
|
||||
%files -n libtidy
|
||||
%defattr(-, root, root)
|
||||
%doc htmldoc/license.html
|
||||
%{_libdir}/libtidy*.so.*
|
||||
|
||||
|
||||
%files -n libtidy-devel
|
||||
%defattr(-, root, root)
|
||||
%doc _api/*
|
||||
%{_includedir}/%{name}/*.h
|
||||
%{_libdir}/libtidy.so
|
||||
%{_libdir}/libtidy.a
|
||||
%exclude %{_libdir}/libtidy.la
|
||||
|
||||
|
||||
%changelog -n tidy
|
||||
* Wed Feb 22 2006 - Sierk Bornemann <bornemann@sourceforge.net>
|
||||
Rewritten RPM Spec file:
|
||||
- respects filesystem layout of current FHS-compliant linux distributions.
|
||||
- respects current tidy Makefile and
|
||||
creation of tidy docs (XSL transformation from tidy's XML output).
|
||||
|
||||
* Sat Oct 25 2003 - Al Dev (Alavoor Vasudevan) <alavoor[at]yahoo.com>
|
||||
- Initial version of %{name} rpm
|
362
console/tab2space.c
Normal file
362
console/tab2space.c
Normal file
|
@ -0,0 +1,362 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
#define TABSIZE 4
|
||||
|
||||
#define DOS_CRLF 0
|
||||
#define UNIX_LF 1
|
||||
#define MAC_CR 2
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Bool pushed;
|
||||
int tabs;
|
||||
int curcol;
|
||||
int lastcol;
|
||||
int maxcol;
|
||||
int curline;
|
||||
int pushed_char;
|
||||
uint size;
|
||||
uint length;
|
||||
char *buf;
|
||||
FILE *fp;
|
||||
} Stream;
|
||||
|
||||
static int tabsize = TABSIZE;
|
||||
static int endline = DOS_CRLF;
|
||||
static Bool tabs = false;
|
||||
|
||||
/*
|
||||
Memory allocation functions vary from one environment to
|
||||
the next, and experience shows that wrapping the local
|
||||
mechanisms up provides for greater flexibility and allows
|
||||
out of memory conditions to be detected in one place.
|
||||
*/
|
||||
/* Allocate size bytes; report the failure and terminate if malloc fails. */
void *MemAlloc(size_t size)
{
    void *block = malloc(size);

    if (block != NULL)
        return block;

    fprintf(stderr, "***** Out of memory! *****\n");
    exit(1);
}
|
||||
|
||||
/*
  Resize the block at old to size bytes and return the (possibly moved)
  block.  old may be NULL, in which case this behaves like an allocation.

  On failure the error is reported and the program terminates, matching
  MemAlloc: the caller in this file (AddByte) writes through the returned
  pointer without checking it, so returning NULL would only turn an
  out-of-memory condition into a NULL dereference.
*/
void *MemRealloc(void *old, size_t size)
{
    void *p;

    p = realloc(old, size);

    /* realloc(ptr, 0) may legitimately return NULL; that is not an error. */
    if (!p && size != 0)
    {
        fprintf(stderr, "***** Out of memory! *****\n");
        exit(1);
    }

    return p;
}
|
||||
|
||||
/*
  Release a block obtained from MemAlloc/MemRealloc.
  Only the local copy of the pointer is visible here, so the caller's
  own pointer variable is left unchanged.
*/
void MemFree(void *p)
{
    free(p);
}
|
||||
|
||||
static Stream *NewStream(FILE *fp)
|
||||
{
|
||||
Stream *in;
|
||||
|
||||
in = (Stream *)MemAlloc(sizeof(Stream));
|
||||
|
||||
memset(in, 0, sizeof(Stream));
|
||||
in->fp = fp;
|
||||
return in;
|
||||
}
|
||||
|
||||
/* Release a Stream and its text buffer (the FILE is not closed here). */
static void FreeStream(Stream *in)
{
    char *text = in->buf;

    if (text != NULL)
        MemFree(text);

    MemFree(in);
}
|
||||
|
||||
/*
  Append one byte to the stream's buffer, growing the buffer as needed.
  The buffer starts at 8192 bytes and doubles from there; it is kept
  NUL-terminated after every append.
*/
static void AddByte(Stream *in, uint c)
{
    uint capacity = in->length;

    /* Need room for the new byte plus the terminating NUL. */
    while (in->size + 1 >= capacity)
        capacity = (capacity == 0) ? 8192 : capacity * 2;

    if (capacity != in->length)
    {
        in->length = capacity;
        in->buf = (char *)MemRealloc(in->buf, in->length*sizeof(char));
    }

    in->buf[in->size] = (char)c;
    in->size += 1;
    in->buf[in->size] = '\0'; /* debug */
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Read a character from a stream, keeping track
|
||||
of lines, columns etc. This is used for parsing
|
||||
markup and plain text etc. A single level
|
||||
pushback is allowed with UngetChar(c, in).
|
||||
Returns EndOfStream if there's nothing more to read.
|
||||
*/
|
||||
static int ReadChar(Stream *in)
|
||||
{
|
||||
int c;
|
||||
|
||||
if (in->pushed)
|
||||
{
|
||||
in->pushed = false;
|
||||
|
||||
if (in->pushed_char == '\n')
|
||||
in->curline--;
|
||||
|
||||
return in->pushed_char;
|
||||
}
|
||||
|
||||
in->lastcol = in->curcol;
|
||||
|
||||
/* expanding tab ? */
|
||||
if (in->tabs > 0)
|
||||
{
|
||||
in->curcol++;
|
||||
in->tabs--;
|
||||
return ' ';
|
||||
}
|
||||
|
||||
/* Else go on with normal buffer: */
|
||||
for (;;)
|
||||
{
|
||||
c = getc(in->fp);
|
||||
|
||||
/* end of file? */
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
/* coerce \r\n and isolated \r as equivalent to \n : */
|
||||
if (c == '\r')
|
||||
{
|
||||
c = getc(in->fp);
|
||||
|
||||
if (c != '\n')
|
||||
ungetc(c, in->fp);
|
||||
|
||||
c = '\n';
|
||||
}
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
if (in->maxcol < in->curcol)
|
||||
in->maxcol = in->curcol;
|
||||
|
||||
in->curcol = 1;
|
||||
in->curline++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == '\t')
|
||||
{
|
||||
if (tabs)
|
||||
in->curcol += tabsize - ((in->curcol - 1) % tabsize);
|
||||
else /* expand to spaces */
|
||||
{
|
||||
in->tabs = tabsize - ((in->curcol - 1) % tabsize) - 1;
|
||||
in->curcol++;
|
||||
c = ' ';
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == '\033')
|
||||
break;
|
||||
|
||||
/* strip control characters including '\r' */
|
||||
|
||||
if (0 < c && c < 32)
|
||||
continue;
|
||||
|
||||
in->curcol++;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static Stream *ReadFile(FILE *fin)
|
||||
{
|
||||
int c;
|
||||
Stream *in = NewStream(fin);
|
||||
|
||||
while ((c = ReadChar(in)) >= 0)
|
||||
AddByte(in, (uint)c);
|
||||
|
||||
return in;
|
||||
}
|
||||
|
||||
/*
  Write the stream's buffered text to fout, replacing every '\n'
  with the line ending selected by the global endline setting.
*/
static void WriteFile(Stream *in, FILE *fout)
{
    const char *p = in->buf;
    int left;

    for (left = in->size; left != 0; --left, ++p)
    {
        int c = *p;

        if (c != '\n')
        {
            putc(c, fout);
            continue;
        }

        switch (endline)
        {
        case DOS_CRLF:
            putc('\r', fout);
            putc('\n', fout);
            break;

        case UNIX_LF:
            putc('\n', fout);
            break;

        case MAC_CR:
            putc('\r', fout);
            break;

        default:
            /* unknown setting: the newline is dropped, as before */
            break;
        }
    }
}
|
||||
|
||||
/* Print the usage summary for the program named prog to errout. */
static void HelpText(FILE *errout, char *prog)
{
    static const char *const body[] =
    {
        "Utility to expand tabs and ensure consistent line endings\n",
        "options for tab2space vers: 6th February 2003\n",
        " -help or -h display this help message\n",
        " -dos or -crlf set line ends to CRLF (PC-DOS/Windows - default)\n",
        " -mac or -cr set line ends to CR (classic Mac OS)\n",
        " -unix or -lf set line ends to LF (Unix)\n",
        " -tabs preserve tabs, e.g. for Makefile\n",
        " -t<n> set tabs to <n> (default is 4) spaces\n",
        "\nNote this utility doesn't map spaces to tabs!\n"
    };
    size_t i;

    fprintf(errout, "%s: [options] [infile [outfile]] ...\n", prog);

    for (i = 0; i < sizeof(body) / sizeof(body[0]); ++i)
        fputs(body[i], errout);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char const *infile, *outfile;
|
||||
char *prog;
|
||||
FILE *fin, *fout;
|
||||
Stream *in = NULL;
|
||||
|
||||
prog = argv[0];
|
||||
|
||||
while (argc > 0)
|
||||
{
|
||||
if (argc > 1 && argv[1][0] == '-')
|
||||
{
|
||||
if (strcmp(argv[1], "-help") == 0 || argv[1][1] == 'h')
|
||||
{
|
||||
HelpText(stdout, prog);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (strcmp(argv[1], "-dos") == 0 ||
|
||||
strcmp(argv[1], "-crlf") == 0)
|
||||
endline = DOS_CRLF;
|
||||
|
||||
else if (strcmp(argv[1], "-mac") == 0 ||
|
||||
strcmp(argv[1], "-cr") == 0)
|
||||
endline = MAC_CR;
|
||||
|
||||
else if (strcmp(argv[1], "-unix") == 0 ||
|
||||
strcmp(argv[1], "-lf") == 0)
|
||||
endline = UNIX_LF;
|
||||
|
||||
else if (strcmp(argv[1], "-tabs") == 0)
|
||||
tabs = true;
|
||||
|
||||
else if (strncmp(argv[1], "-t", 2) == 0)
|
||||
sscanf(argv[1]+2, "%d", &tabsize);
|
||||
|
||||
--argc;
|
||||
++argv;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argc > 1)
|
||||
{
|
||||
infile = argv[1];
|
||||
fin = fopen(infile, "rb");
|
||||
}
|
||||
else
|
||||
{
|
||||
infile = "stdin";
|
||||
fin = stdin;
|
||||
}
|
||||
|
||||
if (argc > 2)
|
||||
{
|
||||
outfile = argv[2];
|
||||
fout = NULL;
|
||||
--argc;
|
||||
++argv;
|
||||
}
|
||||
else
|
||||
{
|
||||
outfile = "stdout";
|
||||
fout = stdout;
|
||||
}
|
||||
|
||||
if (fin)
|
||||
{
|
||||
in = ReadFile(fin);
|
||||
|
||||
if (fin != stdin)
|
||||
fclose(fin);
|
||||
|
||||
if (fout != stdout)
|
||||
fout = fopen(outfile, "wb");
|
||||
|
||||
if (fout)
|
||||
{
|
||||
WriteFile(in, fout);
|
||||
|
||||
if (fout != stdout)
|
||||
fclose(fout);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "%s - can't open \"%s\" for writing\n", prog, outfile);
|
||||
|
||||
FreeStream(in);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "%s - can't open \"%s\" for reading\n", prog, infile);
|
||||
|
||||
--argc;
|
||||
++argv;
|
||||
|
||||
if (argc <= 1)
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
1321
console/tidy.c
Normal file
1321
console/tidy.c
Normal file
File diff suppressed because it is too large
Load diff
47
experimental/TidyNodeIter.c
Normal file
47
experimental/TidyNodeIter.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
#include "platform.h"
|
||||
#include "tidy-int.h"
|
||||
|
||||
#include "TidyNodeIter.h"
|
||||
|
||||
/*
  Allocate an iterator over the subtree rooted at pStart.
  Returns NULL if the allocation yields NULL.  The iterator is cleared,
  so pCurrent starts out as NULL.
*/
TidyNodeIter *newTidyNodeIter( Node *pStart )
{
    TidyNodeIter *pIter = MemAlloc( sizeof( TidyNodeIter ));

    if (NULL == pIter)
        return NULL;

    ClearMemory( pIter, sizeof( TidyNodeIter ));
    pIter->pTop = pStart;
    return pIter;
}
|
||||
|
||||
/*
  Advance the iterator and return the new current node.

  - With no current node, the walk starts at pTop's first child.
  - Otherwise the first child of the current node comes next, if any.
  - Otherwise the next sibling of the current node, or of its nearest
    ancestor below pTop that has one.

  Returns NULL when no node remains below pTop.  Because pCurrent is then
  NULL again, a further call restarts the walk from the beginning.

  A NULL iterator or a NULL pTop yields NULL.  The climb also stops at a
  node without a parent, so an iterator repositioned outside pTop's
  subtree ends the walk instead of dereferencing NULL.
*/
Node *nextTidyNode( TidyNodeIter *pThis )
{
    if (NULL == pThis || NULL == pThis->pTop)
        return NULL;

    if (NULL == pThis->pCurrent)
    {
        /* just starting out, initialize */
        pThis->pCurrent = pThis->pTop->content;
    }
    else if (NULL != pThis->pCurrent->content)
    {
        /* the next element, if any, is my first-born child */
        pThis->pCurrent = pThis->pCurrent->content;
    }
    else
    {
        /* no children, I guess my next younger brother inherits the throne. */
        while ( NULL == pThis->pCurrent->next
                && NULL != pThis->pCurrent->parent
                && pThis->pTop != pThis->pCurrent->parent )
        {
            /* no siblings, do any of my ancestors have younger sibs? */
            pThis->pCurrent = pThis->pCurrent->parent;
        }
        pThis->pCurrent = pThis->pCurrent->next;
    }
    return pThis->pCurrent;
}
|
||||
|
||||
/* Reposition the iterator at newCurr; a NULL newCurr is ignored. */
void setCurrentNode( TidyNodeIter *pThis, Node *newCurr )
{
    if (NULL == newCurr)
        return;

    pThis->pCurrent = newCurr;
}
|
51
experimental/TidyNodeIter.h
Normal file
51
experimental/TidyNodeIter.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* TidyNodeIter
|
||||
|
||||
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
These files contain utility routines to perform in-order traversals of the
|
||||
Tidy document tree, beginning at an arbitrary node.
|
||||
|
||||
A traversal of the tree can be performed in a manner similar to the following:
|
||||
|
||||
Node *testNode;
|
||||
TidyNodeIter *iter = newTidyNodeIter( FindBody( tdoc ));
|
||||
for (testNode = nextTidyNode( iter );
|
||||
NULL != testNode;
|
||||
testNode = nextTidyNode( iter ))
|
||||
{
|
||||
}
|
||||
|
||||
TODO: Add a prevTidyNode() function.
|
||||
*/
|
||||
|
||||
#include "lexer.h"
|
||||
|
||||
typedef struct _TidyNodeIter
|
||||
{
|
||||
Node *pTop, *pCurrent;
|
||||
} TidyNodeIter;
|
||||
|
||||
TidyNodeIter *newTidyNodeIter( Node *pStart );
|
||||
|
||||
/*
|
||||
nextTidyNode( TidyNodeIter *pIter )
|
||||
|
||||
if pCurrent is NULL, this function sets it to the first child of pTop, and
|
||||
returns that value, otherwise it advances to the next node in order,
|
||||
and returns that value. When no nodes remain below pTop, the function returns NULL
|
||||
to indicate that the entire tree has been visited.
|
||||
*/
|
||||
Node *nextTidyNode( TidyNodeIter *pIter );
|
||||
|
||||
/*
|
||||
setCurrentNode( TidyNodeIter *pThis, Node *newCurr )
|
||||
|
||||
Resets pCurrent to match the passed value; useful if you need to back up
|
||||
to an unaltered point in the tree, or to skip a section. The next call to
|
||||
nextTidyNode() will return the node which follows newCurr in order.
|
||||
|
||||
Minimal error checking is performed; unexpected results _will_ occur if
|
||||
newCurr is not a descendant node of pTop.
|
||||
*/
|
||||
void setCurrentNode( TidyNodeIter *pThis, Node *newCurr );
|
224
experimental/httpio.c
Normal file
224
experimental/httpio.c
Normal file
|
@ -0,0 +1,224 @@
|
|||
#include "tmbstr.h"
|
||||
|
||||
#include "httpio.h"
|
||||
|
||||
int
|
||||
makeConnection ( HTTPInputSource *pHttp )
|
||||
{
|
||||
struct sockaddr_in sock;
|
||||
struct hostent *pHost;
|
||||
|
||||
/* Get internet address of the host. */
|
||||
if (!(pHost = gethostbyname ( pHttp->pHostName )))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Copy the address of the host to socket description. */
|
||||
memcpy (&sock.sin_addr, pHost->h_addr, pHost->h_length);
|
||||
|
||||
/* Set port and protocol */
|
||||
sock.sin_family = AF_INET;
|
||||
sock.sin_port = htons( pHttp->nPort );
|
||||
|
||||
/* Make an internet socket, stream type. */
|
||||
if ((pHttp->s = socket (AF_INET, SOCK_STREAM, 0)) == -1)
|
||||
return -1;
|
||||
|
||||
/* Connect the socket to the remote host. */
|
||||
if (connect (pHttp->s, (struct sockaddr *) &sock, sizeof( sock )))
|
||||
{
|
||||
if (errno == ECONNREFUSED)
|
||||
return ECONNREFUSED;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
  Split url into host name, port and resource and store them in pHttp.

  Accepted forms are "host[:port][/resource]" with an optional "http://"
  prefix.  Any other "scheme://" prefix is rejected.  Without an explicit
  port, 80 is used.  Leading slashes of the resource are dropped; the
  request code adds one back.

  Returns 0 on success and -1 when the host is empty or the port is not
  a decimal number in 1..65535.

  NOTE(review): on a port error pHostName has already been duplicated
  and is not released here - confirm who owns it on failure.
*/
int parseURL( HTTPInputSource *pHttp, tmbstr url )
{
    int i, j = 0;
    ctmbstr pStr;

    pStr = tmbsubstr( url, "://" );

    /* If protocol is there, but not http, bail out, else assume http. */
    if (NULL != pStr)
    {
        if (tmbstrncasecmp( url, "http://", 7 ))
            return -1;

        /* host starts right after "://" */
        j = pStr - url + 3;
    }

    for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
    if (i == j)
        return -1;

    /* Get the hostname. */
    pHttp->pHostName = tmbstrndup (&url[j], i - j );

    if (url[i] == ':')
    {
        /* We have a colon delimiting the hostname.  It should mean that
           a port number is following it */
        unsigned long port = 0;

        pHttp->nPort = 0;

        /* isdigit() needs an unsigned char value; plain char may be negative */
        if (!isdigit( (unsigned char) url[++i] ))
            return -1;      /* misformed port number */

        for (; url[i] && url[i] != '/'; i++)
        {
            if (!isdigit( (unsigned char) url[i] ))
                return -1;

            port = 10 * port + (unsigned long) (url[i] - '0');

            /* nPort is an unsigned short; refuse values that would wrap */
            if (port > 65535)
                return -1;
        }
        if (!port)
            return -1;

        pHttp->nPort = (unsigned short) port;
    }
    else
        /* Assume default port. */
        pHttp->nPort = 80;

    /* skip past the delimiting slash (we'll add it later ) */
    while (url[i] && url[i] == '/')
        i++;
    pHttp->pResource = tmbstrdup (url + i );
    return 0;
}
|
||||
|
||||
|
||||
/*
  Refill in->buffer from the socket and reset the read position.

  Returns the number of bytes now available.  This is 0 when there is no
  socket, the peer closed the connection, or recv() failed.  A recv()
  error is treated as end of data; stored as-is in the unsigned short
  nBufSize, -1 would have turned into 65535 "valid" bytes.

  When the buffer is not completely filled a NUL is placed after the
  data, which the header scan in openURL relies on.
*/
int fillBuffer( HTTPInputSource *in )
{
    if (0 < in->s)
    {
        int received = recv( in->s, in->buffer, sizeof( in->buffer ), 0);

        if (received < 0)
            received = 0;

        in->nBufSize = (unsigned short) received;
        in->nextBytePos = 0;
        if (in->nBufSize < sizeof( in->buffer ))
            in->buffer[in->nBufSize] = '\0';
    }
    else
        in->nBufSize = 0;
    return in->nBufSize;
}
|
||||
|
||||
|
||||
/*
  Prepare in as a Tidy input source for pUrl: install the byte callbacks,
  parse the URL, connect, send a GET request and consume the response
  header so that reading starts at the body.

  Returns -1 if the URL cannot be parsed; otherwise the makeConnection()
  result if connecting fails (-1 or ECONNREFUSED); otherwise the result
  of the last fillBuffer() call (bytes in the current buffer, 0 if the
  connection delivered nothing more).

  NOTE(review): sourceData is set with a cast of the pointer to uint,
  which loses bits where pointers are wider than uint - confirm the
  declared type of TidyInputSource.sourceData.
*/
int openURL( HTTPInputSource *in, tmbstr pUrl )
{
    int rc = -1;
#ifdef WIN32
    WSADATA wsaData;

    rc = WSAStartup( 514, &wsaData );
#endif

    in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
    in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
    in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
    in->tis.sourceData = (uint) in;
    in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;

    /* Without a valid host name there is nothing to connect to; going on
       would hand makeConnection() an unset pHostName. */
    if (0 != parseURL( in, pUrl ))
    {
        in->s = -1;     /* same "no socket" marker closeURL() uses */
        return -1;
    }

    if (0 == (rc = makeConnection( in )))
    {
        char ch, lastCh = '\0';
        int blanks = 0;

        /* 48 bytes cover the fixed part of the request plus the NUL */
        char *getCmd = MemAlloc( 48 + strlen( in->pResource ));
        sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
        send( in->s, getCmd, strlen( getCmd ), 0 );
        MemFree( getCmd );

        /* skip past the header information */
        while ( in->nextBytePos >= in->nBufSize
                && 0 < (rc = fillBuffer( in )))
        {
            if (1 < blanks)
                break;
            for (; in->nextBytePos < sizeof( in->buffer )
                   && 0 != in->buffer[ in->nextBytePos ];
                 in->nextBytePos++ )
            {
                ch = in->buffer[ in->nextBytePos ];
                if (ch == '\r' || ch == '\n')
                {
                    if (ch == lastCh)
                    {
                        /* Two carriage returns or two newlines in a row,
                           that's good enough */
                        blanks++;
                    }
                    if (lastCh == '\r' || lastCh == '\n')
                    {
                        blanks++;
                    }
                }
                else
                    blanks = 0;
                lastCh = ch;
                if (1 < blanks)
                {
                    /* end of header, scan to first non-white and return;
                       never step past the data actually received */
                    while ('\0' != ch && isspace( (unsigned char) ch ))
                    {
                        if (++in->nextBytePos >= in->nBufSize)
                            break;
                        ch = in->buffer[ in->nextBytePos ];
                    }
                    break;
                }
            }
        }
    }
    return rc;
}
|
||||
|
||||
|
||||
/*
  Shut down an HTTP input source: detach it from the Tidy callbacks'
  data slot, close the socket if one is open and mark it as closed.
*/
void closeURL( HTTPInputSource *source )
{
    source->tis.sourceData = 0;

    if (0 < source->s)
        closesocket( source->s );
    source->s = -1;

#ifdef WIN32
    WSACleanup();
#endif
}
|
||||
|
||||
|
||||
/*
  Return the next byte of the response, or EndOfStream when no data is
  left.  Bytes given back with HTTPUngetByte() are returned first, most
  recent first.

  Bytes are returned as values 0..255.  Returned as plain char, bytes
  >= 0x80 could become negative, and 0xFF would be -1 (presumably
  indistinguishable from EndOfStream - confirm its definition).
*/
int HTTPGetByte( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        return (unsigned char) source->unGetBuffer[ --source->nextUnGotBytePos ];

    /* current buffer used up: fetch the next one */
    if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
    {
        fillBuffer( source );
    }

    if (0 == source->nBufSize)
        return EndOfStream;

    return (unsigned char) source->buffer[ source->nextBytePos++ ];
}
|
||||
|
||||
/*
  Push byteValue back so the next HTTPGetByte() returns it.
  At most 16 bytes can be pending; further bytes are silently dropped.
*/
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
{
    /* Only you can prevent buffer overflows */
    if (source->nextUnGotBytePos >= 16)
        return;

    source->unGetBuffer[ source->nextUnGotBytePos ] = (char) byteValue;
    source->nextUnGotBytePos++;
}
|
||||
|
||||
/*
  Report whether the input source is exhausted.
  May call fillBuffer() to fetch more data when the current buffer has
  been consumed.
*/
Bool HTTPIsEOF( HTTPInputSource *source )
{
    Bool atEnd = no;

    /* pending ungot bytes mean we are not done */
    if (0 == source->nextUnGotBytePos)
    {
        /* We've consumed the existing buffer, get another */
        if (source->nBufSize != 0 && source->nBufSize <= source->nextBytePos)
            fillBuffer( source );

        /* Nothing left in the buffer after that: must be done. */
        if (source->nextBytePos >= source->nBufSize)
            atEnd = yes;
    }

    return atEnd;
}
|
||||
|
48
experimental/httpio.h
Normal file
48
experimental/httpio.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
#ifndef __HTTPIO_H__
|
||||
#define __HTTPIO_H__
|
||||
|
||||
#include "platform.h"
|
||||
#include "tidy.h"
|
||||
|
||||
#ifdef WIN32
|
||||
# include <winsock.h>
|
||||
# define ECONNREFUSED WSAECONNREFUSED
|
||||
#else
|
||||
# include <sys/socket.h>
|
||||
# include <netdb.h>
|
||||
# include <netinet/in.h>
|
||||
#ifndef __BEOS__
|
||||
# include <arpa/inet.h>
|
||||
#endif
|
||||
#endif /* WIN32 */
|
||||
|
||||
TIDY_STRUCT
|
||||
typedef struct _HTTPInputSource
|
||||
{
|
||||
TidyInputSource tis; // This declaration must be first and must not be changed!
|
||||
|
||||
tmbstr pHostName;
|
||||
tmbstr pResource;
|
||||
unsigned short nPort, nextBytePos, nextUnGotBytePos, nBufSize;
|
||||
SOCKET s;
|
||||
char buffer[1024];
|
||||
char unGetBuffer[16];
|
||||
|
||||
} HTTPInputSource;
|
||||
|
||||
/* get next byte from input source */
|
||||
int HTTPGetByte( HTTPInputSource *source );
|
||||
|
||||
/* unget byte back to input source */
|
||||
void HTTPUngetByte( HTTPInputSource *source, uint byteValue );
|
||||
|
||||
/* check if input source at end */
|
||||
Bool HTTPIsEOF( HTTPInputSource *source );
|
||||
|
||||
int parseURL( HTTPInputSource* source, tmbstr pUrl );
|
||||
|
||||
int openURL( HTTPInputSource* source, tmbstr pUrl );
|
||||
|
||||
void closeURL( HTTPInputSource *source );
|
||||
|
||||
#endif
|
11815
html5-for-html-tidy.patch-2
Normal file
11815
html5-for-html-tidy.patch-2
Normal file
File diff suppressed because it is too large
Load diff
1546
htmldoc/Overview.html
Normal file
1546
htmldoc/Overview.html
Normal file
File diff suppressed because it is too large
Load diff
BIN
htmldoc/checked_by_tidy.gif
Normal file
BIN
htmldoc/checked_by_tidy.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
1252
htmldoc/doxygen.cfg
Normal file
1252
htmldoc/doxygen.cfg
Normal file
File diff suppressed because it is too large
Load diff
300
htmldoc/faq.html
Normal file
300
htmldoc/faq.html
Normal file
|
@ -0,0 +1,300 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta name="generator" content=
|
||||
"HTML Tidy for Mac OS X (vers 1st June 2003), see www.w3.org" />
|
||||
<link type="text/css" rel="stylesheet" href="tidy.css" />
|
||||
<title>HTML Tidy - Frequently Asked Questions</title>
|
||||
<style type="text/css">
|
||||
code { font-weight: bold; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Tidy - Frequently Asked Questions</h1>
|
||||
|
||||
<h2>Overview</h2>
|
||||
|
||||
<p class="abstract">Certain questions about Tidy come up on a
|
||||
regular basis. These are some that have been culled from postings
|
||||
to the html-tidy@w3.org and tidy-develop@lists.sourceforge.net
|
||||
mailing lists. If you don't see your question addressed here, see
|
||||
<a href="#support">How To Get Support</a> below.</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="#what-now">What Now?</a></li>
|
||||
|
||||
<li><a href="#support">How to Get Support?</a></li>
|
||||
|
||||
<li><a href="#bug">How to Submit A Bug Report</a></li>
|
||||
|
||||
<li><a href="#feature">How to Submit A Feature Request</a></li>
|
||||
|
||||
<li><a href="#layout">How Do I Control the Output Layout?</a></li>
|
||||
|
||||
<li><a href="#version">What Version of Tidy Should I Use?</a></li>
|
||||
|
||||
<li><a href="#regression">How Do I Run A Regression Test?</a></li>
|
||||
</ul>
|
||||
|
||||
<hr />
|
||||
<dl>
|
||||
<dt><a name="what-now" id="what-now"></a>What Now?</dt>
|
||||
|
||||
<dd><p>If you have a popup screen that reads as follows:</p>
|
||||
<pre>
|
||||
HTML Tidy for Windows <vers 1st August 2002; built on Aug 8 2002, at 15:41:13>
|
||||
Parsing Console input <stdin>
|
||||
</pre>
|
||||
|
||||
<p>and do not know what to do next, read on.</p>
|
||||
|
||||
<p>Tidy is waiting for your HTML to come in, so it can parse it.
|
||||
Tidy is fundamentally a tool that reads in HTML, cleans it up, and
|
||||
writes it out again. It was developed as a program you run from the
|
||||
console prompt, but there are GUI encapsulations available, e.g.
|
||||
HTML-Kit, which you might prefer.</p>
|
||||
|
||||
<p>If you are using Windows, the first step is to unzip the zip file
|
||||
and place the tidy.exe file in a folder somewhere on your executables
|
||||
path. You may also want to set up a config file to save having to type
|
||||
lots of options each time you run Tidy. From the console prompt you can
|
||||
run Tidy like this:</p>
|
||||
|
||||
<pre>
|
||||
C> tidy -m mywebpage.html
|
||||
</pre>
|
||||
|
||||
<p>In this case, the <code>-m</code> option requests Tidy to write
|
||||
the tidied file back to the same filename as it read from
|
||||
(mywebpage.html). Tidy will give you a breakdown of the problems it
|
||||
found and the version of HTML the file appears to be using.</p>
|
||||
|
||||
<p>To get a listing of Tidy command line options, just type
|
||||
<code>tidy -?</code>. To see a listing of configuration options,
|
||||
try <code>tidy -help-config</code>. To get more info on the
|
||||
config options, see the <a
|
||||
href="http://tidy.sourceforge.net/docs/quickref.html">Quick Reference</a>.</p>
|
||||
|
||||
<p>See also Dave Raggett's <a href="http://tidy.sourceforge.net/docs/Overview.html#help">User Guide</a>.</p>
|
||||
|
||||
<p>If you're not comfortable with the DOS command line, you should
|
||||
try one of the <a href="http://tidy.sourceforge.net/#tidylibapps">GUI
|
||||
Applications</a>.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="support" id="support"></a>How To Get Support</dt>
|
||||
|
||||
<dd>
|
||||
<p>For general HTML Tidy support, the original mailing list
|
||||
html-tidy@w3.org is best. Sometimes developers are the last to
|
||||
know... Also, this list covers both Java and C versions, not to
|
||||
mention various value-added products such as GUI front ends, Perl
|
||||
and Python integration, etc. If you don't get a response after a
|
||||
couple tries or if you have a bug fix, bump it over to the
|
||||
developer list at tidy-develop@lists.sourceforge.net. It's not a
|
||||
hard line, but that is the general arrangement.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="bug" id="bug"></a>How to Submit A Bug Report</dt>
|
||||
|
||||
<dd>
|
||||
<p>You are encouraged to report bugs you found to the Tidy
|
||||
developer team. Tidy's quality depends on your feedback. You can
|
||||
either file your bug report in the Sourceforge <a
|
||||
href="http://sourceforge.net/tracker/?func=add&group_id=27659&atid=390963">
|
||||
bug tracker</a> for HTML Tidy (<em>recommended</em>) or send a mail
|
||||
to the mailing list at html-tidy@w3.org. Note you do <em>not</em>
|
||||
have to have a Sourceforge account in order to file bug reports, or
|
||||
be subscribed to html-tidy@w3.org in order to post messages to the
|
||||
list.</p>
|
||||
|
||||
<p>Prior to submitting a bug report, please check that the bug is
|
||||
not already known. Many are. If you are not sure, just ask. If it
|
||||
is a new bug, make sure to include at least the following information
|
||||
in your report:</p>
|
||||
|
||||
<ul>
|
||||
<li>A description of what you think went wrong.</li>
|
||||
|
||||
<li>The HTML Tidy version (find it out by running <code>tidy
|
||||
-v</code>) and operating system you are running.</li>
|
||||
|
||||
<li>The input, that exposes the bug.<br />
|
||||
A small HTML document that reproduces the problem is best.</li>
|
||||
|
||||
<li>The configuration options you've used. Command line options
|
||||
like<br />
|
||||
<code>-asxml</code>, configuration files, etc. You may use
|
||||
<code>tidy -show-config</code> to get an overview of the active
|
||||
Tidy settings.</li>
|
||||
|
||||
<li>Your e-mail address for further questions and comments.</li>
|
||||
</ul>
|
||||
|
||||
<p>This information is necessary to reproduce whatever is
|
||||
failing; without it we cannot help you. Additional information -
|
||||
and patches - are very welcome!</p>
|
||||
|
||||
<p><em>Please include only one bug per report.</em> Reports with
|
||||
multiple bugs are less easy to track and some bugs may get
|
||||
missed.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="feature" id="feature"></a>How to Submit A Feature
|
||||
Request</dt>
|
||||
|
||||
<dd>
|
||||
<p>If you want Tidy to do something new that it doesn't do today
|
||||
(or stop doing something), then it is probably a feature
|
||||
request.</p>
|
||||
|
||||
<p>The process for submitting a feature request is very similar to
|
||||
bug requests. A different <a
|
||||
href="http://sourceforge.net/tracker/?atid=390966&group_id=27659">
|
||||
tracker</a> is used on SourceForge to denote the difference in
|
||||
subject matter.</p>
|
||||
|
||||
<p>As with bugs, please be sure that the feature has not already
|
||||
been requested. If the feature has already been requested, you can add
|
||||
your comments to the feature request tracker, or send mail to the
|
||||
<a href="mailto:html-tidy@w3.org">mailing list</a> indicating your
|
||||
wish to also have the feature implemented. If the feature has not
|
||||
already been requested, send the same information as for a bug
|
||||
report, but place special emphasis on the desired output for a
|
||||
given input, desired options, etc. - please be as specific as
|
||||
possible about what you want Tidy to <em>do</em>.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="layout" id="layout"></a>How Do I Control the Output Layout?</dt>
|
||||
|
||||
<dd>
|
||||
<p>There are three primary options that control how Tidy
|
||||
formats your markup:</p>
|
||||
<ul>
|
||||
<li><a class="code"
|
||||
href="quickref.html#indent">indent</a></li>
|
||||
<li><a class="code"
|
||||
href="quickref.html#indent-attributes">indent-attributes</a></li>
|
||||
<li><a class="code"
|
||||
href="quickref.html#vertical-space">vertical-space</a></li>
|
||||
</ul>
|
||||
|
||||
<p>Briefly, <code>indent</code> sets the level of left-to-right indenting
|
||||
and, somewhat, how often elements are put onto a new line. The options
|
||||
are <code>yes</code>, <code>no</code>, and <code>auto</code>.
|
||||
<code>indent-attributes</code> is a flag that, when set, tells Tidy to
|
||||
put each attribute on a new line. <code>vertical-space</code> is a flag
|
||||
that, when set, tells Tidy to add some empty lines for readability. The
|
||||
default for all three is <code>no</code>. These options may be used in
|
||||
any combination to control how you want your markup to look. The best
|
||||
thing is to experiment a bit to see what you like. Be aware that
|
||||
<code>indent yes</code> is deprecated for production use as it will
|
||||
cause visual changes in most browsers.</p>
|
||||
|
||||
<p>To get Tidy <em>Classic</em> <code>--indent auto</code> layout, use the following options:</p>
|
||||
|
||||
<pre>
|
||||
indent: auto
|
||||
indent-attributes: no
|
||||
vertical-space: yes
|
||||
</pre>
|
||||
|
||||
<p>You can read about more <em>Pretty Print</em> options
|
||||
<a href="quickref.html#PrettyPrintHeader">here</a>.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="version" id="version"></a>What Version of Tidy Should
|
||||
I Use?</dt>
|
||||
|
||||
<dd>
|
||||
<p>The current SourceForge builds are recommended. You can find these at
|
||||
<a href="http://tidy.sourceforge.net">http://tidy.sourceforge.net</a>.
|
||||
People continue to report examples where Tidy does not catch some
|
||||
ill-formed HTML or, worse, generates ill-formed HTML. These cases have
|
||||
been significantly reduced. That said, be sure to test Tidy with some
|
||||
representative files from your environment.</p>
|
||||
|
||||
<p>For development work, use CVS directly on your development
|
||||
system. See the instructions on how to pull Tidy sources from <a
|
||||
href="http://sourceforge.net/cvs/?group_id=27659">CVS</a>. This way
|
||||
you can keep abreast of changes to Tidy and quickly resolve
|
||||
conflicts.</p>
|
||||
|
||||
<p>For building a front end (e.g. GUI or language binding), the
|
||||
simplest approach is to use TidyLib. For more information
|
||||
about building and coding with TidyLib, see the <a
|
||||
href="http://tidy.sourceforge.net/libintro.html">Introduction To TidyLib</a>.</p>
|
||||
</dd>
|
||||
|
||||
<dt><a name="regression" id="regression">How Do I Run A
|
||||
Regression Test?</a></dt>
|
||||
<dd>
|
||||
<p>You might ask, "Why should I run a regression test?". If you
|
||||
are a Tidy user, you might want to compare a new version of Tidy
|
||||
to the version you are currently running. This is a good idea
|
||||
if you are using Tidy in production applications such as web
|
||||
publishing. If you are a Tidy developer, it is a good idea to
|
||||
run the regression test suite to make sure your fix or enhancement
|
||||
doesn't add new bugs.</p>
|
||||
|
||||
<p>Detecting new bugs is easier said than done, because sometimes
|
||||
they are subtle and can only be seen in browsers (or one particular
|
||||
browser you don't even have). But you can catch most crashes and
|
||||
many layout problems by running the test suite as described here.</p>
|
||||
|
||||
<p>The basic process is simple: run the test suite <strong>before</strong>
|
||||
and <strong>after</strong> making changes to TidyLib and compare the output
|
||||
markup and messages. Be aware that the test scripts for WinNT/2K/XP
|
||||
(alltest.cmd) and Linux/Unix (testall.sh) place the output files in
|
||||
<code>tidy/test/tmp</code>. If you forget to run the <strong>before</strong>
|
||||
test, you can always download a binary from the <a
|
||||
href="http://tidy.sourceforge.net/#binaries">Project Page</a>. If you
|
||||
are not a TidyLib developer, you can download the <a
|
||||
href="http://tidy.sourceforge.net/test/tidy_test.tgz">Test Suite</a>
|
||||
directly. Here are the steps to evaluate the impact of a TidyLib change.</p>
|
||||
|
||||
<h3>For Windows</h3>
|
||||
<p><strong>Before</strong> making changes:</p>
|
||||
<pre>
|
||||
C:\tidy\test> alltest.cmd
|
||||
C:\tidy\test> ren tmp baseline
|
||||
</pre>
|
||||
|
||||
<p><strong>After</strong> making changes and building Tidy:</p>
|
||||
<pre>
|
||||
C:\tidy\test> alltest.cmd
|
||||
C:\tidy\test> windiff tmp baseline
|
||||
</pre>
|
||||
|
||||
<h3>For Linux/Unix</h3>
|
||||
<p><strong>Before</strong> making changes:</p>
|
||||
<pre>
|
||||
~/tidy/test$ ./testall.sh
|
||||
~/tidy/test$ mv tmp baseline
|
||||
</pre>
|
||||
|
||||
<p><strong>After</strong> making changes and building Tidy:</p>
|
||||
<pre>
|
||||
~/tidy/test$ ./testall.sh
|
||||
~/tidy/test$ diff -u tmp baseline > diff.txt
|
||||
</pre>
|
||||
</dd>
|
||||
|
||||
<!--
|
||||
<dt><a name="" id=""></a></dt>
|
||||
<dd>
|
||||
</dd>
|
||||
|
||||
<dt><a name="" id=""></a></dt>
|
||||
<dd>
|
||||
</dd>
|
||||
-->
|
||||
<!-- Save for future questions
|
||||
<dt><a name="" id=""></a></dt>
|
||||
<dd>
|
||||
</dd>
|
||||
-->
|
||||
</dl>
|
||||
</body>
|
||||
</html>
|
BIN
htmldoc/grid.gif
Normal file
BIN
htmldoc/grid.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.5 KiB |
50
htmldoc/license.html
Normal file
50
htmldoc/license.html
Normal file
|
@ -0,0 +1,50 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Tidy License</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<pre>
|
||||
HTML Tidy
|
||||
|
||||
HTML parser and pretty printer
|
||||
|
||||
Copyright (c) 1998-2003 World Wide Web Consortium
|
||||
(Massachusetts Institute of Technology, European Research
|
||||
Consortium for Informatics and Mathematics, Keio University).
|
||||
All Rights Reserved.
|
||||
|
||||
This software and documentation is provided "as is," and
|
||||
the copyright holders and contributing author(s) make no
|
||||
representations or warranties, express or implied, including
|
||||
but not limited to, warranties of merchantability or fitness
|
||||
for any particular purpose or that the use of the software or
|
||||
documentation will not infringe any third party patents,
|
||||
copyrights, trademarks or other rights.
|
||||
|
||||
The copyright holders and contributing author(s) will not be held
|
||||
liable for any direct, indirect, special or consequential damages
|
||||
arising out of any use of the software or documentation, even if
|
||||
advised of the possibility of such damage.
|
||||
|
||||
Permission is hereby granted to use, copy, modify, and distribute
|
||||
this source code, or portions hereof, documentation and executables,
|
||||
for any purpose, without fee, subject to the following restrictions:
|
||||
|
||||
1. The origin of this source code must not be misrepresented.
|
||||
2. Altered versions must be plainly marked as such and must
|
||||
not be misrepresented as being the original source.
|
||||
3. This Copyright notice may not be removed or altered from any
|
||||
source or altered source distribution.
|
||||
|
||||
The copyright holders and contributing author(s) specifically
|
||||
permit, without fee, and encourage the use of this source code
|
||||
as a component for supporting the Hypertext Markup Language in
|
||||
commercial products. If you use this source code in a product,
|
||||
acknowledgment is not required but would be appreciated.
|
||||
</pre>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
554
htmldoc/pending.html
Normal file
554
htmldoc/pending.html
Normal file
|
@ -0,0 +1,554 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta name="generator" content="HTML Tidy, see www.w3.org" />
|
||||
<title>HTML TIDY - Notes on pending work</title>
|
||||
<meta name="keywords"
|
||||
content="HTML, validation, error correction, pretty-printing" />
|
||||
<meta name="author" content="Dave Raggett <dsr@w3.org>" />
|
||||
<style type="text/css">
|
||||
body {
|
||||
margin-left: 10%;
|
||||
margin-right: 10%;
|
||||
font-family: sans-serif
|
||||
}
|
||||
h1 { margin-left: -8% }
|
||||
h2,h3,h4,h5,h6 { margin-left: -4% }
|
||||
pre { color: green; font-weight: bold;
|
||||
font-size: 80%; font-family: monospace}
|
||||
em { font-style: italic; font-weight: bold }
|
||||
strong { text-transform: uppercase; font-weight: bold }
|
||||
.note {font-style: italic; color: rgb(192, 101, 101) }
|
||||
/* hr {text-align: center; width: 60% } */
|
||||
blockquote {
|
||||
color: navy;
|
||||
margin-left: 1%;
|
||||
margin-right: 1%;
|
||||
text-align: center;
|
||||
font-family: "Comic Sans MS", "Times New Roman", serif
|
||||
}
|
||||
table {
|
||||
font-family: sans-serif;
|
||||
font-size: 80%;
|
||||
background: rgb(255,255,153)
|
||||
}
|
||||
td {
|
||||
font-size: 80%
|
||||
}
|
||||
.people {font-family: "Lucida Calligraphy", serif}
|
||||
:link { color: rgb(0, 0, 153) }
|
||||
:visited { color: rgb(153, 0, 153) }
|
||||
:active { color: rgb(255, 0, 102) }
|
||||
a:hover { color: rgb(0, 0, 255) }
|
||||
</style>
|
||||
|
||||
<style type="text/css">
|
||||
p.c1 {font-style: italic}
|
||||
</style>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" background="grid.gif" text="black"
|
||||
link="navy" vlink="black" alink="red">
|
||||
<h1>HTML TIDY - Notes on Pending Work</h1>
|
||||
|
||||
<p><a href="http://www.w3.org/People/Raggett">Dave Raggett</a> <a
|
||||
href="mailto:dsr@w3.org">dsr@w3.org</a></p>
|
||||
|
||||
<p>This is a page where I am keeping the suggestions for
|
||||
improvements or bug fixes. My current work load means that I
|
||||
don't get much time to work on HTML Tidy, so I am interested in
|
||||
offers of help!</p>
|
||||
|
||||
<h4>Public Email List for Tidy: <<a
|
||||
href="mailto:html-tidy@w3.org">html-tidy@w3.org</a>></h4>
|
||||
|
||||
<p>I have set up an archived mailing list devoted to Tidy. To
|
||||
subscribe send an email to html-tidy-request@w3.org with the word
|
||||
subscribe in the subject line (include the word unsubscribe if
|
||||
you want to unsubscribe). The <a
|
||||
href="http://lists.w3.org/Archives/Public/html-tidy/">archive</a>
|
||||
for this list is accessible online. Please use this list to
|
||||
report errors or enhancement requests.</p>
|
||||
|
||||
<h2>Things awaiting further attention</h2>
|
||||
|
||||
<ul>
|
||||
<li>Support for BIG5 and ShiftJIS (Rick Jelliffe)</li>
|
||||
|
||||
<li>Stronger checking on which attributes appear on what
|
||||
elements</li>
|
||||
|
||||
<li>Sorting attributes in a canonical order</li>
|
||||
|
||||
<li>Version checking for HTML 4.01 vs 4.0 (Tidy currently will
|
||||
set the document type to 4.01 in preference to 4.0)</li>
|
||||
|
||||
<li>Noticing that the document isn't really XHTML if it isn't
|
||||
wellformed, i.e. it lacks end tags and quotes on attribute
|
||||
values</li>
|
||||
|
||||
<li>Converting <font face="Symbol">a</font> etc. to
|
||||
the corresponding Unicode characters, when cleaning HTML.</li>
|
||||
|
||||
<li>link checking - this would involve some platform dependent
|
||||
code as the network interface varies significantly from one
|
||||
platform to the next.</li>
|
||||
|
||||
<li>When exporting Word2000 to Web page, there is a need for
|
||||
smarter rules of thumb for working out whether the paragraph is a
|
||||
bulleted or numbered list item, and determining the level of
|
||||
nesting. Perhaps the style attribute holds the key? This tends to
|
||||
include substrings like: "mso-list:l0 level1 lfo2;" and
|
||||
"mso-list:l1 level1 lfo1;". Unfortunately, these aren't always
|
||||
present, and I have yet to figure out a foolproof heuristic.</li>
|
||||
</ul>
|
||||
|
||||
<p>I need to set up an index of precisely what attributes are
|
||||
supported on each element. Right now, some elements check their
|
||||
own attributes, whilst others are checked via default checks
|
||||
defined for each attribute independently of the element. Until
|
||||
this is done, you sometimes find that validation services
|
||||
discover errors unnoticed by Tidy itself.</p>
|
||||
|
||||
<p>Jelks Cabaniss asks: <i>Could Tidy be made to automatically
|
||||
"clean" (FONTs to CSS) if the Strict DOCTYPE is requested? An
|
||||
HTML or XHTML Strict document can't have FONT tags according to
|
||||
the DTDs</i>. Jelks has a bunch of other good ideas such as
|
||||
converting the bgcolor attribute over to CSS.</p>
|
||||
|
||||
<p>Adding an option to select slide transition effects. I would
|
||||
also like to provide an optional feature for sorting attribute
|
||||
values.</p>
|
||||
|
||||
<p>I am having problems with form elements as direct children of
|
||||
tr or table. It is dangerous to create an implicit table cell,
|
||||
and what is needed is a way to move the form element into the
|
||||
next cell. If this can't be done an error needs to be raised
|
||||
since Tidy will be stuck. On a separate note, Tidy is still
|
||||
breaking lines between <img> and </a> which in
|
||||
Netscape shows as an underlined space. It's fine in IE.</p>
|
||||
|
||||
<p>Benjamin Holzman <bah@orientation.com> writes: I'm
|
||||
wrapping tidy (release-date 2000.01.13) in some perl objects
|
||||
(using SWIG), and CharEncoding being a global is a bit of a pain.
|
||||
I was wondering what your thoughts would be on how to fix that.
|
||||
The character encoding is already a property of struct Out; is
|
||||
there any reason why making it part of struct StreamIn as well,
|
||||
and perhaps setting that property in OpenInput, based on the
|
||||
existing CharEncoding variable, wouldn't allow us to move
|
||||
CharEncoding to be local to main?</p>
|
||||
|
||||
<p>Oh, in case you're curious about the API, here's a short
|
||||
script using my wrappers to be an html to xhtml filter:</p>
|
||||
|
||||
<pre>
|
||||
#!/usr/bin/perl
|
||||
|
||||
require tidy;
|
||||
|
||||
my $tidy = Tidy->new(*STDIN);
|
||||
my $document = $tidy->parse;
|
||||
$tidy->as_xhtml(*STDOUT);
|
||||
</pre>
|
||||
|
||||
<p>Rick Parsons would like there to be a new wrap-attributes
|
||||
option that can be used to suppress line wrapping within
|
||||
attributes. There is already a similar option for JavaScript
|
||||
literals.</p>
|
||||
|
||||
<p>Vijay Patil would like tidy -h to display options sorted
|
||||
alphabetically.</p>
|
||||
|
||||
<p>Julian Reschke would like there to be an option to add the
|
||||
xml:space="preserve" attribute to pre elements when outputting
|
||||
xml.</p>
|
||||
|
||||
<p>Armando Asantos would like to use Tidy to produce a list of
|
||||
URLs for images or hypertext links according to a config option.
|
||||
This would be straightforward, but is a lower priority than bug
|
||||
fixes etc.</p>
|
||||
|
||||
<p>Omri Traub would like an option to wrap the contents of style
|
||||
and script elements in CDATA marked sections when converting to
|
||||
XHTML. He is also interested in direct support for 16 bit
|
||||
character file I/O.</p>
|
||||
|
||||
<p>Bertilo Wennergren notes:</p>
|
||||
|
||||
<blockquote>If I configure Tidy to "upgrade to style sheets", it
|
||||
does so for a few things in my main document, but the code thus
|
||||
created get error reports if I feed it back to Tidy. It turns out
|
||||
that Tidy creates extra "class" attributes on tags that already
|
||||
have "class" attributes set. This happens with this page:
|
||||
<http://www.concinnity.se/bertilow/index.htm>.</blockquote>
|
||||
|
||||
<p>Randi Waki notes:</p>
|
||||
|
||||
<blockquote>
|
||||
<p>If a quoted URL attribute value (e.g., href in <a>
|
||||
elements) contains a line break, 13-Jan-2000 Tidy changes the
|
||||
line break to a space while IE and Netscape discard the line
|
||||
break. This can result in a broken link in the tidied
|
||||
document.</p>
|
||||
|
||||
<p>I believe the following change fixes the problem. In lexer.c,
|
||||
insert the following lines before line 2502:</p>
|
||||
|
||||
<pre>
|
||||
/* discard line breaks in quoted URLs */
|
||||
if (c == '\n' && IsUrl(name))
|
||||
continue;
|
||||
|
||||
/* existing line 2502 */ c = ' ';
|
||||
</pre>
|
||||
</blockquote>
|
||||
|
||||
<p>Stephen Reynolds would like Tidy to keep track of whether a
|
||||
comment started on a new line and preserve this in the
|
||||
output.</p>
|
||||
|
||||
<p>Terry Teague says:</p>
|
||||
|
||||
<blockquote>
|
||||
<p>Sorry, I should have been more clear. Part of the problem is
|
||||
the current HelpText() function in localize.c doesn't actually
|
||||
reflect current reality.</p>
|
||||
|
||||
<p>You need to at least add the following line to HelpText()
|
||||
:</p>
|
||||
|
||||
<pre>
|
||||
tidy_out(out, " -version or -v show version\n");
|
||||
</pre>
|
||||
|
||||
<p>And I suppose it should mention the use of the new
|
||||
"--<config options>" type syntax.</p>
|
||||
|
||||
<p>Regards, Terry</p>
|
||||
</blockquote>
|
||||
|
||||
<p>John Russel notes:</p>
|
||||
|
||||
<pre>
|
||||
what i wonder is
|
||||
1] does the specification indicate these are WRONG
|
||||
2] if so why do they pass thru tidy ....
|
||||
is url syntax such a can of worms that it is left to user
|
||||
to check .......
|
||||
|
||||
CASE 1: misuse of slash for folders
|
||||
site had background="pics\fancy.jpg"
|
||||
instead of "pics/fancy.jpg"
|
||||
|
||||
CASE 2: spaces in filename
|
||||
site had href="coin album.html"
|
||||
instead of "coin%20album.html"
|
||||
</pre>
|
||||
|
||||
<p>Andre Stechert would like a way to prevent Tidy from
|
||||
"cleaning" newly declared elements which don't have any content
|
||||
but do have end tags; see his mail of 17th January 2000.</p>
|
||||
|
||||
<p>Todd Clark would like to use Tidy with Microsoft's WebClass
|
||||
tags. Unfortunately these include unusual characters in the tag
|
||||
names such as @ which Tidy objects to, for instance:</p>
|
||||
|
||||
<pre>
|
||||
<WC@DOMAINNAME>test.com</WC@DOMAINNAME>
|
||||
</pre>
|
||||
|
||||
<p>Perhaps it makes sense to offer an option to make Tidy less
|
||||
picky about what characters it accepts in tag names. Or perhaps
|
||||
"WebClass: yes".</p>
|
||||
|
||||
<p>Jelks Cabaniss suggests an option to control dropping of empty
|
||||
elements, e.g. according to what attributes they have.</p>
|
||||
|
||||
<p>Paavo Hartikainen writes:</p>
|
||||
|
||||
<blockquote>
|
||||
<p>Tidy always expands '&' to '&amp;' even if I have
|
||||
'quote-ampersand: no' defined in configuration file. This is not
|
||||
a good thing to do for URLs that have '&' characters in them.
|
||||
OS is Debian GNU/Linux 2.1 SPARC. Same thing happens on Alpha.
|
||||
Other architectures I have not tried.</p>
|
||||
|
||||
<p>My configuration looks like this:</p>
|
||||
|
||||
<pre>
|
||||
char-encoding: latin1
|
||||
error-file: ./errors
|
||||
indent-spaces: 2
|
||||
logical-emphasis: yes
|
||||
output-xhtml: yes
|
||||
quiet: no
|
||||
quote-ampersand: no
|
||||
show-warnings: yes
|
||||
tidy-mark: yes
|
||||
wrap: 78
|
||||
wrap-attributes: no
|
||||
write-back: yes
|
||||
keep-time: yes
|
||||
</pre>
|
||||
</blockquote>
|
||||
|
||||
<p>Paul White reports that Tidy isn't recognizing HTML 3.2 when
|
||||
the doctype is "-//W3C//DTD HTML 3.2 Final//EN" (as per the REC),
|
||||
and similarly for HTML 4.01. This would appear to call for a
|
||||
change to the table of names in lexer.c.</p>
|
||||
|
||||
<p>Stuart Hungerford would like Tidy to detect and fix duplicate
|
||||
attributes e.g. multiple class attributes. Celeste Suliin Burris
|
||||
would like Tidy to replace spaces in URLs by %20 as some versions
|
||||
of Netscape "croak big time" on this. Denis Kokarev also wants
|
||||
Tidy to remove duplicate attributes when the values are the same.
|
||||
This apparently stops XSLT from working. Brian Schweitzer notes
|
||||
that Tidy adds a 2nd class attribute rather than merging the
|
||||
classes into a space separated list.</p>
|
||||
|
||||
<p>Bertilo Wennergren writes: Tidy seems not to recognize frame
|
||||
elements with a closing "/". It actually removes them. Try his <a
|
||||
href="http://www.concinnity.se/bertilow/pmeg/pmeg9/k_bazo.htm">example</a>.
|
||||
Tidy can produce XHTML Frameset docs, but when fed them back again it cries foul.</p>
|
||||
|
||||
<p>Jose Manuel Cerqueira Esteves notes:</p>
|
||||
|
||||
<pre>
|
||||
I've used `tidy' to convert a few HTML 4.0 files to XHTML 1.0 and noticed
|
||||
a problem when dealing with constructs like
|
||||
|
||||
<small><small>some text</small></small>
|
||||
|
||||
First, `tidy' acts as if the second "<small>" was meant as a closing tag:
|
||||
|
||||
Warning: "<small> is probably intended as </small>"
|
||||
|
||||
Then it trims the resulting empty <small></small>:
|
||||
|
||||
Warning: trimming empty <small>
|
||||
|
||||
And finally both remaining closing tags ("</small>"), now spurious,
|
||||
are removed:
|
||||
|
||||
Warning: discarding unexpected </small>
|
||||
Warning: discarding unexpected </small>
|
||||
|
||||
It would be convenient to have at least some `tidy' option to prevent this
|
||||
from happening (or perhaps some different heuristics?).
|
||||
</pre>
|
||||
|
||||
<p>Robbert Hans Baron would like to see Tidy warning about
|
||||
duplicate attributes and fixing these when the values are
|
||||
identical.</p>
|
||||
|
||||
<p>Jutta Wrage notes that: When parsing HTML 3.2 Pages, tidy
|
||||
doesn't accept textareas in forms correctly. The HTML Reference
|
||||
specification (HTML 3.2 Final) allows: name, rows and cols, but
|
||||
upon seeing these Tidy thinks the document is 4.0.</p>
|
||||
|
||||
<p>Matthew Brealey notes that a heading start tag is coerced to
|
||||
an end heading tag when the end tag is missing. This is
|
||||
deliberate, but perhaps not the best heuristic.</p>
|
||||
|
||||
<p>HIYAMA Masayuki notes that Tidy should set the encoding
|
||||
attribute to match the language encoding, e.g. <?xml version="1.0"
|
||||
encoding="iso-2022-jp"?>.</p>
|
||||
|
||||
<p>Mark Modrall has extended Tidy to support selectively
|
||||
stripping out listed tags and attributes, see his email of March
|
||||
14th.</p>
|
||||
|
||||
<p>Yong Taek Bae notes that with the omit end tags option Tidy
|
||||
omits the body tag even if it has attributes. This is an
|
||||
error.</p>
|
||||
|
||||
<p>Tapio Markula reports that Tidy is incorrectly replacing
|
||||
accented characters in script elements by entities. The script
|
||||
element (in HTML but not XHTML) is CDATA and as such entities
|
||||
won't be expanded. This bug needs to be fixed along with the
|
||||
support for CDATA sections.</p>
|
||||
|
||||
<p>Terrill Bennett reports tidy crashing when producing slides,
|
||||
and when the -i option has been set. He later added the crash
|
||||
occurs when the page doesn't include an h1 element. See
|
||||
Terrill-Bennett-11mar00.txt.</p>
|
||||
|
||||
<p>Stephen Lewis notes that if an <hr> element is present
|
||||
in the head before the title element, then Tidy gets confused and
|
||||
adds in a spurious extra empty title element. This would be
|
||||
avoided if Tidy could move the hr into the body before the body
|
||||
element is encountered. This raises a number of problems for
|
||||
instance working out when to copy in attributes from an explicit
|
||||
body element.</p>
|
||||
|
||||
<p>Carl Osterly would like Tidy to avoid breaking lines before or
|
||||
after the = sign in attribute values when this is practical.
|
||||
Perhaps a simple rule of thumb could be used to decide this?</p>
|
||||
|
||||
<p>Rick H Wesson notes that Tidy crashes on CDATA marked sections
|
||||
when parsing XML.</p>
|
||||
|
||||
<p>Luigi Federici would like an option to set the DTD URI for XML
|
||||
or XHTML.</p>
|
||||
|
||||
<p>Mats Sander notes: If I have php code the indentation behaves
|
||||
strange. Repeated tidying php content and end tag indented one
|
||||
level extra for each time. The result ends up something like
|
||||
this:</p>
|
||||
|
||||
<pre>
|
||||
...
|
||||
<?php
|
||||
$r=0;
|
||||
?>
|
||||
...
|
||||
|
||||
I have the following config file for Tidy:
|
||||
---
|
||||
tidy-mark: no
|
||||
markup: yes
|
||||
wrap: 0
|
||||
indent: auto
|
||||
output-xml: no
|
||||
output-xhtml: yes
|
||||
doctype: loose
|
||||
char-encoding: latin1
|
||||
quote-marks: yes
|
||||
assume-xml-procins: yes
|
||||
word-2000: yes
|
||||
clean: yes
|
||||
logical-emphasis: yes
|
||||
drop-empty-paras: yes
|
||||
enclose-text: yes
|
||||
fix-bad-comments: yes
|
||||
alt-text: .
|
||||
write-back: bool
|
||||
keep-time: yes
|
||||
show-warnings: no
|
||||
quiet: yes
|
||||
split: no
|
||||
---
|
||||
|
||||
Best Regards,
|
||||
Mats-Olof Sander
|
||||
|
||||
</pre>
|
||||
|
||||
<p>Don Hasson notes that if you make a mistake and leave off the
|
||||
ending "/" in the <title> tag, tidy will generate an extra
|
||||
set of <title>s.</p>
|
||||
|
||||
<p>Example:</p>
|
||||
|
||||
<pre>
|
||||
<html>
|
||||
<head><title>No end here<title></head>
|
||||
<body>
|
||||
Empty
|
||||
</body>
|
||||
</html>
|
||||
|
||||
</pre>
|
||||
|
||||
<p>produces this:</p>
|
||||
|
||||
<pre>
|
||||
<html>
|
||||
<head>
|
||||
<title>No end here</title>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
Empty
|
||||
</body>
|
||||
</html>
|
||||
|
||||
</pre>
|
||||
|
||||
<p>Jeff Wilkinson would like the HTML Tidy page to include
|
||||
internal anchors so that he can link directly to the appropriate
|
||||
sections.</p>
|
||||
|
||||
<p>Peter Vince would like to be able to clean presentation
|
||||
attributes on the body element, as well as translating b and i to
|
||||
span.</p>
|
||||
|
||||
<p>Dave Bryan and Matthew Brealey would like there to be a way to
|
||||
suppress the default handling of inline elements in favor of
|
||||
simply inserting the appropriate end tag when encountering an
|
||||
element that isn't allowed in an inline context. The default
|
||||
behavior replicates the rendering on existing browsers but can
|
||||
cause problems for hand editors.</p>
|
||||
|
||||
<p>Dave Bryan notes that tidy isn't updating the column position
|
||||
when parsing attributes.</p>
|
||||
|
||||
<p>Can Tidy track when a line break occurs after a PI or comment
|
||||
and reproduce this in the output? This idea occurred to me after
|
||||
reading a comment from Brad Stowers.</p>
|
||||
|
||||
<p>One interesting suggestion is to make some of Tidy's rules of
|
||||
thumb sensitive to the program that generated the markup as
|
||||
indicated by the meta element. This would allow for greater
|
||||
robustness in how the rules operate.</p>
|
||||
|
||||
<p>Dave Bryan would like the quiet mode to be tweaked to suppress
|
||||
the general info at the end of the report. See
|
||||
Dave-Bryan-24mar00.txt.</p>
|
||||
|
||||
<p>Erik Rossen would like an option to suppress line wrap within
|
||||
tags, so that the tag is always on the same line regardless of
|
||||
the number and length of the attributes.</p>
|
||||
|
||||
<p>Dan Satria suggests that the clean mechanism check to see if
|
||||
there are any existing matching style rules before adding new
|
||||
ones.</p>
|
||||
|
||||
<p>Zoltan Hawryluk suggests mapping the Netscape layer tag into
|
||||
the equivalent CSS positioning syntax.</p>
|
||||
|
||||
<p>Jim Walker says Tidy doesn't correctly report errors such as
|
||||
<tt></</head></tt>.</p>
|
||||
|
||||
<p>Tidy's slide feature: see Johannes-Poutre-12jul00.txt</p>
|
||||
|
||||
<p>Carole Mah suggests Tidy should recover from multiple class
|
||||
attributes on the same element.</p>
|
||||
|
||||
<h2>Other ideas</h2>
|
||||
|
||||
<ul>
|
||||
<li>Recursion through subdirectories, so you can fix up your
|
||||
entire web site at one go. This assumes I can find a way that is
|
||||
portable across a wide range of platforms!</li>
|
||||
|
||||
<li>Support for W3C's <a
|
||||
href="http://www.w3.org/TR/REC-DOM-Level-1/">Document Object
|
||||
Model</a> (DOM) level one.</li>
|
||||
|
||||
<li>Full validation of all attribute values.</li>
|
||||
|
||||
<li>Mapping Unicode bidi control characters to HTML tags.</li>
|
||||
|
||||
<li>Full support for parsing XML (still somewhat limited).</li>
|
||||
|
||||
<li>How to say which XML elements should be printed
|
||||
"inline".</li>
|
||||
|
||||
<li>Acting on the XML encoding attribute, e.g.
|
||||
<?xml encoding="iso-8859-1"?></li>
|
||||
|
||||
<li>Improved mapping from HTML presentation attributes/elements
|
||||
to CSS.</li>
|
||||
|
||||
<li>Improved support for <a
|
||||
href="http://java.sun.com/products/jsp/">JSP</a> (Java Server
|
||||
pages)</li>
|
||||
|
||||
<li>Ugly print option which removes all optional whitespace</li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
|
247
htmldoc/quickref-html.xsl
Normal file
247
htmldoc/quickref-html.xsl
Normal file
|
@ -0,0 +1,247 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
For generating the `quickref.html` web page from output of
|
||||
`tidy -xml-config`
|
||||
|
||||
(c) 2005 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Written by Charles Reitzel and Jelks Cabaniss
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2005/04/13 11:10:58 $
|
||||
$Revision: 1.2 $
|
||||
|
||||
-->
|
||||
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns="http://www.w3.org/1999/xhtml"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="xml" indent="yes"
|
||||
encoding="us-ascii"
|
||||
omit-xml-declaration="yes"
|
||||
doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd" />
|
||||
|
||||
<!--
Root template: builds the XHTML page. Emits the head (title and tidy.css
link), the page headings, a "generated with" line that reads the version
from config/@version, and a paragraph of in-page links to the five
category headers; then calls the "link-section" and "detail-section"
named templates to produce the two tables.
-->
<xsl:template match="/">
|
||||
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>HTML Tidy Configuration Options Quick Reference</title>
|
||||
<link type="text/css" rel="stylesheet" href="tidy.css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1 id="top">Quick Reference</h1>
|
||||
|
||||
<h2>HTML Tidy Configuration Options</h2>
|
||||
|
||||
<p><em>Generated automatically with HTML Tidy released
|
||||
on <xsl:value-of select="config/@version" />.</em></p>
|
||||
|
||||
<p><a class="h3" href="#MarkupHeader">HTML, XHTML, XML</a><br />
|
||||
<a class="h3" href="#DiagnosticsHeader">Diagnostics</a><br />
|
||||
<a class="h3" href="#PrettyPrintHeader">Pretty Print</a><br />
|
||||
<a class="h3" href="#EncodingHeader">Character Encoding</a><br />
|
||||
<a class="h3" href="#MiscellaneousHeader">Miscellaneous</a></p>
|
||||
|
||||
<xsl:call-template name="link-section" />
|
||||
|
||||
<xsl:call-template name="detail-section" />
|
||||
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named Templates: -->
|
||||
|
||||
<!--
link-section: emits the summary table (three 33% columns) by calling the
"links" template once per option class: markup, diagnostics, print,
encoding and misc. Each call passes the class name, a display header and
the id to put on that category's header cell; those ids are the targets
of the "...Header" links emitted by the root template.
-->
<xsl:template name="link-section">
|
||||
<table summary="Tidy Options Quick Reference Header Section" border="0"
|
||||
cellpadding="3" cellspacing="0">
|
||||
<colgroup>
|
||||
<col width="33%" />
|
||||
<col width="33%" />
|
||||
<col width="33%" />
|
||||
</colgroup>
|
||||
<xsl:call-template name="links">
|
||||
<xsl:with-param name="class">markup</xsl:with-param>
|
||||
<xsl:with-param name="header">HTML, XHTML, XML</xsl:with-param>
|
||||
<xsl:with-param name="headerID">MarkupHeader</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="links">
|
||||
<xsl:with-param name="class">diagnostics</xsl:with-param>
|
||||
<xsl:with-param name="header">Diagnostics</xsl:with-param>
|
||||
<xsl:with-param name="headerID">DiagnosticsHeader</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="links">
|
||||
<xsl:with-param name="class">print</xsl:with-param>
|
||||
<xsl:with-param name="header">Pretty Print</xsl:with-param>
|
||||
<xsl:with-param name="headerID">PrettyPrintHeader</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="links">
|
||||
<xsl:with-param name="class">encoding</xsl:with-param>
|
||||
<xsl:with-param name="header">Character Encoding</xsl:with-param>
|
||||
<xsl:with-param name="headerID">EncodingHeader</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="links">
|
||||
<xsl:with-param name="class">misc</xsl:with-param>
|
||||
<xsl:with-param name="header">Miscellaneous</xsl:with-param>
|
||||
<xsl:with-param name="headerID">MiscellaneousHeader</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
</table>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
detail-section: emits the detail table by calling the "reference"
template once per option class (markup, diagnostics, print, encoding,
misc), passing the class name, a display header and the id for that
category's heading cell.
-->
<xsl:template name="detail-section">
|
||||
<table summary="Tidy Options Quick Reference Detail Section" border="0"
|
||||
cellpadding="3" cellspacing="0">
|
||||
<xsl:call-template name="reference">
|
||||
<xsl:with-param name="class">markup</xsl:with-param>
|
||||
<xsl:with-param name="header">HTML, XHTML, XML</xsl:with-param>
|
||||
<xsl:with-param name="headerID">MarkupReference</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="reference">
|
||||
<xsl:with-param name="class">diagnostics</xsl:with-param>
|
||||
<xsl:with-param name="header">Diagnostics</xsl:with-param>
|
||||
<xsl:with-param name="headerID">DiagnosticsReference</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="reference">
|
||||
<xsl:with-param name="class">print</xsl:with-param>
|
||||
<xsl:with-param name="header">Pretty Print</xsl:with-param>
|
||||
<xsl:with-param name="headerID">PrettyPrintReference</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="reference">
|
||||
<xsl:with-param name="class">encoding</xsl:with-param>
|
||||
<xsl:with-param name="header">Character Encoding</xsl:with-param>
|
||||
<xsl:with-param name="headerID">EncodingReference</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name="reference">
|
||||
<xsl:with-param name="class">misc</xsl:with-param>
|
||||
<xsl:with-param name="header">Miscellaneous</xsl:with-param>
|
||||
<xsl:with-param name="headerID">MiscellaneousReference</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
</table>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
links: emits the summary rows for one option class.
Parameters:
  class    - value matched against option/@class under /config
  header   - display text; " Options" is appended after it
  headerID - id attribute for the category header cell
Output: a header row with a "Top" link, the column titles from
"ClassHeaders", one row per matching option sorted by name (name linked
to #name, type, and default or "-" when the default is empty), and a
closing spacer row.
-->
<xsl:template name="links">
|
||||
<xsl:param name="class"/>
|
||||
<xsl:param name="header"/>
|
||||
<xsl:param name="headerID"/>
|
||||
<tr valign="bottom">
|
||||
<td class="h3" colspan="2" id="{$headerID}">
|
||||
<xsl:value-of select="$header"/> Options</td>
|
||||
<td valign="top"><a href="#top">Top</a></td>
|
||||
</tr>
|
||||
<xsl:call-template name="ClassHeaders" />
|
||||
<xsl:for-each select="/config/option[@class=$class]">
|
||||
<xsl:sort select="name" order="ascending" />
|
||||
<tr>
|
||||
<td><a href="#{name}"><xsl:value-of select="name"/></a></td>
|
||||
<td><xsl:apply-templates select="type"/></td>
|
||||
<td><xsl:choose>
|
||||
<xsl:when test="string-length(default) > 0 ">
|
||||
<xsl:apply-templates select="default" />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<strong>-</strong>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:for-each>
|
||||
<tr valign="bottom">
|
||||
<td colspan="3"> </td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
reference: emits the detailed entries for one option class.
Parameters:
  class    - value matched against option/@class under /config
  header   - display text; " Options Reference" is appended after it
  headerID - id attribute for the category heading cell
For each matching option, sorted by name, it writes: a title row whose
cell id is the option name (the target of the summary table links) plus a
"Top" link; a row with Type, Default and Example ("-" when default or
example is empty) and the option's seealso entries as links separated by
line breaks; a row with the description; and a spacer row.
-->
<xsl:template name="reference">
|
||||
<xsl:param name="class"/>
|
||||
<xsl:param name="header"/>
|
||||
<xsl:param name="headerID"/>
|
||||
|
||||
<tr>
|
||||
<td> </td>
|
||||
</tr>
|
||||
<tr valign="bottom">
|
||||
<td valign="top" colspan="2" class="h2" id="{$headerID}">
|
||||
<xsl:value-of select="$header"/> Options Reference
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> </td>
|
||||
</tr>
|
||||
|
||||
<xsl:for-each select="/config/option[@class=$class]">
|
||||
<xsl:sort select="name" order="ascending" />
|
||||
<tr>
|
||||
<td class="tabletitle" valign="top" id="{name}">
|
||||
<xsl:value-of select="name"/>
|
||||
</td>
|
||||
<td class="tabletitlelink" valign="top" align="right">
|
||||
<a href="#top">Top</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top">Type: <strong><xsl:value-of
|
||||
select="type"/></strong><br />
|
||||
|
||||
<xsl:choose>
|
||||
<xsl:when test="string-length(default) > 0">
|
||||
Default: <strong><xsl:apply-templates select="default" /></strong>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
Default: <strong>-</strong>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
|
||||
<xsl:choose>
|
||||
<xsl:when test="string-length(example) > 0">
|
||||
<br />Example: <strong><xsl:apply-templates
|
||||
select="example"/></strong>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<br />Example: <strong>-</strong>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</td>
|
||||
<td align="right" valign="top">
|
||||
<xsl:for-each select="seealso">
|
||||
<a href="#{.}"><xsl:apply-templates select="." /></a>
|
||||
<xsl:if test="position() != last()">
|
||||
<br />
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="2"><xsl:apply-templates select="description"/></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td> </td>
|
||||
</tr>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
<!-- ClassHeaders: emits the column title row (Option, Type, Default) used by the "links" template. -->
<xsl:template name="ClassHeaders">
|
||||
<tr>
|
||||
<td class="tabletitle">Option</td>
|
||||
<td class="tabletitle">Type</td>
|
||||
<td class="tabletitle">Default</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Regular Templates: -->
|
||||
<!--
Re-creates a, code, em, strong and br elements from the input as elements
of the same local name, copying their attributes and child nodes as-is.
-->
<xsl:template match="a | code | em | strong | br">
|
||||
<xsl:element name="{local-name(.)}">
|
||||
<xsl:copy-of select="@* | node()" />
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
1771
htmldoc/release-notes.html
Normal file
1771
htmldoc/release-notes.html
Normal file
File diff suppressed because it is too large
Load diff
329
htmldoc/tidy.css
Normal file
329
htmldoc/tidy.css
Normal file
|
@ -0,0 +1,329 @@
|
|||
/* 1st Style ignored by Netscape */
|
||||
td.dummy, font.dummy, .dummy, a:link.dummy, a:visited.dummy, a:active.dummy
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 16pt;
|
||||
color: #336699;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
body
|
||||
{
|
||||
margin-left: 10%;
|
||||
margin-right: 10%;
|
||||
font-family: sans-serif;
|
||||
background-color: #FFFFFF
|
||||
}
|
||||
|
||||
/* Blue TITLE */
|
||||
td.title, font.title, .title, a:link.title, a:visited.title, a:active.title
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 16pt;
|
||||
color: #336699;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* BODY TEXT */
|
||||
td.text, font.text, .text, a:link.text, a:visited.text, a:active.text
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 9pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* BOLD BODY TEXT */
|
||||
td.textbold, font.textbold, .textbold, a:link.textbold, a:visited.textbold, a:active.textbold
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 9pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* BOLD BODY TEXT LINK WITH UNDERLINE*/
|
||||
td.textboldlink, font.textboldlink, .textboldlink, a:link.textboldlink, a:visited.textboldlink, a:active.textboldlink
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 9pt;
|
||||
color: #000000;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* SMALL BODY TEXT */
|
||||
td.smtext, font.smtext, .smtext, a:link.smtext, a:visited.smtext, a:active.smtext
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 8pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* SMALL BOLD BODY TEXT */
|
||||
td.smtextbold, font.smtextbold, .smtextbold, a:link.smtextbold, a:visited.smtextbold, a:active.smtextbold
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 8pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* TITLES
|
||||
td.title, font.title, .title, a:link.title, a:visited.title, a:active.title
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
color: #CC3300;
|
||||
text-decoration: none;
|
||||
font-weight: bold
|
||||
}
|
||||
*/
|
||||
|
||||
/* SUBTITLES */
|
||||
td.subtitle, font.subtitle, .subtitle, a:link.subtitle, a:visited.subtitle, a:active.subtitle
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 11pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* LEGAL TEXT */
|
||||
td.legal, font.legal, .legal, a:link.legal, a:visited.legal, a:active.legal
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 8pt;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
td.legallink, font.legallink, .legallink, a:link.legallink, a:visited.legallink, a:active.legallink
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 8pt;
|
||||
color: #CC3300;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* RED TEXT */
|
||||
td.textred, font.textred, .textred, a:link.textred, a:visited.textred, a:active.textred
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #CC3300;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* RED TEXT BOLD*/
|
||||
td.textredbold, font.textredbold, .textredbold, a:link.textredbold, a:visited.textredbold, a:active.textredbold
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #CC3300;
|
||||
text-decoration: none;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* LINKS */
|
||||
td.link, font.link, .link, a:link.link, a:visited.link, a:active.link
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #3366CC;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
td.tabletitlelink, font.tabletitlelink, .tabletitlelink, a:link.tabletitlelink, a:visited.tabletitlelink, a:active.tabletitlelink
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
background-color: #e9e9d3;
|
||||
color: #000000;
|
||||
}
|
||||
|
||||
/* TABLE TITLES */
|
||||
td.tabletitle, font.tabletitle, .tabletitle, a:link.tabletitle, a:visited.tabletitle, a:active.tabletitle
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #336699;
|
||||
background-color: #e9e9d3;
|
||||
/* text-decoration: none; */
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
/* TABLE CELL */
|
||||
td.cell, tr.cell, font.cell, .cell, a:link.cell, a:visited.cell, a:active.cell
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
color: #000000;
|
||||
font-weight: normal;
|
||||
/* background-color: #e9e9d3 */
|
||||
background-color: #f5f5f5
|
||||
}
|
||||
|
||||
/* SHADED TABLE CELL */
|
||||
td.shaded, tr.shaded, font.shaded, .shaded, a:link.shaded, a:visited.shaded, a:active.shaded
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
color: #000000;
|
||||
font-weight: normal;
|
||||
background-color: #f5f5f5
|
||||
}
|
||||
|
||||
/* GLOSSARY TERM */
|
||||
td.term, font.term, .term, a:link.term, a:visited.term, a:active.term
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
font-style: normal;
|
||||
color: #000000;
|
||||
text-decoration: none;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
/* ELEMENT TAGS */
|
||||
ul
|
||||
{
|
||||
font-family: Arial, Helvetica, sans-serif;
|
||||
font-size: 10pt;
|
||||
font-style: normal;
|
||||
font-weight: normal
|
||||
}
|
||||
li
|
||||
{
|
||||
font-family: Arial, Helvetica, sans-serif;
|
||||
font-size: 10pt;
|
||||
font-style: normal;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
a:link.h1, a:visited.h1, .h1
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
color: #0066CC
|
||||
}
|
||||
a:active.h1
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
font-weight: bold;
|
||||
color: #0066CC
|
||||
}
|
||||
h1
|
||||
{
|
||||
margin-left: -8%;
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 12pt;
|
||||
color: #0066CC
|
||||
}
|
||||
|
||||
.h2
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 11pt;
|
||||
/* font-weight: bold; */
|
||||
color: #000000
|
||||
}
|
||||
|
||||
h2
|
||||
{
|
||||
margin-left: -4%;
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 11pt;
|
||||
/* font-weight: bold; */
|
||||
color: #000000
|
||||
}
|
||||
|
||||
A:link.h3, A:visited.h3, .h3
|
||||
{ font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #000000;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
A:active.h3
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #000000;
|
||||
font-weight: bold
|
||||
}
|
||||
|
||||
h3
|
||||
{
|
||||
margin-left: -4%;
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
font-weight: bold;
|
||||
color: #000000
|
||||
}
|
||||
|
||||
h4
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 9pt;
|
||||
font-weight: bold;
|
||||
color: #000000
|
||||
}
|
||||
|
||||
.code, A:active.code, A:link.code, A:visited.code
|
||||
{
|
||||
font-family: "Courier New", Courier, monospace;
|
||||
}
|
||||
|
||||
.abstract
|
||||
{
|
||||
font-style : italic;
|
||||
}
|
||||
|
||||
p
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
font-style: normal
|
||||
}
|
||||
|
||||
td
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
font-style: normal
|
||||
}
|
||||
|
||||
/* LINKS */
|
||||
a:link, a:active
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #3366CC;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
a:visited
|
||||
{
|
||||
font-family: Arial, Helvetica, "Trebuchet MS", sans-serif;
|
||||
font-size: 10pt;
|
||||
color: #333366;
|
||||
font-weight: normal
|
||||
}
|
||||
|
||||
code {
|
||||
/* use browser/user default for `font-family` */
|
||||
font-weight: bold;
|
||||
color: brown;
|
||||
background: transparent;
|
||||
}
|
BIN
htmldoc/tidy.gif
Normal file
BIN
htmldoc/tidy.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 244 B |
423
htmldoc/tidy1.xsl
Normal file
423
htmldoc/tidy1.xsl
Normal file
|
@ -0,0 +1,423 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
For generating the `tidy.1` man page from the
|
||||
output of `tidy -xml-help` and `tidy -xml-config`
|
||||
|
||||
(c) 2005-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Written by Jelks Cabaniss and Arnaud Desitter
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/02/01 12:25:21 $
|
||||
$Revision: 1.8 $
|
||||
|
||||
-->
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:strip-space elements="description" />
|
||||
|
||||
<xsl:output method="text" />
|
||||
|
||||
<!--
|
||||
The default template match is to the document passed on the
|
||||
command line to the XSLT processor, currently "tidy-help.xml".
|
||||
For the detailed config options section however, the template
|
||||
match is to the file "tidy-config.xml". This is captured in
|
||||
the $CONFIG variable, declared here:
|
||||
-->
|
||||
|
||||
<xsl:variable name="CONFIG" select="document('tidy-config.xml')"/>
|
||||
|
||||
|
||||
<!-- Main Template: -->
|
||||
|
||||
<!--
Root template: assembles the output by calling, in order, the
header-section, cmdline-section, config-section and
manpage-see-also-section named templates.
-->
<xsl:template match="/">
|
||||
<xsl:call-template name="header-section" />
|
||||
<xsl:call-template name="cmdline-section" />
|
||||
<xsl:call-template name="config-section" />
|
||||
<xsl:call-template name="manpage-see-also-section" />
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Named Templates: -->
|
||||
|
||||
|
||||
<!--
header-section: emits the leading roff comment line and the .TH title
line, inserting the version read from cmdline/@version. The empty
xsl:text element at the start presumably keeps the line break after the
template start tag out of the output (verify against the generated page).
-->
<xsl:template name="header-section">
|
||||
<xsl:text/>.\" tidy man page for the Tidy Sourceforge project
|
||||
.TH tidy 1 "$Date: 2007/02/01 12:25:21 $" "HTML Tidy <xsl:value-of select="cmdline/@version" />" "User commands"
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
cmdline-section: emits the literal roff text for the NAME, SYNOPSIS,
DESCRIPTION, OPTIONS, USAGE, ENVIRONMENT and EXIT STATUS sections. The
version comes from cmdline/@version and the OPTIONS body from the
"show-cmdline-options" template. The text inside this template is
output as written (the stylesheet uses method="text"), so edit it with
care.
-->
<xsl:template name="cmdline-section">
|
||||
.SH NAME
|
||||
\fBtidy\fR - validate, correct, and pretty-print HTML files
|
||||
.br
|
||||
(version: <xsl:value-of select="cmdline/@version" />)
|
||||
.SH SYNOPSIS
|
||||
\fBtidy\fR [option ...] [file ...] [option ...] [file ...]
|
||||
.SH DESCRIPTION
|
||||
Tidy reads HTML, XHTML and XML files and writes cleaned up markup. For HTML variants, it detects and corrects many common coding errors and strives to produce visually equivalent markup that is both W3C compliant and works on most browsers. A common use of Tidy is to convert plain HTML to XHTML. For generic XML files, Tidy is limited to correcting basic well-formedness errors and pretty printing.
|
||||
.LP
|
||||
If no input file is specified, Tidy reads the standard input. If no output file is specified, Tidy writes the tidied markup to the standard output. If no error file is specified, Tidy writes messages to the standard error.
|
||||
For command line options that expect a numerical argument, a default is assumed if no meaningful value can be found.
|
||||
.SH OPTIONS
|
||||
<xsl:call-template name="show-cmdline-options" />
|
||||
.SH USAGE
|
||||
.LP
|
||||
Use \fB--\fR\fIoptionX valueX\fR for the detailed configuration option "optionX" with argument "valueX". See also below under \fBDetailed Configuration Options\fR as to how to conveniently group all such options in a single config file.
|
||||
.LP
|
||||
Input/Output default to stdin/stdout respectively. Single letter options apart from \fB-f\fR and \fB-o\fR may be combined as in:
|
||||
.LP
|
||||
.in 1i
|
||||
\fBtidy -f errs.txt -imu foo.html\fR
|
||||
.LP
|
||||
For further info on HTML see \fIhttp://www.w3.org/MarkUp\fR.
|
||||
.LP
|
||||
For more information about HTML Tidy, visit the project home page at \fIhttp://tidy.sourceforge.net\fR. Here, you will find links to documentation, mailing lists (with searchable archives) and links to report bugs.
|
||||
.SH ENVIRONMENT
|
||||
.TP
|
||||
.B HTML_TIDY
|
||||
Name of the default configuration file. This should be an absolute path, since you will probably invoke \fBtidy\fR from different directories. The value of HTML_TIDY will be parsed after the compiled-in default (defined with -DTIDY_CONFIG_FILE), but before any of the files specified using \fB-config\fR.
|
||||
.SH "EXIT STATUS"
|
||||
.IP 0
|
||||
All input files were processed successfully.
|
||||
.IP 1
|
||||
There were warnings.
|
||||
.IP 2
|
||||
There were errors.
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
config-section: emits the literal roff text introducing the detailed
configuration options (separator headings, SYNOPSIS, WARNING,
DESCRIPTION with a sample config file) and then calls
"show-config-options" for the OPTIONS body.
NOTE(review): the DESCRIPTION text says there are five option types but
also mentions Integer and Encoding types that are not in its list of
five; confirm the intended list before changing the wording.
-->
<xsl:template name="config-section">
|
||||
.SH ______________________________
|
||||
.SH " "
|
||||
.SH "DETAILED CONFIGURATION OPTIONS"
|
||||
This section describes the Detailed (i.e., "expanded") Options, which may be specified by preceding each option with \fB--\fR at the command line, followed by its desired value, OR by placing the options and values in a configuration file, and telling tidy to read that file with the \fB-config\fR standard option.
|
||||
.SH SYNOPSIS
|
||||
\fBtidy --\fR\fIoption1 \fRvalue1 \fB--\fIoption2 \fRvalue2 [standard options ...]
|
||||
.br
|
||||
\fBtidy -config \fIconfig-file \fR[standard options ...]
|
||||
.SH WARNING
|
||||
The options detailed here do not include the "standard" command-line options (i.e., those preceded by a single '\fB-\fR') described above in the first section of this man page.
|
||||
.SH DESCRIPTION
|
||||
A list of options for configuring the behavior of Tidy, which can be passed either on the command line, or specified in a configuration file.
|
||||
.LP
|
||||
A Tidy configuration file is simply a text file, where each option
|
||||
is listed on a separate line in the form
|
||||
.LP
|
||||
.in 1i
|
||||
\fBoption1\fR: \fIvalue1\fR
|
||||
.br
|
||||
\fBoption2\fR: \fIvalue2\fR
|
||||
.br
|
||||
etc.
|
||||
.LP
|
||||
The permissible values for a given option depend on the option's \fBType\fR. There are five types: \fIBoolean\fR, \fIAutoBool\fR, \fIDocType\fR, \fIEnum\fR, and \fIString\fR. Boolean types allow any of \fIyes/no, y/n, true/false, t/f, 1/0\fR. AutoBools allow \fIauto\fR in addition to the values allowed by Booleans. Integer types take non-negative integers. String types generally have no defaults, and you should provide them in non-quoted form (unless you wish the output to contain the literal quotes).
|
||||
.LP
|
||||
Enum, Encoding, and DocType "types" have a fixed repertoire of items; consult the \fIExample\fR[s] provided below for the option[s] in question.
|
||||
.LP
|
||||
You only need to provide options and values for those whose defaults you wish to override, although you may wish to include some already-defaulted options and values for the sake of documentation and explicitness.
|
||||
.LP
|
||||
Here is a sample config file, with at least one example of each of the five Types:
|
||||
.LP
|
||||
\fI
|
||||
// sample Tidy configuration options
|
||||
output-xhtml: yes
|
||||
add-xml-decl: no
|
||||
doctype: strict
|
||||
char-encoding: ascii
|
||||
indent: auto
|
||||
wrap: 76
|
||||
repeated-attributes: keep-last
|
||||
error-file: errs.txt
|
||||
\fR
|
||||
.LP
|
||||
Below is a summary and brief description of each of the options. They are listed alphabetically within each category. There are five categories: \fIHTML, XHTML, XML\fR options, \fIDiagnostics\fR options, \fIPretty Print\fR options, \fICharacter Encoding\fR options, and \fIMiscellaneous\fR options.
|
||||
.LP
|
||||
.SH OPTIONS
|
||||
<xsl:call-template name="show-config-options" />
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="show-cmdline-options">
|
||||
.SS File manipulation
|
||||
<xsl:call-template name="cmdline-detail">
|
||||
<xsl:with-param name="category">file-manip</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Processing directives
|
||||
<xsl:call-template name="cmdline-detail">
|
||||
<xsl:with-param name="category">process-directives</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Character encodings
|
||||
<xsl:call-template name="cmdline-detail">
|
||||
<xsl:with-param name="category">char-encoding</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Miscellaneous
|
||||
<xsl:call-template name="cmdline-detail">
|
||||
<xsl:with-param name="category">misc</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="cmdline-detail">
|
||||
<!--
|
||||
For each option in one of the 4 categories/classes, provide its
|
||||
1. names
|
||||
2. description
|
||||
3. equivalent configuration option
|
||||
-->
|
||||
<xsl:param name="category" />
|
||||
<xsl:for-each select='/cmdline/option[@class=$category]'>
|
||||
<xsl:text>
|
||||
.TP
|
||||
</xsl:text>
|
||||
<xsl:call-template name="process-names" />
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
<xsl:apply-templates select="description" />
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
<xsl:call-template name="process-eqconfig" />
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="process-names">
|
||||
<!-- Used only in the cmdline section -->
|
||||
<xsl:for-each select="name">
|
||||
<xsl:text />\fB<xsl:value-of select="." />\fR<xsl:text />
|
||||
<xsl:if test="position() != last()">
|
||||
<xsl:text>, </xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="process-eqconfig">
|
||||
<!-- Used only in the cmdline section -->
|
||||
<xsl:if test="string-length(eqconfig) > 0">
|
||||
<xsl:for-each select="eqconfig">
|
||||
<xsl:text>(\fI</xsl:text>
|
||||
<xsl:value-of select="." />
|
||||
<xsl:text>\fR)</xsl:text>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="show-config-options">
|
||||
<!-- Used only in the config options section -->
|
||||
.SS HTML, XHTML, XML options:
|
||||
<xsl:call-template name="config-detail">
|
||||
<xsl:with-param name="category">markup</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Diagnostics options:
|
||||
<xsl:call-template name="config-detail">
|
||||
<xsl:with-param name="category">diagnostics</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Pretty Print options:
|
||||
<xsl:call-template name="config-detail">
|
||||
<xsl:with-param name="category">print</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Character Encoding options:
|
||||
<xsl:call-template name="config-detail">
|
||||
<xsl:with-param name="category">encoding</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
.SS Miscellaneous options:
|
||||
<xsl:call-template name="config-detail">
|
||||
<xsl:with-param name="category">misc</xsl:with-param>
|
||||
</xsl:call-template>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
|
||||
Note that any templates called implicitly or explicitly
|
||||
from the "config-detail" template below will match on
|
||||
the document referred to by the $CONFIG variable, i.e.,
|
||||
the file "tidy-config.xml", created by running
|
||||
|
||||
tidy -xml-config > tidy-config.xml
|
||||
|
||||
The $CONFIG variable is set at the top level of this
|
||||
stylesheet.
|
||||
-->
|
||||
|
||||
<xsl:template name="config-detail">
|
||||
<!--
|
||||
For each option in one of the 5 categories/classes, provide its
|
||||
1. name
|
||||
2. type
|
||||
3. default (if any)
|
||||
4. example (if any)
|
||||
5. seealso (if any)
|
||||
6. description
|
||||
-->
|
||||
<xsl:param name="category" />
|
||||
<xsl:for-each select='$CONFIG/config/option[@class=$category]'>
|
||||
<xsl:sort select="name" order="ascending" />
|
||||
.TP
|
||||
\fB<xsl:apply-templates select="name" />\fR
|
||||
|
||||
Type: \fI<xsl:apply-templates select="type" />\fR
|
||||
.br
|
||||
<xsl:call-template name="provide-default" />
|
||||
.br
|
||||
<xsl:call-template name="provide-example" />
|
||||
<xsl:text>
|
||||
|
||||
</xsl:text>
|
||||
<xsl:apply-templates select="description" />
|
||||
<xsl:call-template name="seealso" />
|
||||
</xsl:for-each>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Used only in the config options section: -->
|
||||
<xsl:template name="seealso">
|
||||
<xsl:if test="seealso">
|
||||
<xsl:text>
|
||||
|
||||
</xsl:text>
|
||||
.rj 1
|
||||
\fBSee also\fR: <xsl:text />
|
||||
<xsl:for-each select="seealso">
|
||||
<xsl:text />\fI<xsl:value-of select="." />\fR<xsl:text />
|
||||
<xsl:if test="position() != last()">
|
||||
<xsl:text>, </xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Used only in the config options section: -->
|
||||
<xsl:template name="provide-default">
|
||||
<!--
|
||||
Picks up the default from the XML. If the `default` element
|
||||
doesn't exist, or it's empty, a single '-' is provided.
|
||||
-->
|
||||
<xsl:choose>
|
||||
<xsl:when test="string-length(default) > 0 ">
|
||||
<xsl:text />Default: \fI<xsl:apply-templates
|
||||
select="default" />\fR<xsl:text />
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:text />Default: \fI-\fR<xsl:text />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Used only in the config options section: -->
|
||||
<xsl:template name="provide-example">
<!--
Emits the "Example:" line of a config option's man page entry.
Expects the context node to be an `option` element.

By default, doesn't output examples for String types (mirroring the
quickref page).  But for *any* options in the XML instance that
have an `example` child, that example will be used in lieu of a
stylesheet-provided one. (Useful e.g. for `repeated-attributes`).

If the `example` element doesn't exist, or it's empty, a single '-'
is provided.  That placeholder is labelled "Example:" as well, so the
entry does not end up with two "Default:" lines.

The empty xsl:text elements fence the literal text so that the
indentation of this stylesheet never leaks into the roff output.
-->
  <xsl:choose>
    <xsl:when test="string-length(example) > 0">
      <xsl:text />Example: \fI<xsl:apply-templates
        select="example" />\fR<xsl:text />
    </xsl:when>
    <xsl:otherwise>
      <xsl:text />Example: \fI-\fR<xsl:text />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Called from the templates below matching `code`, `em`, `strong`: -->
|
||||
<xsl:template name="escape-backslash">
<!--
Since backslashes are "special" to the *roff processors used
to generate man pages, we need to escape backslash characters
appearing in content with another backslash.

Operates on the context node.  When its string value contains no
backslash, the children are processed normally via apply-templates.
Otherwise the string value of the node is emitted with EVERY
backslash doubled (child markup is flattened to text in that case).
-->
  <xsl:choose>
    <xsl:when test="contains(.,'\')">
      <xsl:call-template name="escape-backslash-in-string">
        <xsl:with-param name="text" select="string(.)" />
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:apply-templates />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!-- Helper for `escape-backslash`: doubles each backslash in $text. -->
<xsl:template name="escape-backslash-in-string">
  <xsl:param name="text" />
  <xsl:choose>
    <xsl:when test="contains($text,'\')">
      <!-- Emit everything up to and including the first backslash
           (doubled), then recurse on the remainder. -->
      <xsl:value-of select="concat( substring-before($text,'\'), '\\' )" />
      <xsl:call-template name="escape-backslash-in-string">
        <xsl:with-param name="text" select="substring-after($text,'\')" />
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$text" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
|
||||
|
||||
|
||||
<!-- Appears at the bottom of the man page: -->
|
||||
<xsl:template name="manpage-see-also-section">
|
||||
.SH "SEE ALSO"
|
||||
HTML Tidy Project Page at \fIhttp://tidy.sourceforge.net\fR
|
||||
.SH AUTHOR
|
||||
\fBTidy\fR was written by Dave Raggett <\fIdsr@w3.org\fR>, and is now maintained and developed by the Tidy team at \fIhttp://tidy.sourceforge.net/\fR. It is released under the \fIMIT Licence\fR.
|
||||
.LP
|
||||
Generated automatically with HTML Tidy released on <xsl:value-of select="cmdline/@version" />.
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Regular Templates: -->
|
||||
|
||||
|
||||
<xsl:template match="description">
|
||||
<xsl:apply-templates />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="a">
|
||||
<xsl:apply-templates />
|
||||
<xsl:text /> at \fI<xsl:value-of select="@href" />\fR<xsl:text />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="code | em">
|
||||
<xsl:text />\fI<xsl:call-template name="escape-backslash" />\fR<xsl:text />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="br">
|
||||
<xsl:text>
|
||||
.br
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="strong">
|
||||
<xsl:text />\fB<xsl:call-template name="escape-backslash" />\fR<xsl:text />
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!--
|
||||
The following templates
|
||||
a) normalize whitespace, primarily necessary for `description`
|
||||
b) do so without stripping possible whitespace surrounding `code`
|
||||
c) strip leading and trailing whitespace in `description` and `code`
|
||||
(courtesy of Ken Holman on the XSL-list):
|
||||
-->
|
||||
|
||||
<xsl:template match="text()[preceding-sibling::node() and
|
||||
following-sibling::node()]">
|
||||
<xsl:variable name="ns" select="normalize-space(concat('x',.,'x'))"/>
|
||||
<xsl:value-of select="substring( $ns, 2, string-length($ns) - 2 )" />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()[preceding-sibling::node() and
|
||||
not( following-sibling::node() )]">
|
||||
<xsl:variable name="ns" select="normalize-space(concat('x',.))"/>
|
||||
<xsl:value-of select="substring( $ns, 2, string-length($ns) - 1 )" />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()[not( preceding-sibling::node() ) and
|
||||
following-sibling::node()]">
|
||||
<xsl:variable name="ns" select="normalize-space(concat(.,'x'))"/>
|
||||
<xsl:value-of select="substring( $ns, 1, string-length($ns) - 1 )" />
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()[not( preceding-sibling::node() ) and
|
||||
not( following-sibling::node() )]">
|
||||
<xsl:value-of select="normalize-space(.)"/>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
118
include/buffio.h
Normal file
118
include/buffio.h
Normal file
|
@ -0,0 +1,118 @@
|
|||
#ifndef __TIDY_BUFFIO_H__
|
||||
#define __TIDY_BUFFIO_H__
|
||||
|
||||
/** @file buffio.h - Treat buffer as an I/O stream.
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/01/23 11:17:45 $
|
||||
$Revision: 1.9 $
|
||||
|
||||
Requires buffer to automatically grow as bytes are added.
|
||||
Must keep track of current read and write points.
|
||||
|
||||
*/
|
||||
|
||||
#include "platform.h"
|
||||
#include "tidy.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** TidyBuffer - A chunk of memory */
|
||||
TIDY_STRUCT
|
||||
struct _TidyBuffer
|
||||
{
|
||||
TidyAllocator* allocator; /**< Memory allocator */
|
||||
byte* bp; /**< Pointer to bytes */
|
||||
uint size; /**< # bytes currently in use */
|
||||
uint allocated; /**< # bytes allocated */
|
||||
uint next; /**< Offset of current input position */
|
||||
};
|
||||
|
||||
/** Initialize data structure using the default allocator */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufInit( TidyBuffer* buf );
|
||||
|
||||
/** Initialize data structure using the given custom allocator */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf, TidyAllocator* allocator );
|
||||
|
||||
/** Free current buffer, allocate given amount, reset input pointer,
|
||||
use the default allocator */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize );
|
||||
|
||||
/** Free current buffer, allocate given amount, reset input pointer,
|
||||
use the given custom allocator */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf,
|
||||
TidyAllocator* allocator,
|
||||
uint allocSize );
|
||||
|
||||
/** Expand buffer to given size.
|
||||
** Chunk size is minimum growth. Pass 0 for default of 256 bytes.
|
||||
*/
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf,
|
||||
uint allocSize, uint chunkSize );
|
||||
|
||||
/** Free current contents and zero out */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufFree( TidyBuffer* buf );
|
||||
|
||||
/** Set buffer bytes to 0 */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufClear( TidyBuffer* buf );
|
||||
|
||||
/** Attach to existing buffer */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size );
|
||||
|
||||
/** Detach from buffer. Caller must free. */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufDetach( TidyBuffer* buf );
|
||||
|
||||
|
||||
/** Append bytes to buffer. Expand if necessary. */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size );
|
||||
|
||||
/** Append one byte to buffer. Expand if necessary. */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv );
|
||||
|
||||
/** Get byte from end of buffer */
|
||||
TIDY_EXPORT int TIDY_CALL tidyBufPopByte( TidyBuffer* buf );
|
||||
|
||||
|
||||
/** Get byte from front of buffer. Increment input offset. */
|
||||
TIDY_EXPORT int TIDY_CALL tidyBufGetByte( TidyBuffer* buf );
|
||||
|
||||
/** At end of buffer? */
|
||||
TIDY_EXPORT Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf );
|
||||
|
||||
/** Put a byte back into the buffer. Decrement input offset. */
|
||||
TIDY_EXPORT void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv );
|
||||
|
||||
|
||||
/**************
|
||||
TIDY
|
||||
**************/
|
||||
|
||||
/* Forward declarations
|
||||
*/
|
||||
|
||||
/** Initialize a buffer input source */
|
||||
TIDY_EXPORT void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf );
|
||||
|
||||
/** Initialize a buffer output sink */
|
||||
TIDY_EXPORT void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* __TIDY_BUFFIO_H__ */
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
636
include/platform.h
Normal file
636
include/platform.h
Normal file
|
@ -0,0 +1,636 @@
|
|||
#ifndef __TIDY_PLATFORM_H__
|
||||
#define __TIDY_PLATFORM_H__
|
||||
|
||||
/* platform.h -- Platform specifics
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2008/03/17 12:57:01 $
|
||||
$Revision: 1.66 $
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
Uncomment and edit one of the following #defines if you
|
||||
want to specify the config file at compile-time.
|
||||
*/
|
||||
|
||||
/* #define TIDY_CONFIG_FILE "/etc/tidy_config.txt" */ /* original */
|
||||
/* #define TIDY_CONFIG_FILE "/etc/tidyrc" */
|
||||
/* #define TIDY_CONFIG_FILE "/etc/tidy.conf" */
|
||||
|
||||
/*
|
||||
Uncomment the following #define if you are on a system
|
||||
supporting the HOME environment variable.
|
||||
It enables tidy to find config files named ~/.tidyrc if
|
||||
the HTML_TIDY environment variable is not set.
|
||||
*/
|
||||
/* #define TIDY_USER_CONFIG_FILE "~/.tidyrc" */
|
||||
|
||||
/*
|
||||
Uncomment the following #define if your
|
||||
system supports the call getpwnam().
|
||||
E.g. Unix and Linux.
|
||||
|
||||
It enables tidy to find files named
|
||||
~your/foo for use in the HTML_TIDY environment
|
||||
variable or CONFIG_FILE or USER_CONFIGFILE or
|
||||
on the command line: -config ~joebob/tidy.cfg
|
||||
|
||||
Contributed by Todd Lewis.
|
||||
*/
|
||||
|
||||
/* #define SUPPORT_GETPWNAM */
|
||||
|
||||
|
||||
/* Enable/disable support for Big5 and Shift_JIS character encodings */
|
||||
#ifndef SUPPORT_ASIAN_ENCODINGS
|
||||
#define SUPPORT_ASIAN_ENCODINGS 1
|
||||
#endif
|
||||
|
||||
/* Enable/disable support for UTF-16 character encodings */
|
||||
#ifndef SUPPORT_UTF16_ENCODINGS
|
||||
#define SUPPORT_UTF16_ENCODINGS 1
|
||||
#endif
|
||||
|
||||
/* Enable/disable support for additional accessibility checks */
|
||||
#ifndef SUPPORT_ACCESSIBILITY_CHECKS
|
||||
#define SUPPORT_ACCESSIBILITY_CHECKS 1
|
||||
#endif
|
||||
|
||||
|
||||
/* Convenience defines for Mac platforms */
|
||||
|
||||
#if defined(macintosh)
|
||||
/* Mac OS 6.x/7.x/8.x/9.x, with or without CarbonLib - MPW or Metrowerks 68K/PPC compilers */
|
||||
#define MAC_OS_CLASSIC
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Mac OS"
|
||||
#endif
|
||||
|
||||
/* needed for access() */
|
||||
#if !defined(_POSIX) && !defined(NO_ACCESS_SUPPORT)
|
||||
#define NO_ACCESS_SUPPORT
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_GETPWNAM
|
||||
#undef SUPPORT_GETPWNAM
|
||||
#endif
|
||||
|
||||
#elif defined(__APPLE__) && defined(__MACH__)
|
||||
/* Mac OS X (client) 10.x (or server 1.x/10.x) - gcc or Metrowerks MachO compilers */
|
||||
#define MAC_OS_X
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Mac OS X"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MAC_OS_CLASSIC) || defined(MAC_OS_X)
|
||||
/* Any OS on Mac platform */
|
||||
#define MAC_OS
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#define strcasecmp strcmp
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC MACROMAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for BSD like platforms */
|
||||
|
||||
#if defined(__FreeBSD__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "FreeBSD"
|
||||
#endif
|
||||
|
||||
#elif defined(__NetBSD__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "NetBSD"
|
||||
#endif
|
||||
|
||||
#elif defined(__OpenBSD__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "OpenBSD"
|
||||
#endif
|
||||
|
||||
#elif defined(__DragonFly__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "DragonFly"
|
||||
#endif
|
||||
|
||||
#elif defined(__MINT__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "FreeMiNT"
|
||||
#endif
|
||||
|
||||
#elif defined(__bsdi__)
|
||||
#define BSD_BASED_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "BSD/OS"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* Convenience defines for Windows platforms */
|
||||
|
||||
#if defined(WINDOWS) || defined(_WIN32)
|
||||
|
||||
#define WINDOWS_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Windows"
|
||||
#endif
|
||||
|
||||
#if defined(__MWERKS__) || defined(__MSL__)
|
||||
/* not available with Metrowerks Standard Library */
|
||||
|
||||
#ifdef SUPPORT_GETPWNAM
|
||||
#undef SUPPORT_GETPWNAM
|
||||
#endif
|
||||
|
||||
/* needed for setmode() */
|
||||
#if !defined(NO_SETMODE_SUPPORT)
|
||||
#define NO_SETMODE_SUPPORT
|
||||
#endif
|
||||
|
||||
#define strcasecmp _stricmp
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__BORLANDC__)
|
||||
#define strcasecmp stricmp
|
||||
#endif
|
||||
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#define SUPPORT_POSIX_MAPPED_FILES 0
|
||||
|
||||
#endif
|
||||
|
||||
/* Convenience defines for Linux platforms */
|
||||
|
||||
#if defined(linux) && defined(__alpha__)
|
||||
/* Linux on Alpha - gcc compiler */
|
||||
#define LINUX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Linux/Alpha"
|
||||
#endif
|
||||
|
||||
#elif defined(linux) && defined(__sparc__)
|
||||
/* Linux on Sparc - gcc compiler */
|
||||
#define LINUX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Linux/Sparc"
|
||||
#endif
|
||||
|
||||
#elif defined(linux) && (defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__))
|
||||
/* Linux on x86 - gcc compiler */
|
||||
#define LINUX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Linux/x86"
|
||||
#endif
|
||||
|
||||
#elif defined(linux) && defined(__powerpc__)
|
||||
/* Linux on PPC - gcc compiler */
|
||||
#define LINUX_OS
|
||||
|
||||
#if defined(__linux__) && defined(__powerpc__)
|
||||
|
||||
/* #if #system(linux) */
|
||||
/* MkLinux on PPC - gcc (egcs) compiler */
|
||||
/* #define MAC_OS_MKLINUX */
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "MkLinux"
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Linux/PPC"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#elif defined(linux) || defined(__linux__)
|
||||
/* generic Linux */
|
||||
#define LINUX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Linux"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/* Convenience defines for Solaris platforms */
|
||||
|
||||
#if defined(sun)
|
||||
#define SOLARIS_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Solaris"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for HPUX + gcc platforms */
|
||||
|
||||
#if defined(__hpux)
|
||||
#define HPUX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "HPUX"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for RISCOS + gcc platforms */
|
||||
|
||||
#if defined(__riscos__)
|
||||
#define RISC_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "RISC OS"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for OS/2 + icc/gcc platforms */
|
||||
|
||||
#if defined(__OS2__) || defined(__EMX__)
|
||||
#define OS2_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "OS/2"
|
||||
#endif
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#define strcasecmp stricmp
|
||||
#endif
|
||||
|
||||
/* Convenience defines for IRIX */
|
||||
|
||||
#if defined(__sgi)
|
||||
#define IRIX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "SGI IRIX"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for AIX */
|
||||
|
||||
#if defined(_AIX)
|
||||
#define AIX_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "IBM AIX"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Convenience defines for BeOS platforms */
|
||||
|
||||
#if defined(__BEOS__)
|
||||
#define BE_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "BeOS"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for Cygwin platforms */
|
||||
|
||||
#if defined(__CYGWIN__)
|
||||
#define CYGWIN_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Cygwin"
|
||||
#endif
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#endif
|
||||
|
||||
/* Convenience defines for OpenVMS */
|
||||
|
||||
#if defined(__VMS)
|
||||
#define OPENVMS_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "OpenVMS"
|
||||
#endif
|
||||
#define FILENAMES_CASE_SENSITIVE 0
|
||||
#endif
|
||||
|
||||
/* Convenience defines for DEC Alpha OSF + gcc platforms */
|
||||
|
||||
#if defined(__osf__)
|
||||
#define OSF_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "DEC Alpha OSF"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Convenience defines for ARM platforms */
|
||||
|
||||
#if defined(__arm)
|
||||
#define ARM_OS
|
||||
|
||||
#if defined(forARM) && defined(__NEWTON_H)
|
||||
|
||||
/* Using Newton C++ Tools ARMCpp compiler */
|
||||
#define NEWTON_OS
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "Newton"
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifndef PLATFORM_NAME
|
||||
#define PLATFORM_NAME "ARM"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <setjmp.h> /* for longjmp on error exit */
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h> /* may need <varargs.h> for Unix V */
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef NEEDS_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_GETPWNAM
|
||||
#include <pwd.h>
|
||||
#endif
|
||||
|
||||
#ifdef NEEDS_UNISTD_H
|
||||
#include <unistd.h> /* needed for unlink on some Unix systems */
|
||||
#endif
|
||||
|
||||
/* This can be set at compile time. Usually Windows,
|
||||
** except for Macintosh builds.
|
||||
*/
|
||||
#ifndef DFLT_REPL_CHARENC
|
||||
#define DFLT_REPL_CHARENC WIN1252
|
||||
#endif
|
||||
|
||||
/* By default, use case-sensitive filename comparison.
|
||||
*/
|
||||
#ifndef FILENAMES_CASE_SENSITIVE
|
||||
#define FILENAMES_CASE_SENSITIVE 1
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
Tidy preserves the last modified time for the files it
|
||||
cleans up.
|
||||
*/
|
||||
|
||||
/*
|
||||
If your platform doesn't support <utime.h> and the
|
||||
utime() function, or <sys/utime.h> and the futime()
|
||||
function then set PRESERVE_FILE_TIMES to 0.
|
||||
|
||||
If your platform doesn't support <sys/utime.h> and the
|
||||
futime() function, then set HAS_FUTIME to 0.
|
||||
|
||||
If your platform supports <utime.h> and the
|
||||
utime() function requires the file to be
|
||||
closed first, then set UTIME_NEEDS_CLOSED_FILE to 1.
|
||||
*/
|
||||
|
||||
/* Keep old PRESERVEFILETIMES define for compatibility */
|
||||
#ifdef PRESERVEFILETIMES
|
||||
#undef PRESERVE_FILE_TIMES
|
||||
#define PRESERVE_FILE_TIMES PRESERVEFILETIMES
|
||||
#endif
|
||||
|
||||
#ifndef PRESERVE_FILE_TIMES
|
||||
#if defined(RISC_OS) || defined(OPENVMS_OS) || defined(OSF_OS)
|
||||
#define PRESERVE_FILE_TIMES 0
|
||||
#else
|
||||
#define PRESERVE_FILE_TIMES 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if PRESERVE_FILE_TIMES
|
||||
|
||||
#ifndef HAS_FUTIME
|
||||
#if defined(CYGWIN_OS) || defined(BE_OS) || defined(OS2_OS) || defined(HPUX_OS) || defined(SOLARIS_OS) || defined(LINUX_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(IRIX_OS) || defined(AIX_OS) || defined(__BORLANDC__)
|
||||
#define HAS_FUTIME 0
|
||||
#else
|
||||
#define HAS_FUTIME 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UTIME_NEEDS_CLOSED_FILE
|
||||
#if defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(MAC_OS) || defined(__MSL__) || defined(LINUX_OS)
|
||||
#define UTIME_NEEDS_CLOSED_FILE 1
|
||||
#else
|
||||
#define UTIME_NEEDS_CLOSED_FILE 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MAC_OS_X) || (!defined(MAC_OS_CLASSIC) && !defined(__MSL__))
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#else
|
||||
#include <stat.h>
|
||||
#endif
|
||||
|
||||
#if HAS_FUTIME
|
||||
#include <sys/utime.h>
|
||||
#else
|
||||
#include <utime.h>
|
||||
#endif /* HAS_FUTIME */
|
||||
|
||||
/*
|
||||
MS Windows needs _ prefix for Unix file functions.
|
||||
Not required by Metrowerks Standard Library (MSL).
|
||||
|
||||
Tidy uses following for preserving the last modified time.
|
||||
|
||||
WINDOWS automatically set by Win16 compilers.
|
||||
_WIN32 automatically set by Win32 compilers.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__)
|
||||
|
||||
#define futime _futime
|
||||
#define fstat _fstat
|
||||
#define utimbuf _utimbuf /* Windows seems to want utimbuf */
|
||||
#define stat _stat
|
||||
#define utime _utime
|
||||
#define vsnprintf _vsnprintf
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#endif /* PRESERVE_FILE_TIMES */
|
||||
|
||||
/*
|
||||
MS Windows needs _ prefix for Unix file functions.
|
||||
Not required by Metrowerks Standard Library (MSL).
|
||||
|
||||
WINDOWS automatically set by Win16 compilers.
|
||||
_WIN32 automatically set by Win32 compilers.
|
||||
*/
|
||||
#if defined(_WIN32) && !defined(__MSL__) && !defined(__BORLANDC__)
|
||||
|
||||
#ifndef __WATCOMC__
|
||||
#define fileno _fileno
|
||||
#define setmode _setmode
|
||||
#endif
|
||||
|
||||
#define access _access
|
||||
#define strcasecmp _stricmp
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma warning( disable : 4189 ) /* local variable is initialized but not referenced */
|
||||
#pragma warning( disable : 4100 ) /* unreferenced formal parameter */
|
||||
#pragma warning( disable : 4706 ) /* assignment within conditional expression */
|
||||
#endif
|
||||
|
||||
#if _MSC_VER > 1300
|
||||
#pragma warning( disable : 4996 ) /* disable deprecation warning */
|
||||
#endif
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
#if (defined(_USRDLL) || defined(_WINDLL)) && !defined(TIDY_EXPORT)
|
||||
#define TIDY_EXPORT __declspec( dllexport )
|
||||
#endif
|
||||
|
||||
#ifndef TIDY_CALL
|
||||
#ifdef _WIN64
|
||||
# define TIDY_CALL __fastcall
|
||||
#else
|
||||
# define TIDY_CALL __stdcall
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
/* hack for gnu sys/types.h file which defines uint and ulong */
|
||||
|
||||
#if defined(BE_OS) || defined(SOLARIS_OS) || defined(BSD_BASED_OS) || defined(OSF_OS) || defined(IRIX_OS) || defined(AIX_OS)
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#if !defined(HPUX_OS) && !defined(CYGWIN_OS) && !defined(MAC_OS_X) && !defined(BE_OS) && !defined(SOLARIS_OS) && !defined(BSD_BASED_OS) && !defined(OSF_OS) && !defined(IRIX_OS) && !defined(AIX_OS) && !defined(LINUX_OS)
|
||||
# undef uint
|
||||
typedef unsigned int uint;
|
||||
#endif
|
||||
#if defined(HPUX_OS) || defined(CYGWIN_OS) || defined(MAC_OS) || defined(BSD_BASED_OS) || defined(_WIN32)
|
||||
# undef ulong
|
||||
typedef unsigned long ulong;
|
||||
#endif
|
||||
|
||||
/*
|
||||
With GCC 4, __attribute__ ((visibility("default"))) can be used when compiling tidylib
|
||||
with "-fvisibility=hidden". See http://gcc.gnu.org/wiki/Visibility and build/gmake/Makefile.
|
||||
*/
|
||||
/*
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
#define TIDY_EXPORT __attribute__ ((visibility("default")))
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef TIDY_EXPORT /* Define it away for most builds */
|
||||
#define TIDY_EXPORT
|
||||
#endif
|
||||
|
||||
#ifndef TIDY_STRUCT
|
||||
#define TIDY_STRUCT
|
||||
#endif
|
||||
|
||||
typedef unsigned char byte;
|
||||
|
||||
typedef uint tchar; /* single, full character */
|
||||
typedef char tmbchar; /* single, possibly partial character */
|
||||
#ifndef TMBSTR_DEFINED
|
||||
typedef tmbchar* tmbstr; /* pointer to buffer of possibly partial chars */
|
||||
typedef const tmbchar* ctmbstr; /* Ditto, but const */
|
||||
#define NULLSTR (tmbstr)""
|
||||
#define TMBSTR_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef TIDY_CALL
|
||||
#define TIDY_CALL
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
# define ARG_UNUSED(x) x __attribute__((unused))
|
||||
#else
|
||||
# define ARG_UNUSED(x) x
|
||||
#endif
|
||||
|
||||
/* HAS_VSNPRINTF triggers the use of "vsnprintf", which is safe with respect to
|
||||
buffer overflow. Therefore, we make it the default unless HAS_VSNPRINTF
|
||||
has been defined. */
|
||||
#ifndef HAS_VSNPRINTF
|
||||
# define HAS_VSNPRINTF 1
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_POSIX_MAPPED_FILES
|
||||
# define SUPPORT_POSIX_MAPPED_FILES 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
bool is a reserved word in some but
|
||||
not all C++ compilers depending on age
|
||||
work around is to avoid bool altogether
|
||||
by introducing a new enum called Bool
|
||||
*/
|
||||
/* We could use the C99 definition where supported
|
||||
typedef _Bool Bool;
|
||||
#define no (_Bool)0
|
||||
#define yes (_Bool)1
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
no,
|
||||
yes
|
||||
} Bool;
|
||||
|
||||
/* for NULL pointers
|
||||
#define null ((const void*)0)
|
||||
extern void* null;
|
||||
*/
|
||||
|
||||
#if defined(DMALLOC)
|
||||
#include "dmalloc.h"
|
||||
#endif
|
||||
|
||||
/* Opaque data structure.
 * Cast to implementation type struct within lib.
 * This will reduce inter-dependencies/conflicts w/ application code.
 *
 * opaque_type(T) declares a placeholder `struct _T` holding a single int
 * and makes T a pointer-to-const of that struct.  The disabled branch
 * would instead make T a plain `const void*`.
 */
#if 1
#define opaque_type( typenam )\
    struct _##typenam { int _opaque; };\
    typedef struct _##typenam const * typenam
#else
#define opaque_type(typenam) typedef const void* typenam
#endif

/* Opaque data structure used to pass back
** and forth to keep current position in a
** list or other collection.
*/
opaque_type( TidyIterator );
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* __TIDY_PLATFORM_H__ */
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1097
include/tidy.h
Normal file
1097
include/tidy.h
Normal file
File diff suppressed because it is too large
Load diff
747
include/tidyenum.h
Normal file
747
include/tidyenum.h
Normal file
|
@ -0,0 +1,747 @@
|
|||
#ifndef __TIDYENUM_H__
|
||||
#define __TIDYENUM_H__
|
||||
|
||||
/* @file tidyenum.h -- Split public enums into separate header
|
||||
|
||||
Simplifies enum re-use in various wrappers. e.g. SWIG
|
||||
generated wrappers and COM IDL files.
|
||||
|
||||
Copyright (c) 1998-2008 World Wide Web Consortium
|
||||
(Massachusetts Institute of Technology, European Research
|
||||
Consortium for Informatics and Mathematics, Keio University).
|
||||
All Rights Reserved.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2008/06/18 20:18:54 $
|
||||
$Revision: 1.18 $
|
||||
|
||||
Contributing Author(s):
|
||||
|
||||
Dave Raggett <dsr@w3.org>
|
||||
|
||||
The contributing author(s) would like to thank all those who
|
||||
helped with testing, bug fixes and suggestions for improvements.
|
||||
This wouldn't have been possible without your help.
|
||||
|
||||
COPYRIGHT NOTICE:
|
||||
|
||||
This software and documentation is provided "as is," and
|
||||
the copyright holders and contributing author(s) make no
|
||||
representations or warranties, express or implied, including
|
||||
but not limited to, warranties of merchantability or fitness
|
||||
for any particular purpose or that the use of the software or
|
||||
documentation will not infringe any third party patents,
|
||||
copyrights, trademarks or other rights.
|
||||
|
||||
The copyright holders and contributing author(s) will not be held
|
||||
liable for any direct, indirect, special or consequential damages
|
||||
arising out of any use of the software or documentation, even if
|
||||
advised of the possibility of such damage.
|
||||
|
||||
Permission is hereby granted to use, copy, modify, and distribute
|
||||
this source code, or portions hereof, documentation and executables,
|
||||
for any purpose, without fee, subject to the following restrictions:
|
||||
|
||||
1. The origin of this source code must not be misrepresented.
|
||||
2. Altered versions must be plainly marked as such and must
|
||||
not be misrepresented as being the original source.
|
||||
3. This Copyright notice may not be removed or altered from any
|
||||
source or altered source distribution.
|
||||
|
||||
The copyright holders and contributing author(s) specifically
|
||||
permit, without fee, and encourage the use of this source code
|
||||
as a component for supporting the Hypertext Markup Language in
|
||||
commercial products. If you use this source code in a product,
|
||||
acknowledgment is not required but would be appreciated.
|
||||
|
||||
|
||||
Created 2001-05-20 by Charles Reitzel
|
||||
Updated 2002-07-01 by Charles Reitzel - 1st Implementation
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Enumerate configuration options
|
||||
*/
|
||||
|
||||
/** Categories of Tidy configuration options
*/
typedef enum
{
    TidyMarkup,          /**< Markup options: (X)HTML version, etc */
    TidyDiagnostics,     /**< Diagnostics */
    TidyPrettyPrint,     /**< Output layout */
    TidyEncoding,        /**< Character encodings */
    TidyMiscellaneous    /**< File handling, message format, etc. */
} TidyConfigCategory;
|
||||
|
||||
|
||||
/** Option IDs.  Used to get/set option values.
**
**  The enumerator order fixes the numeric IDs.  Where a feature is
**  compiled out, a "...NotUsed" placeholder takes the enumerator's place
**  so that the IDs which follow keep the same values either way.
*/
typedef enum
{
    TidyUnknownOption,   /**< Unknown option! */
    TidyIndentSpaces,    /**< Indentation n spaces */
    TidyWrapLen,         /**< Wrap margin */
    TidyTabSize,         /**< Expand tabs to n spaces */

    TidyCharEncoding,    /**< In/out character encoding */
    TidyInCharEncoding,  /**< Input character encoding (if different) */
    TidyOutCharEncoding, /**< Output character encoding (if different) */
    TidyNewline,         /**< Output line ending (default to platform) */

    TidyDoctypeMode,     /**< See doctype property */
    TidyDoctype,         /**< User specified doctype */

    TidyDuplicateAttrs,  /**< Keep first or last duplicate attribute */
    TidyAltText,         /**< Default text for alt attribute */

    /* obsolete */
    TidySlideStyle,      /**< Style sheet for slides: not used for anything yet */

    TidyErrFile,         /**< File name to write errors to */
    TidyOutFile,         /**< File name to write markup to */
    TidyWriteBack,       /**< If true then output tidied markup */
    TidyShowMarkup,      /**< If false, normal output is suppressed */
    TidyShowWarnings,    /**< However errors are always shown */
    TidyQuiet,           /**< No 'Parsing X', guessed DTD or summary */
    TidyIndentContent,   /**< Indent content of appropriate tags;
                              "auto" does text/block level content indentation */
    TidyHideEndTags,     /**< Suppress optional end tags */
    TidyXmlTags,         /**< Treat input as XML */
    TidyXmlOut,          /**< Create output as XML */
    TidyXhtmlOut,        /**< Output extensible HTML */
    TidyHtmlOut,         /**< Output plain HTML, even for XHTML input.
                              Yes means set explicitly. */
    TidyXmlDecl,         /**< Add <?xml?> for XML docs */
    TidyUpperCaseTags,   /**< Output tags in upper not lower case */
    TidyUpperCaseAttrs,  /**< Output attributes in upper not lower case */
    TidyMakeBare,        /**< Make bare HTML: remove Microsoft cruft */
    TidyMakeClean,       /**< Replace presentational clutter by style rules */
    TidyLogicalEmphasis, /**< Replace i by em and b by strong */
    TidyDropPropAttrs,   /**< Discard proprietary attributes */
    TidyDropFontTags,    /**< Discard presentation tags */
    TidyDropEmptyParas,  /**< Discard empty p elements */
    TidyFixComments,     /**< Fix comments with adjacent hyphens */
    TidyBreakBeforeBR,   /**< Output newline before <br> or not? */

    /* obsolete */
    TidyBurstSlides,     /**< Create slides on each h2 element */

    TidyNumEntities,     /**< Use numeric entities */
    TidyQuoteMarks,      /**< Output " marks as &quot; */
    TidyQuoteNbsp,       /**< Output non-breaking space as entity */
    TidyQuoteAmpersand,  /**< Output naked ampersand as &amp; */
    TidyWrapAttVals,     /**< Wrap within attribute values */
    TidyWrapScriptlets,  /**< Wrap within JavaScript string literals */
    TidyWrapSection,     /**< Wrap within <![ ... ]> section tags */
    TidyWrapAsp,         /**< Wrap within ASP pseudo elements */
    TidyWrapJste,        /**< Wrap within JSTE pseudo elements */
    TidyWrapPhp,         /**< Wrap within PHP pseudo elements */
    TidyFixBackslash,    /**< Fix URLs by replacing \ with / */
    TidyIndentAttributes,/**< Newline+indent before each attribute */
    TidyXmlPIs,          /**< If set to yes PIs must end with ?> */
    TidyXmlSpace,        /**< If set to yes adds xml:space attr as needed */
    TidyEncloseBodyText, /**< If yes text at body is wrapped in P's */
    TidyEncloseBlockText,/**< If yes text in blocks is wrapped in P's */
    TidyKeepFileTimes,   /**< If yes last modified time is preserved */
    TidyWord2000,        /**< Draconian cleaning for Word2000 */
    TidyMark,            /**< Add meta element indicating tidied doc */
    TidyEmacs,           /**< If true format error output for GNU Emacs */
    TidyEmacsFile,       /**< Name of current Emacs file */
    TidyLiteralAttribs,  /**< If true attributes may use newlines */
    TidyBodyOnly,        /**< Output BODY content only */
    TidyFixUri,          /**< Applies URI encoding if necessary */
    TidyLowerLiterals,   /**< Folds known attribute values to lower case */
    TidyHideComments,    /**< Hides all (real) comments in output */
    TidyIndentCdata,     /**< Indent <![CDATA[ ... ]]> section */
    TidyForceOutput,     /**< Output document even if errors were found */
    TidyShowErrors,      /**< Number of errors to put out */
    TidyAsciiChars,      /**< Convert quotes and dashes to nearest ASCII char */
    TidyJoinClasses,     /**< Join multiple class attributes */
    TidyJoinStyles,      /**< Join multiple style attributes */
    TidyEscapeCdata,     /**< Replace <![CDATA[]]> sections with escaped text */

#if SUPPORT_ASIAN_ENCODINGS
    TidyLanguage,        /**< Language property: not used for anything yet */
    TidyNCR,             /**< Allow numeric character references */
#else
    TidyLanguageNotUsed, /**< Placeholder keeping later IDs stable */
    TidyNCRNotUsed,      /**< Placeholder keeping later IDs stable */
#endif
#if SUPPORT_UTF16_ENCODINGS
    TidyOutputBOM,       /**< Output a Byte Order Mark (BOM) for UTF-16 encodings;
                              auto: if input stream has BOM, we output a BOM */
#else
    TidyOutputBOMNotUsed,/**< Placeholder keeping later IDs stable */
#endif

    TidyReplaceColor,    /**< Replace hex color attribute values with names */
    TidyCSSPrefix,       /**< CSS class naming for -clean option */

    TidyInlineTags,      /**< Declared inline tags */
    TidyBlockTags,       /**< Declared block tags */
    TidyEmptyTags,       /**< Declared empty tags */
    TidyPreTags,         /**< Declared pre tags */

    TidyAccessibilityCheckLevel, /**< Accessibility check level
                                      0 (old style), or 1, 2, 3 */

    TidyVertSpace,       /**< degree to which markup is spread out vertically */
#if SUPPORT_ASIAN_ENCODINGS
    TidyPunctWrap,       /**< consider punctuation and breaking spaces for wrapping */
#else
    TidyPunctWrapNotUsed,/**< Placeholder keeping later IDs stable */
#endif
    TidyMergeDivs,       /**< Merge multiple DIVs */
    TidyDecorateInferredUL, /**< Mark inferred UL elements with no indent CSS */
    TidyPreserveEntities,/**< Preserve entities */
    TidySortAttributes,  /**< Sort attributes */
    TidyMergeSpans,      /**< Merge multiple SPANs */
    TidyAnchorAsName,    /**< Define anchors as name attributes */
    N_TIDY_OPTIONS       /**< Must be last */
} TidyOptionId;
|
||||
|
||||
/** Option data types
*/
typedef enum
{
    TidyString,          /**< String */
    TidyInteger,         /**< Integer or enumeration */
    TidyBoolean          /**< Boolean flag */
} TidyOptionType;
|
||||
|
||||
|
||||
/** AutoBool values used by ParseBool, ParseTriState, ParseIndent, ParseBOM
*/
typedef enum
{
    TidyNoState,     /**< maps to 'no' */
    TidyYesState,    /**< maps to 'yes' */
    TidyAutoState    /**< Automatic */
} TidyTriState;
|
||||
|
||||
/** TidyNewline option values to control output line endings.
*/
typedef enum
{
    TidyLF,         /**< Use Unix style: LF */
    TidyCRLF,       /**< Use DOS/Windows style: CR+LF */
    TidyCR          /**< Use Macintosh style: CR */
} TidyLineEnding;
|
||||
|
||||
|
||||
/** Mode controlling treatment of doctype
*/
typedef enum
{
    TidyDoctypeOmit,    /**< Omit DOCTYPE altogether */
    TidyDoctypeAuto,    /**< Keep DOCTYPE in input.  Set version to content */
    TidyDoctypeStrict,  /**< Convert document to HTML 4 strict content model */
    TidyDoctypeLoose,   /**< Convert document to HTML 4 transitional
                             content model */
    TidyDoctypeUser     /**< Set DOCTYPE FPI explicitly */
} TidyDoctypeModes;
|
||||
|
||||
/** Mode controlling treatment of duplicate Attributes
*/
typedef enum
{
    TidyKeepFirst,   /**< first enumerator (value 0) */
    TidyKeepLast     /**< second enumerator (value 1) */
} TidyDupAttrModes;
|
||||
|
||||
/** Mode controlling treatment of sorting attributes
*/
typedef enum
{
    TidySortAttrNone,    /**< first enumerator (value 0) */
    TidySortAttrAlpha    /**< second enumerator (value 1) */
} TidyAttrSortStrategy;
|
||||
|
||||
/* I/O and Message handling interface
|
||||
**
|
||||
** By default, Tidy will define, create and use
|
||||
** instances of input and output handlers for
|
||||
** standard C buffered I/O (i.e. FILE* stdin,
|
||||
** FILE* stdout and FILE* stderr for content
|
||||
** input, content output and diagnostic output,
|
||||
** respectively. A FILE* cfgFile input handler
|
||||
** will be used for config files. Command line
|
||||
** options will just be set directly.
|
||||
*/
|
||||
|
||||
/** Message severity level
*/
typedef enum
{
    TidyInfo,             /**< Information about markup usage */
    TidyWarning,          /**< Warning message */
    TidyConfig,           /**< Configuration error */
    TidyAccess,           /**< Accessibility message */
    TidyError,            /**< Error message - output suppressed */
    TidyBadDocument,      /**< I/O or file system error */
    TidyFatal             /**< Crash! */
} TidyReportLevel;
|
||||
|
||||
|
||||
/* Document tree traversal functions
|
||||
*/
|
||||
|
||||
/** Node types
*/
typedef enum
{
    TidyNode_Root,        /**< Root */
    TidyNode_DocType,     /**< DOCTYPE */
    TidyNode_Comment,     /**< Comment */
    TidyNode_ProcIns,     /**< Processing Instruction */
    TidyNode_Text,        /**< Text */
    TidyNode_Start,       /**< Start Tag */
    TidyNode_End,         /**< End Tag */
    TidyNode_StartEnd,    /**< Start/End (empty) Tag */
    TidyNode_CDATA,       /**< Unparsed Text */
    TidyNode_Section,     /**< XML Section */
    TidyNode_Asp,         /**< ASP Source */
    TidyNode_Jste,        /**< JSTE Source */
    TidyNode_Php,         /**< PHP Source */
    TidyNode_XmlDecl      /**< XML Declaration */
} TidyNodeType;
|
||||
|
||||
|
||||
/** Known HTML element types
**
**  Enumerator order fixes the numeric IDs; N_TIDY_TAGS is the number of
**  IDs and must stay last.
*/
typedef enum
{
    TidyTag_UNKNOWN,  /**< Unknown tag! */
    TidyTag_A,        /**< A */
    TidyTag_ABBR,     /**< ABBR */
    TidyTag_ACRONYM,  /**< ACRONYM */
    TidyTag_ADDRESS,  /**< ADDRESS */
    TidyTag_ALIGN,    /**< ALIGN */
    TidyTag_APPLET,   /**< APPLET */
    TidyTag_AREA,     /**< AREA */
    TidyTag_B,        /**< B */
    TidyTag_BASE,     /**< BASE */
    TidyTag_BASEFONT, /**< BASEFONT */
    TidyTag_BDO,      /**< BDO */
    TidyTag_BGSOUND,  /**< BGSOUND */
    TidyTag_BIG,      /**< BIG */
    TidyTag_BLINK,    /**< BLINK */
    TidyTag_BLOCKQUOTE,   /**< BLOCKQUOTE */
    TidyTag_BODY,     /**< BODY */
    TidyTag_BR,       /**< BR */
    TidyTag_BUTTON,   /**< BUTTON */
    TidyTag_CAPTION,  /**< CAPTION */
    TidyTag_CENTER,   /**< CENTER */
    TidyTag_CITE,     /**< CITE */
    TidyTag_CODE,     /**< CODE */
    TidyTag_COL,      /**< COL */
    TidyTag_COLGROUP, /**< COLGROUP */
    TidyTag_COMMENT,  /**< COMMENT */
    TidyTag_DD,       /**< DD */
    TidyTag_DEL,      /**< DEL */
    TidyTag_DFN,      /**< DFN */
    TidyTag_DIR,      /**< DIR */
    TidyTag_DIV,      /**< DIV */
    TidyTag_DL,       /**< DL */
    TidyTag_DT,       /**< DT */
    TidyTag_EM,       /**< EM */
    TidyTag_EMBED,    /**< EMBED */
    TidyTag_FIELDSET, /**< FIELDSET */
    TidyTag_FONT,     /**< FONT */
    TidyTag_FORM,     /**< FORM */
    TidyTag_FRAME,    /**< FRAME */
    TidyTag_FRAMESET, /**< FRAMESET */
    TidyTag_H1,       /**< H1 */
    TidyTag_H2,       /**< H2 */
    TidyTag_H3,       /**< H3 */
    TidyTag_H4,       /**< H4 */
    TidyTag_H5,       /**< H5 */
    TidyTag_H6,       /**< H6 */
    TidyTag_HEAD,     /**< HEAD */
    TidyTag_HR,       /**< HR */
    TidyTag_HTML,     /**< HTML */
    TidyTag_I,        /**< I */
    TidyTag_IFRAME,   /**< IFRAME */
    TidyTag_ILAYER,   /**< ILAYER */
    TidyTag_IMG,      /**< IMG */
    TidyTag_INPUT,    /**< INPUT */
    TidyTag_INS,      /**< INS */
    TidyTag_ISINDEX,  /**< ISINDEX */
    TidyTag_KBD,      /**< KBD */
    TidyTag_KEYGEN,   /**< KEYGEN */
    TidyTag_LABEL,    /**< LABEL */
    TidyTag_LAYER,    /**< LAYER */
    TidyTag_LEGEND,   /**< LEGEND */
    TidyTag_LI,       /**< LI */
    TidyTag_LINK,     /**< LINK */
    TidyTag_LISTING,  /**< LISTING */
    TidyTag_MAP,      /**< MAP */
    TidyTag_MARQUEE,  /**< MARQUEE */
    TidyTag_MENU,     /**< MENU */
    TidyTag_META,     /**< META */
    TidyTag_MULTICOL, /**< MULTICOL */
    TidyTag_NOBR,     /**< NOBR */
    TidyTag_NOEMBED,  /**< NOEMBED */
    TidyTag_NOFRAMES, /**< NOFRAMES */
    TidyTag_NOLAYER,  /**< NOLAYER */
    TidyTag_NOSAVE,   /**< NOSAVE */
    TidyTag_NOSCRIPT, /**< NOSCRIPT */
    TidyTag_OBJECT,   /**< OBJECT */
    TidyTag_OL,       /**< OL */
    TidyTag_OPTGROUP, /**< OPTGROUP */
    TidyTag_OPTION,   /**< OPTION */
    TidyTag_P,        /**< P */
    TidyTag_PARAM,    /**< PARAM */
    TidyTag_PLAINTEXT,/**< PLAINTEXT */
    TidyTag_PRE,      /**< PRE */
    TidyTag_Q,        /**< Q */
    TidyTag_RB,       /**< RB */
    TidyTag_RBC,      /**< RBC */
    TidyTag_RP,       /**< RP */
    TidyTag_RT,       /**< RT */
    TidyTag_RTC,      /**< RTC */
    TidyTag_RUBY,     /**< RUBY */
    TidyTag_S,        /**< S */
    TidyTag_SAMP,     /**< SAMP */
    TidyTag_SCRIPT,   /**< SCRIPT */
    TidyTag_SELECT,   /**< SELECT */
    TidyTag_SERVER,   /**< SERVER */
    TidyTag_SERVLET,  /**< SERVLET */
    TidyTag_SMALL,    /**< SMALL */
    TidyTag_SPACER,   /**< SPACER */
    TidyTag_SPAN,     /**< SPAN */
    TidyTag_STRIKE,   /**< STRIKE */
    TidyTag_STRONG,   /**< STRONG */
    TidyTag_STYLE,    /**< STYLE */
    TidyTag_SUB,      /**< SUB */
    TidyTag_SUP,      /**< SUP */
    TidyTag_TABLE,    /**< TABLE */
    TidyTag_TBODY,    /**< TBODY */
    TidyTag_TD,       /**< TD */
    TidyTag_TEXTAREA, /**< TEXTAREA */
    TidyTag_TFOOT,    /**< TFOOT */
    TidyTag_TH,       /**< TH */
    TidyTag_THEAD,    /**< THEAD */
    TidyTag_TITLE,    /**< TITLE */
    TidyTag_TR,       /**< TR */
    TidyTag_TT,       /**< TT */
    TidyTag_U,        /**< U */
    TidyTag_UL,       /**< UL */
    TidyTag_VAR,      /**< VAR */
    TidyTag_WBR,      /**< WBR */
    TidyTag_XMP,      /**< XMP */
    TidyTag_NEXTID,   /**< NEXTID */

    TidyTag_ARTICLE,  /**< ARTICLE */
    TidyTag_ASIDE,    /**< ASIDE */
    TidyTag_AUDIO,    /**< AUDIO */
    TidyTag_CANVAS,   /**< CANVAS */
    TidyTag_COMMAND,  /**< COMMAND */
    TidyTag_DATALIST, /**< DATALIST */
    TidyTag_DETAILS,  /**< DETAILS */
    TidyTag_FIGCAPTION,   /**< FIGCAPTION */
    TidyTag_FIGURE,   /**< FIGURE */
    TidyTag_FOOTER,   /**< FOOTER */
    TidyTag_HEADER,   /**< HEADER */
    TidyTag_HGROUP,   /**< HGROUP */
    TidyTag_MARK,     /**< MARK */
    TidyTag_METER,    /**< METER */
    TidyTag_NAV,      /**< NAV */
    TidyTag_OUTPUT,   /**< OUTPUT */
    TidyTag_PROGRESS, /**< PROGRESS */
    TidyTag_SECTION,  /**< SECTION */
    TidyTag_SOURCE,   /**< SOURCE */
    TidyTag_SUMMARY,  /**< SUMMARY */
    TidyTag_TIME,     /**< TIME */
    TidyTag_TRACK,    /**< TRACK */
    TidyTag_VIDEO,    /**< VIDEO */

    N_TIDY_TAGS       /**< Must be last */
} TidyTagId;
|
||||
|
||||
/* Attribute interrogation
|
||||
*/
|
||||
|
||||
/** Known HTML attributes
**
**  Enumerator order fixes the numeric IDs; N_TIDY_ATTRIBS is the number
**  of IDs and must stay last.
*/
typedef enum
{
    TidyAttr_UNKNOWN,           /**< UNKNOWN= */
    TidyAttr_ABBR,              /**< ABBR= */
    TidyAttr_ACCEPT,            /**< ACCEPT= */
    TidyAttr_ACCEPT_CHARSET,    /**< ACCEPT_CHARSET= */
    TidyAttr_ACCESSKEY,         /**< ACCESSKEY= */
    TidyAttr_ACTION,            /**< ACTION= */
    TidyAttr_ADD_DATE,          /**< ADD_DATE= */
    TidyAttr_ALIGN,             /**< ALIGN= */
    TidyAttr_ALINK,             /**< ALINK= */
    TidyAttr_ALT,               /**< ALT= */
    TidyAttr_ARCHIVE,           /**< ARCHIVE= */
    TidyAttr_AXIS,              /**< AXIS= */
    TidyAttr_BACKGROUND,        /**< BACKGROUND= */
    TidyAttr_BGCOLOR,           /**< BGCOLOR= */
    TidyAttr_BGPROPERTIES,      /**< BGPROPERTIES= */
    TidyAttr_BORDER,            /**< BORDER= */
    TidyAttr_BORDERCOLOR,       /**< BORDERCOLOR= */
    TidyAttr_BOTTOMMARGIN,      /**< BOTTOMMARGIN= */
    TidyAttr_CELLPADDING,       /**< CELLPADDING= */
    TidyAttr_CELLSPACING,       /**< CELLSPACING= */
    TidyAttr_CHAR,              /**< CHAR= */
    TidyAttr_CHAROFF,           /**< CHAROFF= */
    TidyAttr_CHARSET,           /**< CHARSET= */
    TidyAttr_CHECKED,           /**< CHECKED= */
    TidyAttr_CITE,              /**< CITE= */
    TidyAttr_CLASS,             /**< CLASS= */
    TidyAttr_CLASSID,           /**< CLASSID= */
    TidyAttr_CLEAR,             /**< CLEAR= */
    TidyAttr_CODE,              /**< CODE= */
    TidyAttr_CODEBASE,          /**< CODEBASE= */
    TidyAttr_CODETYPE,          /**< CODETYPE= */
    TidyAttr_COLOR,             /**< COLOR= */
    TidyAttr_COLS,              /**< COLS= */
    TidyAttr_COLSPAN,           /**< COLSPAN= */
    TidyAttr_COMPACT,           /**< COMPACT= */
    TidyAttr_CONTENT,           /**< CONTENT= */
    TidyAttr_COORDS,            /**< COORDS= */
    TidyAttr_DATA,              /**< DATA= */
    TidyAttr_DATAFLD,           /**< DATAFLD= */
    TidyAttr_DATAFORMATAS,      /**< DATAFORMATAS= */
    TidyAttr_DATAPAGESIZE,      /**< DATAPAGESIZE= */
    TidyAttr_DATASRC,           /**< DATASRC= */
    TidyAttr_DATETIME,          /**< DATETIME= */
    TidyAttr_DECLARE,           /**< DECLARE= */
    TidyAttr_DEFER,             /**< DEFER= */
    TidyAttr_DIR,               /**< DIR= */
    TidyAttr_DISABLED,          /**< DISABLED= */
    TidyAttr_ENCODING,          /**< ENCODING= */
    TidyAttr_ENCTYPE,           /**< ENCTYPE= */
    TidyAttr_FACE,              /**< FACE= */
    TidyAttr_FOR,               /**< FOR= */
    TidyAttr_FRAME,             /**< FRAME= */
    TidyAttr_FRAMEBORDER,       /**< FRAMEBORDER= */
    TidyAttr_FRAMESPACING,      /**< FRAMESPACING= */
    TidyAttr_GRIDX,             /**< GRIDX= */
    TidyAttr_GRIDY,             /**< GRIDY= */
    TidyAttr_HEADERS,           /**< HEADERS= */
    TidyAttr_HEIGHT,            /**< HEIGHT= */
    TidyAttr_HREF,              /**< HREF= */
    TidyAttr_HREFLANG,          /**< HREFLANG= */
    TidyAttr_HSPACE,            /**< HSPACE= */
    TidyAttr_HTTP_EQUIV,        /**< HTTP_EQUIV= */
    TidyAttr_ID,                /**< ID= */
    TidyAttr_ISMAP,             /**< ISMAP= */
    TidyAttr_LABEL,             /**< LABEL= */
    TidyAttr_LANG,              /**< LANG= */
    TidyAttr_LANGUAGE,          /**< LANGUAGE= */
    TidyAttr_LAST_MODIFIED,     /**< LAST_MODIFIED= */
    TidyAttr_LAST_VISIT,        /**< LAST_VISIT= */
    TidyAttr_LEFTMARGIN,        /**< LEFTMARGIN= */
    TidyAttr_LINK,              /**< LINK= */
    TidyAttr_LONGDESC,          /**< LONGDESC= */
    TidyAttr_LOWSRC,            /**< LOWSRC= */
    TidyAttr_MARGINHEIGHT,      /**< MARGINHEIGHT= */
    TidyAttr_MARGINWIDTH,       /**< MARGINWIDTH= */
    TidyAttr_MAXLENGTH,         /**< MAXLENGTH= */
    TidyAttr_MEDIA,             /**< MEDIA= */
    TidyAttr_METHOD,            /**< METHOD= */
    TidyAttr_MULTIPLE,          /**< MULTIPLE= */
    TidyAttr_NAME,              /**< NAME= */
    TidyAttr_NOHREF,            /**< NOHREF= */
    TidyAttr_NORESIZE,          /**< NORESIZE= */
    TidyAttr_NOSHADE,           /**< NOSHADE= */
    TidyAttr_NOWRAP,            /**< NOWRAP= */
    TidyAttr_OBJECT,            /**< OBJECT= */
    TidyAttr_OnAFTERUPDATE,     /**< OnAFTERUPDATE= */
    TidyAttr_OnBEFOREUNLOAD,    /**< OnBEFOREUNLOAD= */
    TidyAttr_OnBEFOREUPDATE,    /**< OnBEFOREUPDATE= */
    TidyAttr_OnBLUR,            /**< OnBLUR= */
    TidyAttr_OnCHANGE,          /**< OnCHANGE= */
    TidyAttr_OnCLICK,           /**< OnCLICK= */
    TidyAttr_OnDATAAVAILABLE,   /**< OnDATAAVAILABLE= */
    TidyAttr_OnDATASETCHANGED,  /**< OnDATASETCHANGED= */
    TidyAttr_OnDATASETCOMPLETE, /**< OnDATASETCOMPLETE= */
    TidyAttr_OnDBLCLICK,        /**< OnDBLCLICK= */
    TidyAttr_OnERRORUPDATE,     /**< OnERRORUPDATE= */
    TidyAttr_OnFOCUS,           /**< OnFOCUS= */
    TidyAttr_OnKEYDOWN,         /**< OnKEYDOWN= */
    TidyAttr_OnKEYPRESS,        /**< OnKEYPRESS= */
    TidyAttr_OnKEYUP,           /**< OnKEYUP= */
    TidyAttr_OnLOAD,            /**< OnLOAD= */
    TidyAttr_OnMOUSEDOWN,       /**< OnMOUSEDOWN= */
    TidyAttr_OnMOUSEMOVE,       /**< OnMOUSEMOVE= */
    TidyAttr_OnMOUSEOUT,        /**< OnMOUSEOUT= */
    TidyAttr_OnMOUSEOVER,       /**< OnMOUSEOVER= */
    TidyAttr_OnMOUSEUP,         /**< OnMOUSEUP= */
    TidyAttr_OnRESET,           /**< OnRESET= */
    TidyAttr_OnROWENTER,        /**< OnROWENTER= */
    TidyAttr_OnROWEXIT,         /**< OnROWEXIT= */
    TidyAttr_OnSELECT,          /**< OnSELECT= */
    TidyAttr_OnSUBMIT,          /**< OnSUBMIT= */
    TidyAttr_OnUNLOAD,          /**< OnUNLOAD= */
    TidyAttr_PROFILE,           /**< PROFILE= */
    TidyAttr_PROMPT,            /**< PROMPT= */
    TidyAttr_RBSPAN,            /**< RBSPAN= */
    TidyAttr_READONLY,          /**< READONLY= */
    TidyAttr_REL,               /**< REL= */
    TidyAttr_REV,               /**< REV= */
    TidyAttr_RIGHTMARGIN,       /**< RIGHTMARGIN= */
    TidyAttr_ROWS,              /**< ROWS= */
    TidyAttr_ROWSPAN,           /**< ROWSPAN= */
    TidyAttr_RULES,             /**< RULES= */
    TidyAttr_SCHEME,            /**< SCHEME= */
    TidyAttr_SCOPE,             /**< SCOPE= */
    TidyAttr_SCROLLING,         /**< SCROLLING= */
    TidyAttr_SELECTED,          /**< SELECTED= */
    TidyAttr_SHAPE,             /**< SHAPE= */
    TidyAttr_SHOWGRID,          /**< SHOWGRID= */
    TidyAttr_SHOWGRIDX,         /**< SHOWGRIDX= */
    TidyAttr_SHOWGRIDY,         /**< SHOWGRIDY= */
    TidyAttr_SIZE,              /**< SIZE= */
    TidyAttr_SPAN,              /**< SPAN= */
    TidyAttr_SRC,               /**< SRC= */
    TidyAttr_STANDBY,           /**< STANDBY= */
    TidyAttr_START,             /**< START= */
    TidyAttr_STYLE,             /**< STYLE= */
    TidyAttr_SUMMARY,           /**< SUMMARY= */
    TidyAttr_TABINDEX,          /**< TABINDEX= */
    TidyAttr_TARGET,            /**< TARGET= */
    TidyAttr_TEXT,              /**< TEXT= */
    TidyAttr_TITLE,             /**< TITLE= */
    TidyAttr_TOPMARGIN,         /**< TOPMARGIN= */
    TidyAttr_TYPE,              /**< TYPE= */
    TidyAttr_USEMAP,            /**< USEMAP= */
    TidyAttr_VALIGN,            /**< VALIGN= */
    TidyAttr_VALUE,             /**< VALUE= */
    TidyAttr_VALUETYPE,         /**< VALUETYPE= */
    TidyAttr_VERSION,           /**< VERSION= */
    TidyAttr_VLINK,             /**< VLINK= */
    TidyAttr_VSPACE,            /**< VSPACE= */
    TidyAttr_WIDTH,             /**< WIDTH= */
    TidyAttr_WRAP,              /**< WRAP= */
    TidyAttr_XML_LANG,          /**< XML_LANG= */
    TidyAttr_XML_SPACE,         /**< XML_SPACE= */
    TidyAttr_XMLNS,             /**< XMLNS= */

    TidyAttr_EVENT,             /**< EVENT= */
    TidyAttr_METHODS,           /**< METHODS= */
    TidyAttr_N,                 /**< N= */
    TidyAttr_SDAFORM,           /**< SDAFORM= */
    TidyAttr_SDAPREF,           /**< SDAPREF= */
    TidyAttr_SDASUFF,           /**< SDASUFF= */
    TidyAttr_URN,               /**< URN= */

    TidyAttr_ASYNC,             /**< ASYNC= */
    TidyAttr_AUTOCOMPLETE,      /**< AUTOCOMPLETE= */
    TidyAttr_AUTOFOCUS,         /**< AUTOFOCUS= */
    TidyAttr_AUTOPLAY,          /**< AUTOPLAY= */
    TidyAttr_CHALLENGE,         /**< CHALLENGE= */
    TidyAttr_CONTENTEDITABLE,   /**< CONTENTEDITABLE= */
    TidyAttr_CONTEXTMENU,       /**< CONTEXTMENU= */
    TidyAttr_CONTROLS,          /**< CONTROLS= */
    TidyAttr_DEFAULT,           /**< DEFAULT= */
    TidyAttr_DIRNAME,           /**< DIRNAME= */
    TidyAttr_DRAGGABLE,         /**< DRAGGABLE= */
    TidyAttr_DROPZONE,          /**< DROPZONE= */
    TidyAttr_FORM,              /**< FORM= */
    TidyAttr_FORMACTION,        /**< FORMACTION= */
    TidyAttr_FORMENCTYPE,       /**< FORMENCTYPE= */
    TidyAttr_FORMMETHOD,        /**< FORMMETHOD= */
    TidyAttr_FORMNOVALIDATE,    /**< FORMNOVALIDATE= */
    TidyAttr_FORMTARGET,        /**< FORMTARGET= */
    TidyAttr_HIDDEN,            /**< HIDDEN= */
    TidyAttr_HIGH,              /**< HIGH= */
    TidyAttr_ICON,              /**< ICON= */
    TidyAttr_KEYTYPE,           /**< KEYTYPE= */
    TidyAttr_KIND,              /**< KIND= */
    TidyAttr_LIST,              /**< LIST= */
    TidyAttr_LOOP,              /**< LOOP= */
    TidyAttr_LOW,               /**< LOW= */
    TidyAttr_MANIFEST,          /**< MANIFEST= */
    TidyAttr_MAX,               /**< MAX= */
    TidyAttr_MEDIAGROUP,        /**< MEDIAGROUP= */
    TidyAttr_MIN,               /**< MIN= */
    TidyAttr_NOVALIDATE,        /**< NOVALIDATE= */
    TidyAttr_OPEN,              /**< OPEN= */
    TidyAttr_OPTIMUM,           /**< OPTIMUM= */
    TidyAttr_OnABORT,           /**< OnABORT= */
    TidyAttr_OnAFTERPRINT,      /**< OnAFTERPRINT= */
    TidyAttr_OnBEFOREPRINT,     /**< OnBEFOREPRINT= */
    TidyAttr_OnCANPLAY,         /**< OnCANPLAY= */
    TidyAttr_OnCANPLAYTHROUGH,  /**< OnCANPLAYTHROUGH= */
    TidyAttr_OnCONTEXTMENU,     /**< OnCONTEXTMENU= */
    TidyAttr_OnCUECHANGE,       /**< OnCUECHANGE= */
    TidyAttr_OnDRAG,            /**< OnDRAG= */
    TidyAttr_OnDRAGEND,         /**< OnDRAGEND= */
    TidyAttr_OnDRAGENTER,       /**< OnDRAGENTER= */
    TidyAttr_OnDRAGLEAVE,       /**< OnDRAGLEAVE= */
    TidyAttr_OnDRAGOVER,        /**< OnDRAGOVER= */
    TidyAttr_OnDRAGSTART,       /**< OnDRAGSTART= */
    TidyAttr_OnDROP,            /**< OnDROP= */
    TidyAttr_OnDURATIONCHANGE,  /**< OnDURATIONCHANGE= */
    TidyAttr_OnEMPTIED,         /**< OnEMPTIED= */
    TidyAttr_OnENDED,           /**< OnENDED= */
    TidyAttr_OnERROR,           /**< OnERROR= */
    TidyAttr_OnHASHCHANGE,      /**< OnHASHCHANGE= */
    TidyAttr_OnINPUT,           /**< OnINPUT= */
    TidyAttr_OnINVALID,         /**< OnINVALID= */
    TidyAttr_OnLOADEDDATA,      /**< OnLOADEDDATA= */
    TidyAttr_OnLOADEDMETADATA,  /**< OnLOADEDMETADATA= */
    TidyAttr_OnLOADSTART,       /**< OnLOADSTART= */
    TidyAttr_OnMESSAGE,         /**< OnMESSAGE= */
    TidyAttr_OnMOUSEWHEEL,      /**< OnMOUSEWHEEL= */
    TidyAttr_OnOFFLINE,         /**< OnOFFLINE= */
    TidyAttr_OnONLINE,          /**< OnONLINE= */
    TidyAttr_OnPAGEHIDE,        /**< OnPAGEHIDE= */
    TidyAttr_OnPAGESHOW,        /**< OnPAGESHOW= */
    TidyAttr_OnPAUSE,           /**< OnPAUSE= */
    TidyAttr_OnPLAY,            /**< OnPLAY= */
    TidyAttr_OnPLAYING,         /**< OnPLAYING= */
    TidyAttr_OnPOPSTATE,        /**< OnPOPSTATE= */
    TidyAttr_OnPROGRESS,        /**< OnPROGRESS= */
    TidyAttr_OnRATECHANGE,      /**< OnRATECHANGE= */
    TidyAttr_OnREADYSTATECHANGE,/**< OnREADYSTATECHANGE= */
    TidyAttr_OnREDO,            /**< OnREDO= */
    TidyAttr_OnRESIZE,          /**< OnRESIZE= */
    TidyAttr_OnSCROLL,          /**< OnSCROLL= */
    TidyAttr_OnSEEKED,          /**< OnSEEKED= */
    TidyAttr_OnSEEKING,         /**< OnSEEKING= */
    TidyAttr_OnSHOW,            /**< OnSHOW= */
    TidyAttr_OnSTALLED,         /**< OnSTALLED= */
    TidyAttr_OnSTORAGE,         /**< OnSTORAGE= */
    TidyAttr_OnSUSPEND,         /**< OnSUSPEND= */
    TidyAttr_OnTIMEUPDATE,      /**< OnTIMEUPDATE= */
    TidyAttr_OnUNDO,            /**< OnUNDO= */
    TidyAttr_OnVOLUMECHANGE,    /**< OnVOLUMECHANGE= */
    TidyAttr_OnWAITING,         /**< OnWAITING= */
    TidyAttr_PATTERN,           /**< PATTERN= */
    TidyAttr_PLACEHOLDER,       /**< PLACEHOLDER= */
    TidyAttr_POSTER,            /**< POSTER= */
    TidyAttr_PRELOAD,           /**< PRELOAD= */
    TidyAttr_PUBDATE,           /**< PUBDATE= */
    TidyAttr_RADIOGROUP,        /**< RADIOGROUP= */
    TidyAttr_REQUIRED,          /**< REQUIRED= */
    TidyAttr_REVERSED,          /**< REVERSED= */
    TidyAttr_SANDBOX,           /**< SANDBOX= */
    TidyAttr_SCOPED,            /**< SCOPED= */
    TidyAttr_SEAMLESS,          /**< SEAMLESS= */
    TidyAttr_SIZES,             /**< SIZES= */
    TidyAttr_SPELLCHECK,        /**< SPELLCHECK= */
    TidyAttr_SRCDOC,            /**< SRCDOC= */
    TidyAttr_SRCLANG,           /**< SRCLANG= */
    TidyAttr_STEP,              /**< STEP= */

    N_TIDY_ATTRIBS              /**< Must be last */
} TidyAttrId;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
#endif /* __TIDYENUM_H__ */
|
3310
src/access.c
Normal file
3310
src/access.c
Normal file
File diff suppressed because it is too large
Load diff
279
src/access.h
Normal file
279
src/access.h
Normal file
|
@ -0,0 +1,279 @@
|
|||
#ifndef __ACCESS_H__
|
||||
#define __ACCESS_H__
|
||||
|
||||
/* access.h -- carry out accessibility checks
|
||||
|
||||
Copyright University of Toronto
|
||||
Portions (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.7 $
|
||||
|
||||
*/
|
||||
|
||||
/*********************************************************************
|
||||
* AccessibilityChecks
|
||||
*
|
||||
* Carries out processes for all accessibility checks. Traverses
|
||||
* through all the content within the tree and evaluates the tags for
|
||||
* accessibility.
|
||||
*
|
||||
* To perform the following checks, 'AccessibilityChecks' must be
|
||||
* called AFTER the tree structure has been formed.
|
||||
*
|
||||
* If, in the command prompt, there is no specification of which
|
||||
* accessibility priorities to check, no accessibility checks will be
|
||||
* performed. (i.e. '1' for priority 1, '2' for priorities 1 and 2,
|
||||
* and '3' for priorities 1, 2 and 3.)
|
||||
*
|
||||
* Copyright University of Toronto
|
||||
* Programmed by: Mike Lam and Chris Ridpath
|
||||
* Modifications by : Terry Teague (TRT)
|
||||
*
|
||||
*********************************************************************/
|
||||
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
#if SUPPORT_ACCESSIBILITY_CHECKS
|
||||
|
||||
/* The accessibility checks to perform depending on user's desire.
|
||||
|
||||
1. priority 1
|
||||
2. priority 1 & 2
|
||||
3. priority 1, 2, & 3
|
||||
*/
|
||||
|
||||
/* Determines if the client-side text link is found within the document
|
||||
typedef struct AreaLinks
|
||||
{
|
||||
struct AreaLinks* next;
|
||||
char* link;
|
||||
Bool HasBeenFound;
|
||||
} AreaLinks;
|
||||
*/
|
||||
|
||||
/* Size, in characters, of the fixed text buffers declared with
   TEXTBUF_SIZE.  Spelled as an anonymous enum so the name is a
   compile-time constant. */
enum {
    TEXTBUF_SIZE=128u
};
|
||||
|
||||
struct _TidyAccessImpl;
|
||||
typedef struct _TidyAccessImpl TidyAccessImpl;
|
||||
|
||||
struct _TidyAccessImpl
|
||||
{
|
||||
/* gets set from Tidy variable AccessibilityCheckLevel */
|
||||
int PRIORITYCHK;
|
||||
|
||||
/* Number of characters that are found within the concatenated text */
|
||||
int counter;
|
||||
|
||||
/* list of characters in the text nodes found within a container element */
|
||||
tmbchar textNode[ TEXTBUF_SIZE ];
|
||||
|
||||
/* The list of characters found within one text node */
|
||||
tmbchar text[ TEXTBUF_SIZE ];
|
||||
|
||||
/* Number of frame elements found within a frameset */
|
||||
int numFrames;
|
||||
|
||||
/* Number of 'longdesc' attributes found within a frameset */
|
||||
int HasCheckedLongDesc;
|
||||
|
||||
int CheckedHeaders;
|
||||
int ListElements;
|
||||
int OtherListElements;
|
||||
|
||||
/* For 'USEMAP' identifier */
|
||||
Bool HasUseMap;
|
||||
Bool HasName;
|
||||
Bool HasMap;
|
||||
|
||||
/* For tracking nodes that are deleted from the original parse tree - TRT */
|
||||
/* Node *access_tree; */
|
||||
|
||||
Bool HasTH;
|
||||
Bool HasValidFor;
|
||||
Bool HasValidId;
|
||||
Bool HasValidRowHeaders;
|
||||
Bool HasValidColumnHeaders;
|
||||
Bool HasInvalidRowHeader;
|
||||
Bool HasInvalidColumnHeader;
|
||||
int ForID;
|
||||
|
||||
/* List containing map-links
|
||||
AreaLinks* links;
|
||||
AreaLinks* start;
|
||||
AreaLinks* current;
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
|
||||
/*
    Accessibility error/warning codes; the code raised selects the message
    to display.

    The codes start at FIRST_ACCESS_ERR (1000) so they do not collide with
    the other error codes defined in message.h and used in localize.c.
    The bracketed index after each enumerator is the checkpoint number
    carried over from the original per-entry comments.
*/
enum accessErrorCodes
{
    FIRST_ACCESS_ERR = 1000,    /* must be first */

    IMG_MISSING_ALT,                                /* [1.1.1.1] */
    IMG_ALT_SUSPICIOUS_FILENAME,                    /* [1.1.1.2] */
    IMG_ALT_SUSPICIOUS_FILE_SIZE,                   /* [1.1.1.3] */
    IMG_ALT_SUSPICIOUS_PLACEHOLDER,                 /* [1.1.1.4] */
    IMG_ALT_SUSPICIOUS_TOO_LONG,                    /* [1.1.1.10] */
    IMG_MISSING_ALT_BULLET,                         /* [1.1.1.11] */
    IMG_MISSING_ALT_H_RULE,                         /* [1.1.1.12] */
    IMG_MISSING_LONGDESC_DLINK,                     /* [1.1.2.1] */
    IMG_MISSING_DLINK,                              /* [1.1.2.2] */
    IMG_MISSING_LONGDESC,                           /* [1.1.2.3] */
    LONGDESC_NOT_REQUIRED,                          /* [1.1.2.5] */
    IMG_BUTTON_MISSING_ALT,                         /* [1.1.3.1] */
    APPLET_MISSING_ALT,                             /* [1.1.4.1] */
    OBJECT_MISSING_ALT,                             /* [1.1.5.1] */
    AUDIO_MISSING_TEXT_WAV,                         /* [1.1.6.1] */
    AUDIO_MISSING_TEXT_AU,                          /* [1.1.6.2] */
    AUDIO_MISSING_TEXT_AIFF,                        /* [1.1.6.3] */
    AUDIO_MISSING_TEXT_SND,                         /* [1.1.6.4] */
    AUDIO_MISSING_TEXT_RA,                          /* [1.1.6.5] */
    AUDIO_MISSING_TEXT_RM,                          /* [1.1.6.6] */
    FRAME_MISSING_LONGDESC,                         /* [1.1.8.1] */
    AREA_MISSING_ALT,                               /* [1.1.9.1] */
    SCRIPT_MISSING_NOSCRIPT,                        /* [1.1.10.1] */
    ASCII_REQUIRES_DESCRIPTION,                     /* [1.1.12.1] */
    IMG_MAP_SERVER_REQUIRES_TEXT_LINKS,             /* [1.2.1.1] */
    MULTIMEDIA_REQUIRES_TEXT,                       /* [1.4.1.1] */
    IMG_MAP_CLIENT_MISSING_TEXT_LINKS,              /* [1.5.1.1] */
    INFORMATION_NOT_CONVEYED_IMAGE,                 /* [2.1.1.1] */
    INFORMATION_NOT_CONVEYED_APPLET,                /* [2.1.1.2] */
    INFORMATION_NOT_CONVEYED_OBJECT,                /* [2.1.1.3] */
    INFORMATION_NOT_CONVEYED_SCRIPT,                /* [2.1.1.4] */
    INFORMATION_NOT_CONVEYED_INPUT,                 /* [2.1.1.5] */
    COLOR_CONTRAST_TEXT,                            /* [2.2.1.1] */
    COLOR_CONTRAST_LINK,                            /* [2.2.1.2] */
    COLOR_CONTRAST_ACTIVE_LINK,                     /* [2.2.1.3] */
    COLOR_CONTRAST_VISITED_LINK,                    /* [2.2.1.4] */
    DOCTYPE_MISSING,                                /* [3.2.1.1] */
    STYLE_SHEET_CONTROL_PRESENTATION,               /* [3.3.1.1] */
    HEADERS_IMPROPERLY_NESTED,                      /* [3.5.1.1] */
    POTENTIAL_HEADER_BOLD,                          /* [3.5.2.1] */
    POTENTIAL_HEADER_ITALICS,                       /* [3.5.2.2] */
    POTENTIAL_HEADER_UNDERLINE,                     /* [3.5.2.3] */
    HEADER_USED_FORMAT_TEXT,                        /* [3.5.3.1] */
    LIST_USAGE_INVALID_UL,                          /* [3.6.1.1] */
    LIST_USAGE_INVALID_OL,                          /* [3.6.1.2] */
    LIST_USAGE_INVALID_LI,                          /* [3.6.1.4] */
    INDICATE_CHANGES_IN_LANGUAGE,                   /* [4.1.1.1] */
    LANGUAGE_NOT_IDENTIFIED,                        /* [4.3.1.1] */
    LANGUAGE_INVALID,                               /* [4.3.1.2] - the original comment repeated 4.3.1.1; TODO confirm against the message table */
    DATA_TABLE_MISSING_HEADERS,                     /* [5.1.2.1] */
    DATA_TABLE_MISSING_HEADERS_COLUMN,              /* [5.1.2.2] */
    DATA_TABLE_MISSING_HEADERS_ROW,                 /* [5.1.2.3] */
    DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS,       /* [5.2.1.1] */
    DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS,          /* [5.2.1.2] */
    LAYOUT_TABLES_LINEARIZE_PROPERLY,               /* [5.3.1.1] */
    LAYOUT_TABLE_INVALID_MARKUP,                    /* [5.4.1.1] */
    TABLE_MISSING_SUMMARY,                          /* [5.5.1.1] */
    TABLE_SUMMARY_INVALID_NULL,                     /* [5.5.1.2] */
    TABLE_SUMMARY_INVALID_SPACES,                   /* [5.5.1.3] */
    TABLE_SUMMARY_INVALID_PLACEHOLDER,              /* [5.5.1.6] */
    TABLE_MISSING_CAPTION,                          /* [5.5.2.1] */
    TABLE_MAY_REQUIRE_HEADER_ABBR,                  /* [5.6.1.1] */
    TABLE_MAY_REQUIRE_HEADER_ABBR_NULL,             /* [5.6.1.2] */
    TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES,           /* [5.6.1.3] */
    STYLESHEETS_REQUIRE_TESTING_LINK,               /* [6.1.1.1] */
    STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT,      /* [6.1.1.2] */
    STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR,         /* [6.1.1.3] */
    FRAME_SRC_INVALID,                              /* [6.2.1.1] */
    TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET,       /* [6.2.2.1] */
    TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT,       /* [6.2.2.2] */
    TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT,       /* [6.2.2.3] */
    PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT,    /* [6.3.1.1] */
    PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT,    /* [6.3.1.2] */
    PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED,     /* [6.3.1.3] */
    PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET,    /* [6.3.1.4] */
    FRAME_MISSING_NOFRAMES,                         /* [6.5.1.1] */
    NOFRAMES_INVALID_NO_VALUE,                      /* [6.5.1.2] */
    NOFRAMES_INVALID_CONTENT,                       /* [6.5.1.3] */
    NOFRAMES_INVALID_LINK,                          /* [6.5.1.4] */
    REMOVE_FLICKER_SCRIPT,                          /* [7.1.1.1] */
    REMOVE_FLICKER_OBJECT,                          /* [7.1.1.2] */
    REMOVE_FLICKER_EMBED,                           /* [7.1.1.3] */
    REMOVE_FLICKER_APPLET,                          /* [7.1.1.4] */
    REMOVE_FLICKER_ANIMATED_GIF,                    /* [7.1.1.5] */
    REMOVE_BLINK_MARQUEE,                           /* [7.2.1.1] */
    REMOVE_AUTO_REFRESH,                            /* [7.4.1.1] */
    REMOVE_AUTO_REDIRECT,                           /* [7.5.1.1] */
    ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT,  /* [8.1.1.1] */
    ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT,  /* [8.1.1.2] */
    ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET,  /* [8.1.1.3] */
    ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED,   /* [8.1.1.4] */
    IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION,      /* [9.1.1.1] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN,   /* [9.3.1.1] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP,     /* [9.3.1.2] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK,        /* [9.3.1.3] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER,   /* [9.3.1.4] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT,    /* [9.3.1.5] */
    SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE,   /* [9.3.1.6] */
    NEW_WINDOWS_REQUIRE_WARNING_NEW,                /* [10.1.1.1] */
    NEW_WINDOWS_REQUIRE_WARNING_BLANK,              /* [10.1.1.2] */
    LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT,         /* [10.2.1.1] */
    LABEL_NEEDS_REPOSITIONING_AFTER_INPUT,          /* [10.2.1.2] */
    FORM_CONTROL_REQUIRES_DEFAULT_TEXT,             /* [10.4.1.1] */
    FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL,         /* [10.4.1.2] */
    FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES,       /* [10.4.1.3] */
    REPLACE_DEPRECATED_HTML_APPLET,                 /* [11.2.1.1] */
    REPLACE_DEPRECATED_HTML_BASEFONT,               /* [11.2.1.2] */
    REPLACE_DEPRECATED_HTML_CENTER,                 /* [11.2.1.3] */
    REPLACE_DEPRECATED_HTML_DIR,                    /* [11.2.1.4] */
    REPLACE_DEPRECATED_HTML_FONT,                   /* [11.2.1.5] */
    REPLACE_DEPRECATED_HTML_ISINDEX,                /* [11.2.1.6] */
    REPLACE_DEPRECATED_HTML_MENU,                   /* [11.2.1.7] */
    REPLACE_DEPRECATED_HTML_S,                      /* [11.2.1.8] */
    REPLACE_DEPRECATED_HTML_STRIKE,                 /* [11.2.1.9] */
    REPLACE_DEPRECATED_HTML_U,                      /* [11.2.1.10] */
    FRAME_MISSING_TITLE,                            /* [12.1.1.1] */
    FRAME_TITLE_INVALID_NULL,                       /* [12.1.1.2] */
    FRAME_TITLE_INVALID_SPACES,                     /* [12.1.1.3] */
    ASSOCIATE_LABELS_EXPLICITLY,                    /* [12.4.1.1] */
    ASSOCIATE_LABELS_EXPLICITLY_FOR,                /* [12.4.1.2] */
    ASSOCIATE_LABELS_EXPLICITLY_ID,                 /* [12.4.1.3] */
    LINK_TEXT_NOT_MEANINGFUL,                       /* [13.1.1.1] */
    LINK_TEXT_MISSING,                              /* [13.1.1.2] */
    LINK_TEXT_TOO_LONG,                             /* [13.1.1.3] */
    LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE,            /* [13.1.1.4] */
    LINK_TEXT_NOT_MEANINGFUL_MORE,                  /* [13.1.1.5] */
    LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS,           /* [13.1.1.6] */
    METADATA_MISSING,                               /* [13.2.1.1] */
    METADATA_MISSING_LINK,                          /* [13.2.1.2] */
    METADATA_MISSING_REDIRECT_AUTOREFRESH,          /* [13.2.1.3] */
    SKIPOVER_ASCII_ART,                             /* [13.10.1.1] */

    LAST_ACCESS_ERR    /* must be last */
};
|
||||
|
||||
|
||||
void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc );
|
||||
void TY_(DisplayHTMLTableAlgorithm)( TidyDocImpl* doc );
|
||||
|
||||
/************************************************************
|
||||
* AccessibilityChecks
|
||||
*
|
||||
* Traverses through the individual nodes of the tree
|
||||
* and checks attributes and elements for accessibility
|
||||
* after the tree structure has been formed.
|
||||
************************************************************/
|
||||
|
||||
void TY_(AccessibilityChecks)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
#endif /* SUPPORT_ACCESSIBILITY_CHECKS */
|
||||
#endif /* __ACCESS_H__ */
|
107
src/alloc.c
Normal file
107
src/alloc.c
Normal file
|
@ -0,0 +1,107 @@
|
|||
/* alloc.c -- Default memory allocation routines.
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/29 16:31:07 $
|
||||
$Revision: 1.7 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#include "forward.h"
|
||||
|
||||
static TidyMalloc g_malloc = NULL;
|
||||
static TidyRealloc g_realloc = NULL;
|
||||
static TidyFree g_free = NULL;
|
||||
static TidyPanic g_panic = NULL;
|
||||
|
||||
/* Install `fmalloc` as the allocation hook used by the default allocator.
   Passing NULL restores the malloc() fallback.  Always returns yes. */
Bool TIDY_CALL tidySetMallocCall( TidyMalloc fmalloc )
{
    g_malloc = fmalloc;

    return yes;
}
|
||||
/* Install `frealloc` as the reallocation hook used by the default allocator.
   Passing NULL restores the realloc() fallback.  Always returns yes. */
Bool TIDY_CALL tidySetReallocCall( TidyRealloc frealloc )
{
    g_realloc = frealloc;

    return yes;
}
|
||||
/* Install `ffree` as the release hook used by the default allocator.
   Passing NULL restores the free() fallback.  Always returns yes. */
Bool TIDY_CALL tidySetFreeCall( TidyFree ffree )
{
    g_free = ffree;

    return yes;
}
|
||||
/* Install `fpanic` as the fatal-error hook used by the default allocator.
   Passing NULL restores the built-in handler (message on stderr, then
   exit).  Always returns yes. */
Bool TIDY_CALL tidySetPanicCall( TidyPanic fpanic )
{
    g_panic = fpanic;

    return yes;
}
|
||||
|
||||
/* Fatal-error handler of the default allocator.
   If a client panic hook is installed it is called with `msg` and control
   then returns to the caller.  Otherwise the message is written to stderr
   and the process exits; debug builds assert first.  The allocator argument
   is unused. */
static void TIDY_CALL defaultPanic( TidyAllocator* ARG_UNUSED(allocator), ctmbstr msg )
{
    if ( g_panic != NULL )
    {
        g_panic( msg );
        return;
    }

    fprintf( stderr, "Fatal error: %s\n", msg );
#ifdef _DEBUG
    assert(0);
#endif
    /* exit status 2 signifies a serious error */
    exit(2);
}
|
||||
|
||||
/* Allocate `size` bytes through the client malloc hook if one is installed,
   otherwise through malloc().

   A zero-byte request is rounded up to one byte.  The C library may return
   NULL for malloc(0) without being out of memory, and the original code
   reported that as "Out of memory!" and exited.

   On failure defaultPanic() is called.  If the installed panic hook returns,
   NULL is returned to the caller. */
static void* TIDY_CALL defaultAlloc( TidyAllocator* allocator, size_t size )
{
    size_t want = ( size != 0 ? size : 1 );
    void *p = ( g_malloc ? g_malloc(want) : malloc(want) );
    if ( !p )
        defaultPanic( allocator, "Out of memory!" );
    return p;
}
|
||||
|
||||
/* Resize block `mem` to `newsize` bytes through the client realloc hook if
   one is installed, otherwise through realloc().  A NULL `mem` is forwarded
   to defaultAlloc().

   A zero-byte request is rounded up to one byte.  realloc(mem, 0) may free
   the block and return NULL, which the original code reported as
   "Out of memory!" and exited.

   On failure defaultPanic() is called.  If the installed panic hook returns,
   NULL is returned to the caller. */
static void* TIDY_CALL defaultRealloc( TidyAllocator* allocator, void* mem, size_t newsize )
{
    size_t want = ( newsize != 0 ? newsize : 1 );
    void *p;

    if ( mem == NULL )
        return defaultAlloc( allocator, want );

    p = ( g_realloc ? g_realloc(mem, want) : realloc(mem, want) );
    if ( !p )
        defaultPanic( allocator, "Out of memory!" );
    return p;
}
|
||||
|
||||
/* Release `mem` through the client free hook if one is installed, otherwise
   through free().  A NULL pointer is ignored and never reaches the hook.
   The allocator argument is unused. */
static void TIDY_CALL defaultFree( TidyAllocator* ARG_UNUSED(allocator), void* mem )
{
    if ( mem == NULL )
        return;

    if ( g_free != NULL )
        g_free( mem );
    else
        free( mem );
}
|
||||
|
||||
static const TidyAllocatorVtbl defaultVtbl = {
|
||||
defaultAlloc,
|
||||
defaultRealloc,
|
||||
defaultFree,
|
||||
defaultPanic
|
||||
};
|
||||
|
||||
TidyAllocator TY_(g_default_allocator) = {
|
||||
&defaultVtbl
|
||||
};
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
209
src/attrask.c
Normal file
209
src/attrask.c
Normal file
|
@ -0,0 +1,209 @@
|
|||
/* attrask.c -- Interrogate attribute type
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.5 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy-int.h"
|
||||
#include "tidy.h"
|
||||
#include "attrs.h"
|
||||
|
||||
/* Tells whether `tattr` is the HREF attribute (per attrIsHREF). */
Bool TIDY_CALL tidyAttrIsHREF( TidyAttr tattr )
{
    Bool matches = attrIsHREF( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the SRC attribute (per attrIsSRC). */
Bool TIDY_CALL tidyAttrIsSRC( TidyAttr tattr )
{
    Bool matches = attrIsSRC( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ID attribute (per attrIsID). */
Bool TIDY_CALL tidyAttrIsID( TidyAttr tattr )
{
    Bool matches = attrIsID( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the NAME attribute (per attrIsNAME). */
Bool TIDY_CALL tidyAttrIsNAME( TidyAttr tattr )
{
    Bool matches = attrIsNAME( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the SUMMARY attribute (per attrIsSUMMARY). */
Bool TIDY_CALL tidyAttrIsSUMMARY( TidyAttr tattr )
{
    Bool matches = attrIsSUMMARY( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ALT attribute (per attrIsALT). */
Bool TIDY_CALL tidyAttrIsALT( TidyAttr tattr )
{
    Bool matches = attrIsALT( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the LONGDESC attribute (per attrIsLONGDESC). */
Bool TIDY_CALL tidyAttrIsLONGDESC( TidyAttr tattr )
{
    Bool matches = attrIsLONGDESC( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the USEMAP attribute (per attrIsUSEMAP). */
Bool TIDY_CALL tidyAttrIsUSEMAP( TidyAttr tattr )
{
    Bool matches = attrIsUSEMAP( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ISMAP attribute (per attrIsISMAP). */
Bool TIDY_CALL tidyAttrIsISMAP( TidyAttr tattr )
{
    Bool matches = attrIsISMAP( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the LANGUAGE attribute (per attrIsLANGUAGE). */
Bool TIDY_CALL tidyAttrIsLANGUAGE( TidyAttr tattr )
{
    Bool matches = attrIsLANGUAGE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the TYPE attribute (per attrIsTYPE). */
Bool TIDY_CALL tidyAttrIsTYPE( TidyAttr tattr )
{
    Bool matches = attrIsTYPE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the VALUE attribute (per attrIsVALUE). */
Bool TIDY_CALL tidyAttrIsVALUE( TidyAttr tattr )
{
    Bool matches = attrIsVALUE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the CONTENT attribute (per attrIsCONTENT). */
Bool TIDY_CALL tidyAttrIsCONTENT( TidyAttr tattr )
{
    Bool matches = attrIsCONTENT( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the TITLE attribute (per attrIsTITLE). */
Bool TIDY_CALL tidyAttrIsTITLE( TidyAttr tattr )
{
    Bool matches = attrIsTITLE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the XMLNS attribute (per attrIsXMLNS). */
Bool TIDY_CALL tidyAttrIsXMLNS( TidyAttr tattr )
{
    Bool matches = attrIsXMLNS( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the DATAFLD attribute (per attrIsDATAFLD). */
Bool TIDY_CALL tidyAttrIsDATAFLD( TidyAttr tattr )
{
    Bool matches = attrIsDATAFLD( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the WIDTH attribute (per attrIsWIDTH). */
Bool TIDY_CALL tidyAttrIsWIDTH( TidyAttr tattr )
{
    Bool matches = attrIsWIDTH( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the HEIGHT attribute (per attrIsHEIGHT). */
Bool TIDY_CALL tidyAttrIsHEIGHT( TidyAttr tattr )
{
    Bool matches = attrIsHEIGHT( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the FOR attribute (per attrIsFOR). */
Bool TIDY_CALL tidyAttrIsFOR( TidyAttr tattr )
{
    Bool matches = attrIsFOR( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the SELECTED attribute (per attrIsSELECTED). */
Bool TIDY_CALL tidyAttrIsSELECTED( TidyAttr tattr )
{
    Bool matches = attrIsSELECTED( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the CHECKED attribute (per attrIsCHECKED). */
Bool TIDY_CALL tidyAttrIsCHECKED( TidyAttr tattr )
{
    Bool matches = attrIsCHECKED( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the LANG attribute (per attrIsLANG). */
Bool TIDY_CALL tidyAttrIsLANG( TidyAttr tattr )
{
    Bool matches = attrIsLANG( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the TARGET attribute (per attrIsTARGET). */
Bool TIDY_CALL tidyAttrIsTARGET( TidyAttr tattr )
{
    Bool matches = attrIsTARGET( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the HTTP_EQUIV attribute (per attrIsHTTP_EQUIV). */
Bool TIDY_CALL tidyAttrIsHTTP_EQUIV( TidyAttr tattr )
{
    Bool matches = attrIsHTTP_EQUIV( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the REL attribute (per attrIsREL). */
Bool TIDY_CALL tidyAttrIsREL( TidyAttr tattr )
{
    Bool matches = attrIsREL( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is an event attribute, as decided by
   TY_(attrIsEvent). */
Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
{
    Bool matches = TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnMOUSEMOVE attribute (per attrIsOnMOUSEMOVE). */
Bool TIDY_CALL tidyAttrIsOnMOUSEMOVE( TidyAttr tattr )
{
    Bool matches = attrIsOnMOUSEMOVE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnMOUSEDOWN attribute (per attrIsOnMOUSEDOWN). */
Bool TIDY_CALL tidyAttrIsOnMOUSEDOWN( TidyAttr tattr )
{
    Bool matches = attrIsOnMOUSEDOWN( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnMOUSEUP attribute (per attrIsOnMOUSEUP). */
Bool TIDY_CALL tidyAttrIsOnMOUSEUP( TidyAttr tattr )
{
    Bool matches = attrIsOnMOUSEUP( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnCLICK attribute (per attrIsOnCLICK). */
Bool TIDY_CALL tidyAttrIsOnCLICK( TidyAttr tattr )
{
    Bool matches = attrIsOnCLICK( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnMOUSEOVER attribute (per attrIsOnMOUSEOVER). */
Bool TIDY_CALL tidyAttrIsOnMOUSEOVER( TidyAttr tattr )
{
    Bool matches = attrIsOnMOUSEOVER( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnMOUSEOUT attribute (per attrIsOnMOUSEOUT). */
Bool TIDY_CALL tidyAttrIsOnMOUSEOUT( TidyAttr tattr )
{
    Bool matches = attrIsOnMOUSEOUT( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnKEYDOWN attribute (per attrIsOnKEYDOWN). */
Bool TIDY_CALL tidyAttrIsOnKEYDOWN( TidyAttr tattr )
{
    Bool matches = attrIsOnKEYDOWN( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnKEYUP attribute (per attrIsOnKEYUP). */
Bool TIDY_CALL tidyAttrIsOnKEYUP( TidyAttr tattr )
{
    Bool matches = attrIsOnKEYUP( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnKEYPRESS attribute (per attrIsOnKEYPRESS). */
Bool TIDY_CALL tidyAttrIsOnKEYPRESS( TidyAttr tattr )
{
    Bool matches = attrIsOnKEYPRESS( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnFOCUS attribute (per attrIsOnFOCUS). */
Bool TIDY_CALL tidyAttrIsOnFOCUS( TidyAttr tattr )
{
    Bool matches = attrIsOnFOCUS( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the OnBLUR attribute (per attrIsOnBLUR). */
Bool TIDY_CALL tidyAttrIsOnBLUR( TidyAttr tattr )
{
    Bool matches = attrIsOnBLUR( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the BGCOLOR attribute (per attrIsBGCOLOR). */
Bool TIDY_CALL tidyAttrIsBGCOLOR( TidyAttr tattr )
{
    Bool matches = attrIsBGCOLOR( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the LINK attribute (per attrIsLINK). */
Bool TIDY_CALL tidyAttrIsLINK( TidyAttr tattr )
{
    Bool matches = attrIsLINK( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ALINK attribute (per attrIsALINK). */
Bool TIDY_CALL tidyAttrIsALINK( TidyAttr tattr )
{
    Bool matches = attrIsALINK( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the VLINK attribute (per attrIsVLINK). */
Bool TIDY_CALL tidyAttrIsVLINK( TidyAttr tattr )
{
    Bool matches = attrIsVLINK( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the TEXT attribute (per attrIsTEXT). */
Bool TIDY_CALL tidyAttrIsTEXT( TidyAttr tattr )
{
    Bool matches = attrIsTEXT( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the STYLE attribute (per attrIsSTYLE). */
Bool TIDY_CALL tidyAttrIsSTYLE( TidyAttr tattr )
{
    Bool matches = attrIsSTYLE( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ABBR attribute (per attrIsABBR). */
Bool TIDY_CALL tidyAttrIsABBR( TidyAttr tattr )
{
    Bool matches = attrIsABBR( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the COLSPAN attribute (per attrIsCOLSPAN). */
Bool TIDY_CALL tidyAttrIsCOLSPAN( TidyAttr tattr )
{
    Bool matches = attrIsCOLSPAN( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
/* Tells whether `tattr` is the ROWSPAN attribute (per attrIsROWSPAN). */
Bool TIDY_CALL tidyAttrIsROWSPAN( TidyAttr tattr )
{
    Bool matches = attrIsROWSPAN( tidyAttrToImpl(tattr) );
    return matches;
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
8810
src/attrdict.c
Normal file
8810
src/attrdict.c
Normal file
File diff suppressed because it is too large
Load diff
146
src/attrdict.h
Normal file
146
src/attrdict.h
Normal file
|
@ -0,0 +1,146 @@
|
|||
#ifndef __ATTRDICT_H__
|
||||
#define __ATTRDICT_H__
|
||||
|
||||
/* attrdict.h -- extended attribute information
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: attrdict.h,v 1.4 2006/09/12 15:14:44 arnaud02 Exp $
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
|
||||
typedef struct _AttrVersion
|
||||
{
|
||||
TidyAttrId attribute;
|
||||
uint versions;
|
||||
} AttrVersion;
|
||||
|
||||
extern const AttrVersion TY_(W3CAttrsFor_A)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ABBR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ACRONYM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ADDRESS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_APPLET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_AREA)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_B)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BASE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BASEFONT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BDO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BIG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BLOCKQUOTE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BODY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_BUTTON)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CAPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CENTER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CITE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CODE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COLGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DEL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DFN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DIV)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_EM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIELDSET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FONT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FORM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FRAME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FRAMESET)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H1)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H2)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H3)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H4)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H5)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_H6)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HEAD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HTML)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_I)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_IFRAME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_IMG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_INPUT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_INS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ISINDEX)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_KBD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LABEL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LEGEND)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LI)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LINK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_LISTING)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MAP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MENU)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_META)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NEXTID)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NOFRAMES)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NOSCRIPT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OBJECT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OPTGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_P)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PARAM)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PLAINTEXT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PRE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_Q)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RB)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RBC)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RTC)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_RUBY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_S)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SAMP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SCRIPT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SELECT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SMALL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SPAN)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STRIKE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STRONG)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_STYLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUB)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TABLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TBODY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TEXTAREA)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TFOOT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TH)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_THEAD)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TITLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_U)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_UL)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_VAR)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_XMP)[];
|
||||
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TRACK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SUMMARY)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIGCAPTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HGROUP)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FIGURE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ARTICLE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_ASIDE)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_NAV)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SECTION)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_FOOTER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_HEADER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DETAILS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_COMMAND)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_MARK)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_OUTPUT)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_METER)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_PROGRESS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_TIME)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_DATALIST)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_VIDEO)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_CANVAS)[];
|
||||
extern const AttrVersion TY_(W3CAttrsFor_SOURCE)[];
|
||||
|
||||
#endif /* __ATTRDICT_H__ */
|
213
src/attrget.c
Normal file
213
src/attrget.c
Normal file
|
@ -0,0 +1,213 @@
|
|||
/* attrget.c -- Locate attribute value by type
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.6 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy-int.h"
|
||||
#include "tags.h"
|
||||
#include "attrs.h"
|
||||
#include "tidy.h"
|
||||
|
||||
/* Looks up the attribute with id `attId` on node `tnod` through
   TY_(AttrGetById) and returns the result converted with tidyImplToAttr. */
TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
{
    return tidyImplToAttr( TY_(AttrGetById)( tidyNodeToImpl(tnod), attId ) );
}
|
||||
/* Fetches the HREF attribute of node `tnod` (attrGetHREF) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHREF( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHREF(nimp) );
}
|
||||
/* Fetches the SRC attribute of node `tnod` (attrGetSRC) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSRC( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSRC(nimp) );
}
|
||||
/* Fetches the ID attribute of node `tnod` (attrGetID) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetID( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetID(nimp) );
}
|
||||
/* Fetches the NAME attribute of node `tnod` (attrGetNAME) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetNAME( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetNAME(nimp) );
}
|
||||
/* Fetches the SUMMARY attribute of node `tnod` (attrGetSUMMARY) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSUMMARY( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSUMMARY(nimp) );
}
|
||||
/* Fetches the ALT attribute of node `tnod` (attrGetALT) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetALT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetALT(nimp) );
}
|
||||
/* Fetches the LONGDESC attribute of node `tnod` (attrGetLONGDESC) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLONGDESC( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLONGDESC(nimp) );
}
|
||||
/* Fetches the USEMAP attribute of node `tnod` (attrGetUSEMAP) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetUSEMAP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetUSEMAP(nimp) );
}
|
||||
/* Fetches the ISMAP attribute of node `tnod` (attrGetISMAP) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetISMAP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetISMAP(nimp) );
}
|
||||
/* Fetches the LANGUAGE attribute of node `tnod` (attrGetLANGUAGE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLANGUAGE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLANGUAGE(nimp) );
}
|
||||
/* Fetches the TYPE attribute of node `tnod` (attrGetTYPE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTYPE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTYPE(nimp) );
}
|
||||
/* Fetches the VALUE attribute of node `tnod` (attrGetVALUE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetVALUE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetVALUE(nimp) );
}
|
||||
/* Fetches the CONTENT attribute of node `tnod` (attrGetCONTENT) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCONTENT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCONTENT(nimp) );
}
|
||||
/* Fetches the TITLE attribute of node `tnod` (attrGetTITLE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTITLE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTITLE(nimp) );
}
|
||||
/* Fetches the XMLNS attribute of node `tnod` (attrGetXMLNS) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetXMLNS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetXMLNS(nimp) );
}
|
||||
/* Fetches the DATAFLD attribute of node `tnod` (attrGetDATAFLD) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetDATAFLD( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetDATAFLD(nimp) );
}
|
||||
/* Fetches the WIDTH attribute of node `tnod` (attrGetWIDTH) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetWIDTH( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetWIDTH(nimp) );
}
|
||||
/* Fetches the HEIGHT attribute of node `tnod` (attrGetHEIGHT) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHEIGHT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHEIGHT(nimp) );
}
|
||||
/* Fetches the FOR attribute of node `tnod` (attrGetFOR) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetFOR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetFOR(nimp) );
}
|
||||
/* Fetches the SELECTED attribute of node `tnod` (attrGetSELECTED) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSELECTED( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSELECTED(nimp) );
}
|
||||
/* Fetches the CHECKED attribute of node `tnod` (attrGetCHECKED) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCHECKED( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCHECKED(nimp) );
}
|
||||
/* Fetches the LANG attribute of node `tnod` (attrGetLANG) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLANG( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLANG(nimp) );
}
|
||||
/* Fetches the TARGET attribute of node `tnod` (attrGetTARGET) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTARGET( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTARGET(nimp) );
}
|
||||
/* Fetches the HTTP_EQUIV attribute of node `tnod` (attrGetHTTP_EQUIV) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetHTTP_EQUIV( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetHTTP_EQUIV(nimp) );
}
|
||||
/* Fetches the REL attribute of node `tnod` (attrGetREL) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetREL( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetREL(nimp) );
}
|
||||
|
||||
/* Fetches the OnMOUSEMOVE attribute of node `tnod` (attrGetOnMOUSEMOVE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEMOVE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEMOVE(nimp) );
}
|
||||
/* Fetches the OnMOUSEDOWN attribute of node `tnod` (attrGetOnMOUSEDOWN) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEDOWN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEDOWN(nimp) );
}
|
||||
/* Fetches the OnMOUSEUP attribute of node `tnod` (attrGetOnMOUSEUP) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEUP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEUP(nimp) );
}
|
||||
/* Fetches the OnCLICK attribute of node `tnod` (attrGetOnCLICK) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnCLICK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnCLICK(nimp) );
}
|
||||
/* Fetches the OnMOUSEOVER attribute of node `tnod` (attrGetOnMOUSEOVER) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOVER( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEOVER(nimp) );
}
|
||||
/* Fetches the OnMOUSEOUT attribute of node `tnod` (attrGetOnMOUSEOUT) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnMOUSEOUT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnMOUSEOUT(nimp) );
}
|
||||
/* Fetches the OnKEYDOWN attribute of node `tnod` (attrGetOnKEYDOWN) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYDOWN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYDOWN(nimp) );
}
|
||||
/* Fetches the OnKEYUP attribute of node `tnod` (attrGetOnKEYUP) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYUP( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYUP(nimp) );
}
|
||||
/* Fetches the OnKEYPRESS attribute of node `tnod` (attrGetOnKEYPRESS) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnKEYPRESS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnKEYPRESS(nimp) );
}
|
||||
/* Fetches the OnFOCUS attribute of node `tnod` (attrGetOnFOCUS) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnFOCUS( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnFOCUS(nimp) );
}
|
||||
/* Fetches the OnBLUR attribute of node `tnod` (attrGetOnBLUR) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetOnBLUR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetOnBLUR(nimp) );
}
|
||||
/* Fetches the BGCOLOR attribute of node `tnod` (attrGetBGCOLOR) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetBGCOLOR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetBGCOLOR(nimp) );
}
|
||||
/* Fetches the LINK attribute of node `tnod` (attrGetLINK) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetLINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetLINK(nimp) );
}
|
||||
/* Fetches the ALINK attribute of node `tnod` (attrGetALINK) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetALINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetALINK(nimp) );
}
|
||||
/* Fetches the VLINK attribute of node `tnod` (attrGetVLINK) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetVLINK( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetVLINK(nimp) );
}
|
||||
|
||||
/* Fetches the TEXT attribute of node `tnod` (attrGetTEXT) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetTEXT( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetTEXT(nimp) );
}
|
||||
/* Fetches the STYLE attribute of node `tnod` (attrGetSTYLE) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetSTYLE( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetSTYLE(nimp) );
}
|
||||
/* Fetches the ABBR attribute of node `tnod` (attrGetABBR) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetABBR( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetABBR(nimp) );
}
|
||||
/* Fetches the COLSPAN attribute of node `tnod` (attrGetCOLSPAN) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetCOLSPAN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetCOLSPAN(nimp) );
}
|
||||
/* Fetches the ROWSPAN attribute of node `tnod` (attrGetROWSPAN) as a TidyAttr. */
TidyAttr TIDY_CALL tidyAttrGetROWSPAN( TidyNode tnod )
{
    Node* nimp = tidyNodeToImpl( tnod );
    return tidyImplToAttr( attrGetROWSPAN(nimp) );
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
2013
src/attrs.c
Normal file
2013
src/attrs.c
Normal file
File diff suppressed because it is too large
Load diff
373
src/attrs.h
Normal file
373
src/attrs.h
Normal file
|
@ -0,0 +1,373 @@
|
|||
#ifndef __ATTRS_H__
|
||||
#define __ATTRS_H__
|
||||
|
||||
/* attrs.h -- recognize HTML attributes
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/06/14 09:36:06 $
|
||||
$Revision: 1.29 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
/* declaration for methods that check attribute values */
|
||||
typedef void (AttrCheck)(TidyDocImpl* doc, Node *node, AttVal *attval);
|
||||
|
||||
struct _Attribute
|
||||
{
|
||||
TidyAttrId id;
|
||||
tmbstr name;
|
||||
AttrCheck* attrchk;
|
||||
|
||||
struct _Attribute* next;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Anchor/Node linked list
|
||||
*/
|
||||
|
||||
struct _Anchor
|
||||
{
|
||||
struct _Anchor *next;
|
||||
Node *node;
|
||||
char *name;
|
||||
};
|
||||
|
||||
typedef struct _Anchor Anchor;
|
||||
|
||||
#if !defined(ATTRIBUTE_HASH_LOOKUP)
|
||||
#define ATTRIBUTE_HASH_LOOKUP 1
|
||||
#endif
|
||||
|
||||
#if ATTRIBUTE_HASH_LOOKUP
/* Number of slots in the attribute hash table
** (dimension of TidyAttribImpl.hashtab).
*/
enum { ATTRIBUTE_HASH_SIZE = 178u };

/* One link in a hash chain: refers to an attribute definition and to the
** next link.
*/
struct _AttrHash
{
    Attribute const*  attr;   /* attribute definition held by this link */
    struct _AttrHash* next;   /* following link in the chain */
};

typedef struct _AttrHash AttrHash;
#endif
|
||||
|
||||
struct _TidyAttribImpl
|
||||
{
|
||||
/* anchor/node lookup */
|
||||
Anchor* anchor_list;
|
||||
|
||||
/* Declared literal attributes */
|
||||
Attribute* declared_attr_list;
|
||||
|
||||
#if ATTRIBUTE_HASH_LOOKUP
|
||||
AttrHash* hashtab[ATTRIBUTE_HASH_SIZE];
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct _TidyAttribImpl TidyAttribImpl;
|
||||
|
||||
#define XHTML_NAMESPACE "http://www.w3.org/1999/xhtml"
|
||||
|
||||
AttrCheck TY_(CheckUrl);
|
||||
|
||||
/* public method for finding attribute definition by name */
|
||||
const Attribute* TY_(CheckAttribute)( TidyDocImpl* doc, Node *node, AttVal *attval );
|
||||
|
||||
const Attribute* TY_(FindAttribute)( TidyDocImpl* doc, AttVal *attval );
|
||||
|
||||
AttVal* TY_(GetAttrByName)( Node *node, ctmbstr name );
|
||||
|
||||
AttVal* TY_(AddAttribute)( TidyDocImpl* doc,
|
||||
Node *node, ctmbstr name, ctmbstr value );
|
||||
|
||||
AttVal* TY_(RepairAttrValue)(TidyDocImpl* doc, Node* node, ctmbstr name, ctmbstr value);
|
||||
|
||||
Bool TY_(IsUrl)( TidyDocImpl* doc, ctmbstr attrname );
|
||||
|
||||
/* Bool IsBool( TidyDocImpl* doc, ctmbstr attrname ); */
|
||||
|
||||
Bool TY_(IsScript)( TidyDocImpl* doc, ctmbstr attrname );
|
||||
|
||||
/* may id or name serve as anchor? */
|
||||
Bool TY_(IsAnchorElement)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
In CSS1, selectors can contain only the characters A-Z, 0-9, and
|
||||
Unicode characters 161-255, plus dash (-); they cannot start with
|
||||
a dash or a digit; they can also contain escaped characters and any
|
||||
Unicode character as a numeric code (see next item).
|
||||
|
||||
The backslash followed by at most four hexadecimal digits (0..9A..F)
|
||||
stands for the Unicode character with that number.
|
||||
|
||||
Any character except a hexadecimal digit can be escaped to remove its
|
||||
special meaning, by putting a backslash in front.
|
||||
|
||||
#508936 - CSS class naming for -clean option
|
||||
*/
|
||||
Bool TY_(IsCSS1Selector)( ctmbstr buf );
|
||||
|
||||
Bool TY_(IsValidHTMLID)(ctmbstr id);
|
||||
Bool TY_(IsValidXMLID)(ctmbstr id);
|
||||
|
||||
/* removes anchor for specific node */
|
||||
void TY_(RemoveAnchorByNode)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
/* free all anchors */
|
||||
void TY_(FreeAnchors)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/* public methods for initializing/freeing attribute dictionary */
|
||||
void TY_(InitAttrs)( TidyDocImpl* doc );
|
||||
void TY_(FreeAttrTable)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(AppendToClassAttr)( TidyDocImpl* doc, AttVal *classattr, ctmbstr classname );
|
||||
/*
|
||||
the same attribute name can't be used
|
||||
more than once in each element
|
||||
*/
|
||||
void TY_(RepairDuplicateAttributes)( TidyDocImpl* doc, Node* node, Bool isXml );
|
||||
void TY_(SortAttributes)(Node* node, TidyAttrSortStrategy strat);
|
||||
|
||||
Bool TY_(IsBoolAttribute)( AttVal* attval );
|
||||
Bool TY_(attrIsEvent)( AttVal* attval );
|
||||
|
||||
AttVal* TY_(AttrGetById)( Node* node, TidyAttrId id );
|
||||
|
||||
uint TY_(NodeAttributeVersions)( Node* node, TidyAttrId id );
|
||||
|
||||
/* 0 == TidyAttr_UNKNOWN */
|
||||
/* AttrId(av): id stored in av->dict, or TidyAttr_UNKNOWN when av or
** av->dict is null.
*/
#define AttrId(av) ((av) && (av)->dict ? (av)->dict->id : TidyAttr_UNKNOWN)

/* AttrIsId(av, atid): non-zero when av and av->dict are non-null and the
** dictionary id equals atid.  `atid` is parenthesized so that expression
** arguments (e.g. `cond ? A : B`) compare as a whole instead of binding
** to `==` piecewise.
*/
#define AttrIsId(av, atid) ((av) && (av)->dict && ((av)->dict->id == (atid)))
|
||||
|
||||
/* Non-zero when attr is non-null and carries a non-null value. */
#define AttrHasValue(attr)      ((attr) && (attr)->value)

/* Non-zero when attr has a value for which TY_(tmbstrcasecmp) against val
** returns 0.
*/
#define AttrValueIs(attr, val)  (AttrHasValue(attr) && \
                                 0 == TY_(tmbstrcasecmp)((attr)->value, val))

/* Non-zero when attr has a value for which TY_(tmbsubstr) finds val. */
#define AttrContains(attr, val) (AttrHasValue(attr) && \
                                 NULL != TY_(tmbsubstr)((attr)->value, val))

/* Versions recorded in attr->dict, or VERS_PROPRIETARY when attr or
** attr->dict is null.
** NOTE(review): struct _Attribute as declared in this header has no
** `versions` member -- confirm the type of `dict` and whether this macro
** is still usable.
*/
#define AttrVersions(attr)      ((attr) && (attr)->dict ? (attr)->dict->versions : VERS_PROPRIETARY)
|
||||
|
||||
/* Non-zero when both a and b are non-null, both have a dictionary entry
** with a non-zero id, and the two ids are equal.  Every use of the
** parameters is parenthesized so that expression arguments such as
** `p + 1` expand correctly.
*/
#define AttrsHaveSameId(a, b) ((a) && (b) && (a)->dict && (b)->dict && \
                               (a)->dict->id && (b)->dict->id && \
                               (a)->dict->id == (b)->dict->id)
|
||||
|
||||
#define attrIsABBR(av) AttrIsId( av, TidyAttr_ABBR )
|
||||
#define attrIsACCEPT(av) AttrIsId( av, TidyAttr_ACCEPT )
|
||||
#define attrIsACCEPT_CHARSET(av) AttrIsId( av, TidyAttr_ACCEPT_CHARSET )
|
||||
#define attrIsACCESSKEY(av) AttrIsId( av, TidyAttr_ACCESSKEY )
|
||||
#define attrIsACTION(av) AttrIsId( av, TidyAttr_ACTION )
|
||||
#define attrIsADD_DATE(av) AttrIsId( av, TidyAttr_ADD_DATE )
|
||||
#define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN )
|
||||
#define attrIsALINK(av) AttrIsId( av, TidyAttr_ALINK )
|
||||
#define attrIsALT(av) AttrIsId( av, TidyAttr_ALT )
|
||||
#define attrIsARCHIVE(av) AttrIsId( av, TidyAttr_ARCHIVE )
|
||||
#define attrIsAXIS(av) AttrIsId( av, TidyAttr_AXIS )
|
||||
#define attrIsBACKGROUND(av) AttrIsId( av, TidyAttr_BACKGROUND )
|
||||
#define attrIsBGCOLOR(av) AttrIsId( av, TidyAttr_BGCOLOR )
|
||||
#define attrIsBGPROPERTIES(av) AttrIsId( av, TidyAttr_BGPROPERTIES )
|
||||
#define attrIsBORDER(av) AttrIsId( av, TidyAttr_BORDER )
|
||||
#define attrIsBORDERCOLOR(av) AttrIsId( av, TidyAttr_BORDERCOLOR )
|
||||
#define attrIsBOTTOMMARGIN(av) AttrIsId( av, TidyAttr_BOTTOMMARGIN )
|
||||
#define attrIsCELLPADDING(av) AttrIsId( av, TidyAttr_CELLPADDING )
|
||||
#define attrIsCELLSPACING(av) AttrIsId( av, TidyAttr_CELLSPACING )
|
||||
#define attrIsCHAR(av) AttrIsId( av, TidyAttr_CHAR )
|
||||
#define attrIsCHAROFF(av) AttrIsId( av, TidyAttr_CHAROFF )
|
||||
#define attrIsCHARSET(av) AttrIsId( av, TidyAttr_CHARSET )
|
||||
#define attrIsCHECKED(av) AttrIsId( av, TidyAttr_CHECKED )
|
||||
#define attrIsCITE(av) AttrIsId( av, TidyAttr_CITE )
|
||||
#define attrIsCLASS(av) AttrIsId( av, TidyAttr_CLASS )
|
||||
#define attrIsCLASSID(av) AttrIsId( av, TidyAttr_CLASSID )
|
||||
#define attrIsCLEAR(av) AttrIsId( av, TidyAttr_CLEAR )
|
||||
#define attrIsCODE(av) AttrIsId( av, TidyAttr_CODE )
|
||||
#define attrIsCODEBASE(av) AttrIsId( av, TidyAttr_CODEBASE )
|
||||
#define attrIsCODETYPE(av) AttrIsId( av, TidyAttr_CODETYPE )
|
||||
#define attrIsCOLOR(av) AttrIsId( av, TidyAttr_COLOR )
|
||||
#define attrIsCOLS(av) AttrIsId( av, TidyAttr_COLS )
|
||||
#define attrIsCOLSPAN(av) AttrIsId( av, TidyAttr_COLSPAN )
|
||||
#define attrIsCOMPACT(av) AttrIsId( av, TidyAttr_COMPACT )
|
||||
#define attrIsCONTENT(av) AttrIsId( av, TidyAttr_CONTENT )
|
||||
#define attrIsCOORDS(av) AttrIsId( av, TidyAttr_COORDS )
|
||||
#define attrIsDATA(av) AttrIsId( av, TidyAttr_DATA )
|
||||
#define attrIsDATAFLD(av) AttrIsId( av, TidyAttr_DATAFLD )
|
||||
#define attrIsDATAFORMATAS(av) AttrIsId( av, TidyAttr_DATAFORMATAS )
|
||||
#define attrIsDATAPAGESIZE(av) AttrIsId( av, TidyAttr_DATAPAGESIZE )
|
||||
#define attrIsDATASRC(av) AttrIsId( av, TidyAttr_DATASRC )
|
||||
#define attrIsDATETIME(av) AttrIsId( av, TidyAttr_DATETIME )
|
||||
#define attrIsDECLARE(av) AttrIsId( av, TidyAttr_DECLARE )
|
||||
#define attrIsDEFER(av) AttrIsId( av, TidyAttr_DEFER )
|
||||
#define attrIsDIR(av) AttrIsId( av, TidyAttr_DIR )
|
||||
#define attrIsDISABLED(av) AttrIsId( av, TidyAttr_DISABLED )
|
||||
#define attrIsENCODING(av) AttrIsId( av, TidyAttr_ENCODING )
|
||||
#define attrIsENCTYPE(av) AttrIsId( av, TidyAttr_ENCTYPE )
|
||||
#define attrIsFACE(av) AttrIsId( av, TidyAttr_FACE )
|
||||
#define attrIsFOR(av) AttrIsId( av, TidyAttr_FOR )
|
||||
#define attrIsFRAME(av) AttrIsId( av, TidyAttr_FRAME )
|
||||
#define attrIsFRAMEBORDER(av) AttrIsId( av, TidyAttr_FRAMEBORDER )
|
||||
#define attrIsFRAMESPACING(av) AttrIsId( av, TidyAttr_FRAMESPACING )
|
||||
#define attrIsGRIDX(av) AttrIsId( av, TidyAttr_GRIDX )
|
||||
#define attrIsGRIDY(av) AttrIsId( av, TidyAttr_GRIDY )
|
||||
#define attrIsHEADERS(av) AttrIsId( av, TidyAttr_HEADERS )
|
||||
#define attrIsHEIGHT(av) AttrIsId( av, TidyAttr_HEIGHT )
|
||||
#define attrIsHREF(av) AttrIsId( av, TidyAttr_HREF )
|
||||
#define attrIsHREFLANG(av) AttrIsId( av, TidyAttr_HREFLANG )
|
||||
#define attrIsHSPACE(av) AttrIsId( av, TidyAttr_HSPACE )
|
||||
#define attrIsHTTP_EQUIV(av) AttrIsId( av, TidyAttr_HTTP_EQUIV )
|
||||
#define attrIsID(av) AttrIsId( av, TidyAttr_ID )
|
||||
#define attrIsISMAP(av) AttrIsId( av, TidyAttr_ISMAP )
|
||||
#define attrIsLABEL(av) AttrIsId( av, TidyAttr_LABEL )
|
||||
#define attrIsLANG(av) AttrIsId( av, TidyAttr_LANG )
|
||||
#define attrIsLANGUAGE(av) AttrIsId( av, TidyAttr_LANGUAGE )
|
||||
#define attrIsLAST_MODIFIED(av) AttrIsId( av, TidyAttr_LAST_MODIFIED )
|
||||
#define attrIsLAST_VISIT(av) AttrIsId( av, TidyAttr_LAST_VISIT )
|
||||
#define attrIsLEFTMARGIN(av) AttrIsId( av, TidyAttr_LEFTMARGIN )
|
||||
#define attrIsLINK(av) AttrIsId( av, TidyAttr_LINK )
|
||||
#define attrIsLONGDESC(av) AttrIsId( av, TidyAttr_LONGDESC )
|
||||
#define attrIsLOWSRC(av) AttrIsId( av, TidyAttr_LOWSRC )
|
||||
#define attrIsMARGINHEIGHT(av) AttrIsId( av, TidyAttr_MARGINHEIGHT )
|
||||
#define attrIsMARGINWIDTH(av) AttrIsId( av, TidyAttr_MARGINWIDTH )
|
||||
#define attrIsMAXLENGTH(av) AttrIsId( av, TidyAttr_MAXLENGTH )
|
||||
#define attrIsMEDIA(av) AttrIsId( av, TidyAttr_MEDIA )
|
||||
#define attrIsMETHOD(av) AttrIsId( av, TidyAttr_METHOD )
|
||||
#define attrIsMULTIPLE(av) AttrIsId( av, TidyAttr_MULTIPLE )
|
||||
#define attrIsNAME(av) AttrIsId( av, TidyAttr_NAME )
|
||||
#define attrIsNOHREF(av) AttrIsId( av, TidyAttr_NOHREF )
|
||||
#define attrIsNORESIZE(av) AttrIsId( av, TidyAttr_NORESIZE )
|
||||
#define attrIsNOSHADE(av) AttrIsId( av, TidyAttr_NOSHADE )
|
||||
#define attrIsNOWRAP(av) AttrIsId( av, TidyAttr_NOWRAP )
|
||||
#define attrIsOBJECT(av) AttrIsId( av, TidyAttr_OBJECT )
|
||||
#define attrIsOnAFTERUPDATE(av) AttrIsId( av, TidyAttr_OnAFTERUPDATE )
|
||||
#define attrIsOnBEFOREUNLOAD(av) AttrIsId( av, TidyAttr_OnBEFOREUNLOAD )
|
||||
#define attrIsOnBEFOREUPDATE(av) AttrIsId( av, TidyAttr_OnBEFOREUPDATE )
|
||||
#define attrIsOnBLUR(av) AttrIsId( av, TidyAttr_OnBLUR )
|
||||
#define attrIsOnCHANGE(av) AttrIsId( av, TidyAttr_OnCHANGE )
|
||||
#define attrIsOnCLICK(av) AttrIsId( av, TidyAttr_OnCLICK )
|
||||
#define attrIsOnDATAAVAILABLE(av) AttrIsId( av, TidyAttr_OnDATAAVAILABLE )
|
||||
#define attrIsOnDATASETCHANGED(av) AttrIsId( av, TidyAttr_OnDATASETCHANGED )
|
||||
#define attrIsOnDATASETCOMPLETE(av) AttrIsId( av, TidyAttr_OnDATASETCOMPLETE )
|
||||
#define attrIsOnDBLCLICK(av) AttrIsId( av, TidyAttr_OnDBLCLICK )
|
||||
#define attrIsOnERRORUPDATE(av) AttrIsId( av, TidyAttr_OnERRORUPDATE )
|
||||
#define attrIsOnFOCUS(av) AttrIsId( av, TidyAttr_OnFOCUS )
|
||||
#define attrIsOnKEYDOWN(av) AttrIsId( av, TidyAttr_OnKEYDOWN )
|
||||
#define attrIsOnKEYPRESS(av) AttrIsId( av, TidyAttr_OnKEYPRESS )
|
||||
#define attrIsOnKEYUP(av) AttrIsId( av, TidyAttr_OnKEYUP )
|
||||
#define attrIsOnLOAD(av) AttrIsId( av, TidyAttr_OnLOAD )
|
||||
#define attrIsOnMOUSEDOWN(av) AttrIsId( av, TidyAttr_OnMOUSEDOWN )
|
||||
#define attrIsOnMOUSEMOVE(av) AttrIsId( av, TidyAttr_OnMOUSEMOVE )
|
||||
#define attrIsOnMOUSEOUT(av) AttrIsId( av, TidyAttr_OnMOUSEOUT )
|
||||
#define attrIsOnMOUSEOVER(av) AttrIsId( av, TidyAttr_OnMOUSEOVER )
|
||||
#define attrIsOnMOUSEUP(av) AttrIsId( av, TidyAttr_OnMOUSEUP )
|
||||
#define attrIsOnRESET(av) AttrIsId( av, TidyAttr_OnRESET )
|
||||
#define attrIsOnROWENTER(av) AttrIsId( av, TidyAttr_OnROWENTER )
|
||||
#define attrIsOnROWEXIT(av) AttrIsId( av, TidyAttr_OnROWEXIT )
|
||||
#define attrIsOnSELECT(av) AttrIsId( av, TidyAttr_OnSELECT )
|
||||
#define attrIsOnSUBMIT(av) AttrIsId( av, TidyAttr_OnSUBMIT )
|
||||
#define attrIsOnUNLOAD(av) AttrIsId( av, TidyAttr_OnUNLOAD )
|
||||
#define attrIsPROFILE(av) AttrIsId( av, TidyAttr_PROFILE )
|
||||
#define attrIsPROMPT(av) AttrIsId( av, TidyAttr_PROMPT )
|
||||
#define attrIsRBSPAN(av) AttrIsId( av, TidyAttr_RBSPAN )
|
||||
#define attrIsREADONLY(av) AttrIsId( av, TidyAttr_READONLY )
|
||||
#define attrIsREL(av) AttrIsId( av, TidyAttr_REL )
|
||||
#define attrIsREV(av) AttrIsId( av, TidyAttr_REV )
|
||||
#define attrIsRIGHTMARGIN(av) AttrIsId( av, TidyAttr_RIGHTMARGIN )
|
||||
#define attrIsROWS(av) AttrIsId( av, TidyAttr_ROWS )
|
||||
#define attrIsROWSPAN(av) AttrIsId( av, TidyAttr_ROWSPAN )
|
||||
#define attrIsRULES(av) AttrIsId( av, TidyAttr_RULES )
|
||||
#define attrIsSCHEME(av) AttrIsId( av, TidyAttr_SCHEME )
|
||||
#define attrIsSCOPE(av) AttrIsId( av, TidyAttr_SCOPE )
|
||||
#define attrIsSCROLLING(av) AttrIsId( av, TidyAttr_SCROLLING )
|
||||
#define attrIsSELECTED(av) AttrIsId( av, TidyAttr_SELECTED )
|
||||
#define attrIsSHAPE(av) AttrIsId( av, TidyAttr_SHAPE )
|
||||
#define attrIsSHOWGRID(av) AttrIsId( av, TidyAttr_SHOWGRID )
|
||||
#define attrIsSHOWGRIDX(av) AttrIsId( av, TidyAttr_SHOWGRIDX )
|
||||
#define attrIsSHOWGRIDY(av) AttrIsId( av, TidyAttr_SHOWGRIDY )
|
||||
#define attrIsSIZE(av) AttrIsId( av, TidyAttr_SIZE )
|
||||
#define attrIsSPAN(av) AttrIsId( av, TidyAttr_SPAN )
|
||||
#define attrIsSRC(av) AttrIsId( av, TidyAttr_SRC )
|
||||
#define attrIsSTANDBY(av) AttrIsId( av, TidyAttr_STANDBY )
|
||||
#define attrIsSTART(av) AttrIsId( av, TidyAttr_START )
|
||||
#define attrIsSTYLE(av) AttrIsId( av, TidyAttr_STYLE )
|
||||
#define attrIsSUMMARY(av) AttrIsId( av, TidyAttr_SUMMARY )
|
||||
#define attrIsTABINDEX(av) AttrIsId( av, TidyAttr_TABINDEX )
|
||||
#define attrIsTARGET(av) AttrIsId( av, TidyAttr_TARGET )
|
||||
#define attrIsTEXT(av) AttrIsId( av, TidyAttr_TEXT )
|
||||
#define attrIsTITLE(av) AttrIsId( av, TidyAttr_TITLE )
|
||||
#define attrIsTOPMARGIN(av) AttrIsId( av, TidyAttr_TOPMARGIN )
|
||||
#define attrIsTYPE(av) AttrIsId( av, TidyAttr_TYPE )
|
||||
#define attrIsUSEMAP(av) AttrIsId( av, TidyAttr_USEMAP )
|
||||
#define attrIsVALIGN(av) AttrIsId( av, TidyAttr_VALIGN )
|
||||
#define attrIsVALUE(av) AttrIsId( av, TidyAttr_VALUE )
|
||||
#define attrIsVALUETYPE(av) AttrIsId( av, TidyAttr_VALUETYPE )
|
||||
#define attrIsVERSION(av) AttrIsId( av, TidyAttr_VERSION )
|
||||
#define attrIsVLINK(av) AttrIsId( av, TidyAttr_VLINK )
|
||||
#define attrIsVSPACE(av) AttrIsId( av, TidyAttr_VSPACE )
|
||||
#define attrIsWIDTH(av) AttrIsId( av, TidyAttr_WIDTH )
|
||||
#define attrIsWRAP(av) AttrIsId( av, TidyAttr_WRAP )
|
||||
#define attrIsXMLNS(av) AttrIsId( av, TidyAttr_XMLNS )
|
||||
#define attrIsXML_LANG(av) AttrIsId( av, TidyAttr_XML_LANG )
|
||||
#define attrIsXML_SPACE(av) AttrIsId( av, TidyAttr_XML_SPACE )
|
||||
|
||||
|
||||
/* Attribute Retrieval macros
|
||||
*/
|
||||
#define attrGetHREF( nod ) TY_(AttrGetById)( nod, TidyAttr_HREF )
|
||||
#define attrGetSRC( nod ) TY_(AttrGetById)( nod, TidyAttr_SRC )
|
||||
#define attrGetID( nod ) TY_(AttrGetById)( nod, TidyAttr_ID )
|
||||
#define attrGetNAME( nod ) TY_(AttrGetById)( nod, TidyAttr_NAME )
|
||||
#define attrGetSUMMARY( nod ) TY_(AttrGetById)( nod, TidyAttr_SUMMARY )
|
||||
#define attrGetALT( nod ) TY_(AttrGetById)( nod, TidyAttr_ALT )
|
||||
#define attrGetLONGDESC( nod ) TY_(AttrGetById)( nod, TidyAttr_LONGDESC )
|
||||
#define attrGetUSEMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_USEMAP )
|
||||
#define attrGetISMAP( nod ) TY_(AttrGetById)( nod, TidyAttr_ISMAP )
|
||||
#define attrGetLANGUAGE( nod ) TY_(AttrGetById)( nod, TidyAttr_LANGUAGE )
|
||||
#define attrGetTYPE( nod ) TY_(AttrGetById)( nod, TidyAttr_TYPE )
|
||||
#define attrGetVALUE( nod ) TY_(AttrGetById)( nod, TidyAttr_VALUE )
|
||||
#define attrGetCONTENT( nod ) TY_(AttrGetById)( nod, TidyAttr_CONTENT )
|
||||
#define attrGetTITLE( nod ) TY_(AttrGetById)( nod, TidyAttr_TITLE )
|
||||
#define attrGetXMLNS( nod ) TY_(AttrGetById)( nod, TidyAttr_XMLNS )
|
||||
#define attrGetDATAFLD( nod ) TY_(AttrGetById)( nod, TidyAttr_DATAFLD )
|
||||
#define attrGetWIDTH( nod ) TY_(AttrGetById)( nod, TidyAttr_WIDTH )
|
||||
#define attrGetHEIGHT( nod ) TY_(AttrGetById)( nod, TidyAttr_HEIGHT )
|
||||
#define attrGetFOR( nod ) TY_(AttrGetById)( nod, TidyAttr_FOR )
|
||||
#define attrGetSELECTED( nod ) TY_(AttrGetById)( nod, TidyAttr_SELECTED )
|
||||
#define attrGetCHECKED( nod ) TY_(AttrGetById)( nod, TidyAttr_CHECKED )
|
||||
#define attrGetLANG( nod ) TY_(AttrGetById)( nod, TidyAttr_LANG )
|
||||
#define attrGetTARGET( nod ) TY_(AttrGetById)( nod, TidyAttr_TARGET )
|
||||
#define attrGetHTTP_EQUIV( nod ) TY_(AttrGetById)( nod, TidyAttr_HTTP_EQUIV )
|
||||
#define attrGetREL( nod ) TY_(AttrGetById)( nod, TidyAttr_REL )
|
||||
|
||||
#define attrGetOnMOUSEMOVE( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEMOVE )
|
||||
#define attrGetOnMOUSEDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEDOWN )
|
||||
#define attrGetOnMOUSEUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEUP )
|
||||
#define attrGetOnCLICK( nod ) TY_(AttrGetById)( nod, TidyAttr_OnCLICK )
|
||||
#define attrGetOnMOUSEOVER( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOVER )
|
||||
#define attrGetOnMOUSEOUT( nod ) TY_(AttrGetById)( nod, TidyAttr_OnMOUSEOUT )
|
||||
#define attrGetOnKEYDOWN( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYDOWN )
|
||||
#define attrGetOnKEYUP( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYUP )
|
||||
#define attrGetOnKEYPRESS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnKEYPRESS )
|
||||
#define attrGetOnFOCUS( nod ) TY_(AttrGetById)( nod, TidyAttr_OnFOCUS )
|
||||
#define attrGetOnBLUR( nod ) TY_(AttrGetById)( nod, TidyAttr_OnBLUR )
|
||||
|
||||
#define attrGetBGCOLOR( nod ) TY_(AttrGetById)( nod, TidyAttr_BGCOLOR )
|
||||
|
||||
#define attrGetLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_LINK )
|
||||
#define attrGetALINK( nod ) TY_(AttrGetById)( nod, TidyAttr_ALINK )
|
||||
#define attrGetVLINK( nod ) TY_(AttrGetById)( nod, TidyAttr_VLINK )
|
||||
|
||||
#define attrGetTEXT( nod ) TY_(AttrGetById)( nod, TidyAttr_TEXT )
|
||||
#define attrGetSTYLE( nod ) TY_(AttrGetById)( nod, TidyAttr_STYLE )
|
||||
#define attrGetABBR( nod ) TY_(AttrGetById)( nod, TidyAttr_ABBR )
|
||||
#define attrGetCOLSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_COLSPAN )
|
||||
#define attrGetFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_FONT )
|
||||
#define attrGetBASEFONT( nod ) TY_(AttrGetById)( nod, TidyAttr_BASEFONT )
|
||||
#define attrGetROWSPAN( nod ) TY_(AttrGetById)( nod, TidyAttr_ROWSPAN )
|
||||
|
||||
#endif /* __ATTRS_H__ */
|
232
src/buffio.c
Normal file
232
src/buffio.c
Normal file
|
@ -0,0 +1,232 @@
|
|||
/* buffio.c -- Treat buffer as an I/O stream.
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/01/23 11:17:46 $
|
||||
$Revision: 1.14 $
|
||||
|
||||
Requires buffer to automatically grow as bytes are added.
|
||||
Must keep track of current read and write points.
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#include "buffio.h"
|
||||
#include "forward.h"
|
||||
|
||||
/**************
|
||||
TIDY
|
||||
**************/
|
||||
|
||||
/* TidyInputSource getByte callback: appData is the TidyBuffer to read. */
static int TIDY_CALL insrc_getByte( void* appData )
{
    return tidyBufGetByte( (TidyBuffer*) appData );
}
|
||||
/* TidyInputSource eof callback: appData is the TidyBuffer being read. */
static Bool TIDY_CALL insrc_eof( void* appData )
{
    return tidyBufEndOfInput( (TidyBuffer*) appData );
}
|
||||
/* TidyInputSource ungetByte callback: pushes bv back onto the TidyBuffer
** passed as appData.
*/
static void TIDY_CALL insrc_ungetByte( void* appData, byte bv )
{
    tidyBufUngetByte( (TidyBuffer*) appData, bv );
}
|
||||
|
||||
/* Set up `inp` so that it reads from `buf`: the buffer becomes the source
** data and the three callbacks are the buffer-backed insrc_* functions.
*/
void TIDY_CALL tidyInitInputBuffer( TidyInputSource* inp, TidyBuffer* buf )
{
    inp->sourceData = buf;

    inp->getByte    = insrc_getByte;
    inp->ungetByte  = insrc_ungetByte;
    inp->eof        = insrc_eof;
}
|
||||
|
||||
/* TidyOutputSink putByte callback: appends bv to the TidyBuffer passed as
** appData.
*/
static void TIDY_CALL outsink_putByte( void* appData, byte bv )
{
    tidyBufPutByte( (TidyBuffer*) appData, bv );
}
|
||||
|
||||
/* Set up `outp` so that it writes into `buf`: the buffer becomes the sink
** data and bytes are delivered through outsink_putByte.
*/
void TIDY_CALL tidyInitOutputBuffer( TidyOutputSink* outp, TidyBuffer* buf )
{
    outp->sinkData = buf;
    outp->putByte  = outsink_putByte;
}
|
||||
|
||||
|
||||
/* Initialize `buf` without naming an allocator; the NULL allocator makes
** tidyBufInitWithAllocator() choose.  buf must not be NULL.
*/
void TIDY_CALL tidyBufInit( TidyBuffer* buf )
{
    assert( NULL != buf );

    tidyBufInitWithAllocator( buf, NULL );
}
|
||||
|
||||
/* Initialize `buf` and reserve room for allocSize bytes without naming an
** allocator; forwards to tidyBufAllocWithAllocator() with a NULL allocator.
*/
void TIDY_CALL tidyBufAlloc( TidyBuffer* buf, uint allocSize )
{
    tidyBufAllocWithAllocator( buf,
                               NULL,
                               allocSize );
}
|
||||
|
||||
/* Clear the whole TidyBuffer structure, then record the allocator to use.
** A NULL allocator selects TY_(g_default_allocator).  buf must not be NULL.
** Any memory previously referenced by the buffer is not released here.
*/
void TIDY_CALL tidyBufInitWithAllocator( TidyBuffer* buf,
                                         TidyAllocator *allocator )
{
    assert( buf != NULL );

    TidyClearMemory( buf, sizeof(TidyBuffer) );

    if ( !allocator )
        allocator = &TY_(g_default_allocator);
    buf->allocator = allocator;
}
|
||||
|
||||
/* Initialize `buf` with the given allocator (NULL is accepted, see
** tidyBufInitWithAllocator), make sure it can hold allocSize bytes using
** the default chunk size, and put the read position at the start.
*/
void TIDY_CALL tidyBufAllocWithAllocator( TidyBuffer* buf,
                                          TidyAllocator *allocator,
                                          uint allocSize )
{
    /* Step 1: reset the structure and attach the allocator. */
    tidyBufInitWithAllocator( buf, allocator );

    /* Step 2: reserve the storage; chunkSize 0 requests the default. */
    tidyBufCheckAlloc( buf, allocSize, 0 );

    /* Step 3: reading starts at offset 0. */
    buf->next = 0;
}
|
||||
|
||||
/* Release the buffer's storage through its own allocator, then
** re-initialize the structure while keeping that allocator.
** buf must not be NULL.
*/
void TIDY_CALL tidyBufFree( TidyBuffer* buf )
{
    assert( NULL != buf );

    /* Give the storage back first ... */
    TidyFree( buf->allocator, buf->bp );

    /* ... then reset every field, carrying the allocator over. */
    tidyBufInitWithAllocator( buf, buf->allocator );
}
|
||||
|
||||
/* Empty the buffer while keeping its storage: when storage exists, all
** `allocated` bytes are cleared and the size drops to 0.  The read
** position is reset in every case.  buf must not be NULL.
*/
void TIDY_CALL tidyBufClear( TidyBuffer* buf )
{
    assert( NULL != buf );

    buf->next = 0;

    if ( !buf->bp )
        return;

    TidyClearMemory( buf->bp, buf->allocated );
    buf->size = 0;
}
|
||||
|
||||
/* Many users do not call tidyBufInit() or tidyBufAlloc() or their allocator
|
||||
counterparts. So by default, set the default allocator.
|
||||
*/
|
||||
static void setDefaultAllocator( TidyBuffer* buf )
|
||||
{
|
||||
buf->allocator = &TY_(g_default_allocator);
|
||||
}
|
||||
|
||||
/* Avoid thrashing memory by doubling buffer size
|
||||
** until larger than requested size.
|
||||
buf->allocated is bigger than allocSize+1 so that a trailing null byte is
|
||||
always available.
|
||||
*/
|
||||
void TIDY_CALL tidyBufCheckAlloc( TidyBuffer* buf, uint allocSize, uint chunkSize )
|
||||
{
|
||||
assert( buf != NULL );
|
||||
|
||||
if ( !buf->allocator )
|
||||
setDefaultAllocator( buf );
|
||||
|
||||
if ( 0 == chunkSize )
|
||||
chunkSize = 256;
|
||||
if ( allocSize+1 > buf->allocated )
|
||||
{
|
||||
byte* bp;
|
||||
uint allocAmt = chunkSize;
|
||||
if ( buf->allocated > 0 )
|
||||
allocAmt = buf->allocated;
|
||||
while ( allocAmt < allocSize+1 )
|
||||
allocAmt *= 2;
|
||||
|
||||
bp = (byte*)TidyRealloc( buf->allocator, buf->bp, allocAmt );
|
||||
if ( bp != NULL )
|
||||
{
|
||||
TidyClearMemory( bp + buf->allocated, allocAmt - buf->allocated );
|
||||
buf->bp = bp;
|
||||
buf->allocated = allocAmt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Attach buffer to a chunk O' memory w/out allocation */
|
||||
void TIDY_CALL tidyBufAttach( TidyBuffer* buf, byte* bp, uint size )
|
||||
{
|
||||
assert( buf != NULL );
|
||||
buf->bp = bp;
|
||||
buf->size = buf->allocated = size;
|
||||
buf->next = 0;
|
||||
if ( !buf->allocator )
|
||||
setDefaultAllocator( buf );
|
||||
}
|
||||
|
||||
/* Clear pointer to memory w/out deallocation */
|
||||
void TIDY_CALL tidyBufDetach( TidyBuffer* buf )
|
||||
{
|
||||
tidyBufInitWithAllocator( buf, buf->allocator );
|
||||
}
|
||||
|
||||
|
||||
/**************
|
||||
OUTPUT
|
||||
**************/
|
||||
|
||||
/* Append `size` bytes taken from `vp` to the end of the buffer.
**
** buf   destination buffer; must not be NULL.
** vp    bytes to copy; a NULL pointer makes the call a no-op.
** size  number of bytes to copy; 0 makes the call a no-op.
**
** tidyBufCheckAlloc() has no way to report failure, so after asking for
** room the copy is performed only when the storage really can take the
** data plus the trailing null byte.  This prevents writing through a NULL
** or undersized buffer when the allocator fails or when buf->size + size
** wraps around; in that case the data is dropped and the buffer is left
** unchanged.
*/
void TIDY_CALL tidyBufAppend( TidyBuffer* buf, void* vp, uint size )
{
    assert( buf != NULL );
    if ( vp != NULL && size > 0 )
    {
        tidyBufCheckAlloc( buf, buf->size + size, 0 );

        /* room check: size + trailing null must fit behind buf->size */
        if ( buf->bp != NULL &&
             buf->allocated > buf->size &&
             size < buf->allocated - buf->size )
        {
            memcpy( buf->bp + buf->size, vp, size );
            buf->size += size;
        }
    }
}
|
||||
|
||||
/* Append the single byte `bv` to the end of the buffer.
**
** buf  destination buffer; must not be NULL.
** bv   byte to store.
**
** tidyBufCheckAlloc() has no way to report failure, so the byte is stored
** only when the storage really has room for it plus the trailing null
** byte.  This prevents writing through a NULL or full buffer when the
** allocator fails; in that case the byte is dropped and the buffer is
** left unchanged.
*/
void TIDY_CALL tidyBufPutByte( TidyBuffer* buf, byte bv )
{
    assert( buf != NULL );
    tidyBufCheckAlloc( buf, buf->size + 1, 0 );

    /* room check: one byte + trailing null must fit behind buf->size */
    if ( buf->bp != NULL &&
         buf->allocated > buf->size &&
         buf->allocated - buf->size > 1 )
    {
        buf->bp[ buf->size++ ] = bv;
    }
}
|
||||
|
||||
|
||||
/* Remove the last byte stored in the buffer and return it; returns EOF
** when the buffer holds no bytes.  buf must not be NULL.
*/
int TIDY_CALL tidyBufPopByte( TidyBuffer* buf )
{
    assert( NULL != buf );

    if ( buf->size > 0 )
    {
        buf->size -= 1;
        return buf->bp[ buf->size ];
    }
    return EOF;
}
|
||||
|
||||
/**************
|
||||
INPUT
|
||||
**************/
|
||||
|
||||
/* Return the byte at the read position and advance past it; returns EOF
** once tidyBufEndOfInput() reports that nothing is left to read.
*/
int TIDY_CALL tidyBufGetByte( TidyBuffer* buf )
{
    if ( tidyBufEndOfInput(buf) )
        return EOF;

    return buf->bp[ buf->next++ ];
}
|
||||
|
||||
/* True when the read position has reached or passed the number of bytes
** stored in the buffer.
*/
Bool TIDY_CALL tidyBufEndOfInput( TidyBuffer* buf )
{
    return !( buf->next < buf->size );
}
|
||||
|
||||
/* Step the read position back by one byte; does nothing when it is
** already at the start.  In builds with assertions enabled, bv is checked
** against the byte stored at the new position; it is not written back.
*/
void TIDY_CALL tidyBufUngetByte( TidyBuffer* buf, byte bv )
{
    if ( !( buf->next > 0 ) )
        return;

    buf->next -= 1;
    assert( bv == buf->bp[ buf->next ] );
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1032
src/charsets.c
Normal file
1032
src/charsets.c
Normal file
File diff suppressed because it is too large
Load diff
14
src/charsets.h
Normal file
14
src/charsets.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* charsets.h -- character set information and mappings
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: charsets.h,v 1.2 2006/09/12 15:14:44 arnaud02 Exp $
|
||||
*/
|
||||
|
||||
uint TY_(GetEncodingIdFromName)(ctmbstr name);
|
||||
uint TY_(GetEncodingIdFromCodePage)(uint cp);
|
||||
uint TY_(GetEncodingCodePageFromName)(ctmbstr name);
|
||||
uint TY_(GetEncodingCodePageFromId)(uint id);
|
||||
ctmbstr TY_(GetEncodingNameFromId)(uint id);
|
||||
ctmbstr TY_(GetEncodingNameFromCodePage)(uint cp);
|
2674
src/clean.c
Normal file
2674
src/clean.c
Normal file
File diff suppressed because it is too large
Load diff
87
src/clean.h
Normal file
87
src/clean.h
Normal file
|
@ -0,0 +1,87 @@
|
|||
#ifndef __CLEAN_H__
|
||||
#define __CLEAN_H__
|
||||
|
||||
/* clean.h -- clean up misuse of presentation markup
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.14 $
|
||||
|
||||
*/
|
||||
|
||||
void TY_(FixNodeLinks)(Node *node);
|
||||
|
||||
void TY_(FreeStyles)( TidyDocImpl* doc );
|
||||
|
||||
/* Add class="foo" to node
|
||||
*/
|
||||
void TY_(AddStyleAsClass)( TidyDocImpl* doc, Node *node, ctmbstr stylevalue );
|
||||
void TY_(AddStyleProperty)(TidyDocImpl* doc, Node *node, ctmbstr property );
|
||||
|
||||
void TY_(CleanDocument)( TidyDocImpl* doc );
|
||||
|
||||
/* simplifies <b><b> ... </b> ...</b> etc. */
|
||||
void TY_(NestedEmphasis)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/* replace i by em and b by strong */
|
||||
void TY_(EmFromI)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
Some people use dir or ul without an li
|
||||
to indent the content. The pattern to
|
||||
look for is a list with a single implicit
|
||||
li. This is recursively replaced by an
|
||||
implicit blockquote.
|
||||
*/
|
||||
void TY_(List2BQ)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/*
|
||||
Replace implicit blockquote by div with an indent
|
||||
taking care to reduce nested blockquotes to a single
|
||||
div with the indent set to match the nesting depth
|
||||
*/
|
||||
void TY_(BQ2Div)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
void TY_(DropSections)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
|
||||
/*
|
||||
This is a major clean up to strip out all the extra stuff you get
|
||||
when you save as web page from Word 2000. It doesn't yet know what
|
||||
to do with VML tags, but these will appear as errors unless you
|
||||
declare them as new tags, such as o:p which needs to be declared
|
||||
as inline.
|
||||
*/
|
||||
void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
|
||||
|
||||
Bool TY_(IsWord2000)( TidyDocImpl* doc );
|
||||
|
||||
/* where appropriate move object elements from head to body */
|
||||
void TY_(BumpObject)( TidyDocImpl* doc, Node *html );
|
||||
|
||||
/* This is disabled due to http://tidy.sf.net/bug/681116 */
|
||||
#if 0
|
||||
void TY_(FixBrakes)( TidyDocImpl* pDoc, Node *pParent );
|
||||
#endif
|
||||
|
||||
void TY_(VerifyHTTPEquiv)( TidyDocImpl* pDoc, Node *pParent );
|
||||
|
||||
void TY_(DropComments)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(DropFontElements)(TidyDocImpl* doc, Node* node, Node **pnode);
|
||||
void TY_(WbrToSpace)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(DowngradeTypography)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(ReplacePreformattedSpaces)(TidyDocImpl* doc, Node* node);
|
||||
void TY_(NormalizeSpaces)(Lexer *lexer, Node *node);
|
||||
void TY_(ConvertCDATANodes)(TidyDocImpl* doc, Node* node);
|
||||
|
||||
void TY_(FixAnchors)(TidyDocImpl* doc, Node *node, Bool wantName, Bool wantId);
|
||||
void TY_(FixXhtmlNamespace)(TidyDocImpl* doc, Bool wantXmlns);
|
||||
void TY_(FixLanguageInformation)(TidyDocImpl* doc, Node* node, Bool wantXmlLang, Bool wantLang);
|
||||
|
||||
|
||||
#endif /* __CLEAN_H__ */
|
1746
src/config.c
Normal file
1746
src/config.c
Normal file
File diff suppressed because it is too large
Load diff
153
src/config.h
Normal file
153
src/config.h
Normal file
|
@ -0,0 +1,153 @@
|
|||
#ifndef __CONFIG_H__
|
||||
#define __CONFIG_H__
|
||||
|
||||
/* config.h -- read config file and manage config properties
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/29 16:31:08 $
|
||||
$Revision: 1.14 $
|
||||
|
||||
config files associate a property name with a value.
|
||||
|
||||
// comments can start at the beginning of a line
|
||||
# comments can start at the beginning of a line
|
||||
name: short values fit onto one line
|
||||
name: a really long value that
|
||||
continues on the next line
|
||||
|
||||
property names are case insensitive and should be less than
|
||||
60 characters in length and must start at the beginning of
|
||||
the line, as whitespace at the start of a line signifies a
|
||||
line continuation.
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
#include "tidy.h"
|
||||
#include "streamio.h"
|
||||
|
||||
struct _tidy_option;
|
||||
typedef struct _tidy_option TidyOptionImpl;
|
||||
|
||||
typedef Bool (ParseProperty)( TidyDocImpl* doc, const TidyOptionImpl* opt );
|
||||
|
||||
struct _tidy_option
|
||||
{
|
||||
TidyOptionId id;
|
||||
TidyConfigCategory category; /* put 'em in groups */
|
||||
ctmbstr name; /* property name */
|
||||
TidyOptionType type; /* string, int or bool */
|
||||
ulong dflt; /* default for TidyInteger and TidyBoolean */
|
||||
ParseProperty* parser; /* parsing method, read-only if NULL */
|
||||
const ctmbstr* pickList; /* pick list */
|
||||
ctmbstr pdflt; /* default for TidyString */
|
||||
};
|
||||
|
||||
typedef union
|
||||
{
|
||||
ulong v; /* Value for TidyInteger and TidyBoolean */
|
||||
char *p; /* Value for TidyString */
|
||||
} TidyOptionValue;
|
||||
|
||||
typedef struct _tidy_config
|
||||
{
|
||||
TidyOptionValue value[ N_TIDY_OPTIONS + 1 ]; /* current config values */
|
||||
TidyOptionValue snapshot[ N_TIDY_OPTIONS + 1 ]; /* Snapshot of values to be restored later */
|
||||
|
||||
/* track what tags user has defined to eliminate unnecessary searches */
|
||||
uint defined_tags;
|
||||
|
||||
uint c; /* current char in input stream */
|
||||
StreamIn* cfgIn; /* current input source */
|
||||
|
||||
} TidyConfigImpl;
|
||||
|
||||
|
||||
typedef struct {
|
||||
TidyOptionId opt; /**< Identifier. */
|
||||
ctmbstr doc; /**< HTML text */
|
||||
TidyOptionId const *links; /**< Cross references.
|
||||
Last element must be 'TidyUnknownOption'. */
|
||||
} TidyOptionDoc;
|
||||
|
||||
|
||||
const TidyOptionImpl* TY_(lookupOption)( ctmbstr optnam );
|
||||
const TidyOptionImpl* TY_(getOption)( TidyOptionId optId );
|
||||
|
||||
TidyIterator TY_(getOptionList)( TidyDocImpl* doc );
|
||||
const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* doc, TidyIterator* iter );
|
||||
|
||||
TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option );
|
||||
ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option, TidyIterator* iter );
|
||||
|
||||
const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId );
|
||||
|
||||
void TY_(InitConfig)( TidyDocImpl* doc );
|
||||
void TY_(FreeConfig)( TidyDocImpl* doc );
|
||||
|
||||
/* Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val ); */
|
||||
Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val );
|
||||
Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val );
|
||||
|
||||
Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
void TY_(ResetConfigToDefault)( TidyDocImpl* doc );
|
||||
void TY_(TakeConfigSnapshot)( TidyDocImpl* doc );
|
||||
void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom );
|
||||
|
||||
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
|
||||
int TY_(ParseConfigFileEnc)( TidyDocImpl* doc,
|
||||
ctmbstr cfgfil, ctmbstr charenc );
|
||||
|
||||
int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil );
|
||||
int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink );
|
||||
|
||||
/* returns false if unknown option, missing parameter, or
|
||||
option doesn't use parameter
|
||||
*/
|
||||
Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optVal );
|
||||
Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optVal );
|
||||
|
||||
/* ensure that char encodings are self consistent */
|
||||
Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding );
|
||||
|
||||
Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc );
|
||||
Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc );
|
||||
|
||||
int TY_(CharEncodingId)( TidyDocImpl* doc, ctmbstr charenc );
|
||||
ctmbstr TY_(CharEncodingName)( int encoding );
|
||||
ctmbstr TY_(CharEncodingOptName)( int encoding );
|
||||
|
||||
/* void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename ); */
|
||||
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
/* Debug lookup functions will be type-safe and assert option type match */
|
||||
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId );
|
||||
|
||||
#define cfg(doc, id) TY_(_cfgGet)( (doc), (id) )
|
||||
#define cfgBool(doc, id) TY_(_cfgGetBool)( (doc), (id) )
|
||||
#define cfgAutoBool(doc, id) TY_(_cfgGetAutoBool)( (doc), (id) )
|
||||
#define cfgStr(doc, id) TY_(_cfgGetString)( (doc), (id) )
|
||||
|
||||
#else
|
||||
|
||||
/* Release build macros for speed */
|
||||
#define cfg(doc, id) ((doc)->config.value[ (id) ].v)
|
||||
#define cfgBool(doc, id) ((Bool) cfg(doc, id))
|
||||
#define cfgAutoBool(doc, id) ((TidyTriState) cfg(doc, id))
|
||||
#define cfgStr(doc, id) ((ctmbstr) (doc)->config.value[ (id) ].p)
|
||||
|
||||
#endif /* _DEBUG */
|
||||
|
||||
#endif /* __CONFIG_H__ */
|
419
src/entities.c
Normal file
419
src/entities.c
Normal file
|
@ -0,0 +1,419 @@
|
|||
/* entities.c -- recognize HTML ISO entities
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: hoehrmann $
|
||||
$Date: 2008/08/09 11:55:27 $
|
||||
$Revision: 1.19 $
|
||||
|
||||
Entity handling can be static because there are no config or
|
||||
document-specific values. Lookup table is 100% defined at
|
||||
compile time.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "entities.h"
|
||||
#include "tidy-int.h"
|
||||
#include "tmbstr.h"
|
||||
|
||||
struct _entity;
|
||||
typedef struct _entity entity;
|
||||
|
||||
struct _entity
|
||||
{
|
||||
ctmbstr name;
|
||||
uint versions;
|
||||
uint code;
|
||||
};
|
||||
|
||||
|
||||
static const entity entities[] =
|
||||
{
|
||||
/*
|
||||
** Markup pre-defined character entities
|
||||
*/
|
||||
{ "quot", VERS_ALL|VERS_XML, 34 },
|
||||
{ "amp", VERS_ALL|VERS_XML, 38 },
|
||||
{ "apos", VERS_FROM40|VERS_XML, 39 },
|
||||
{ "lt", VERS_ALL|VERS_XML, 60 },
|
||||
{ "gt", VERS_ALL|VERS_XML, 62 },
|
||||
|
||||
/*
|
||||
** Latin-1 character entities
|
||||
*/
|
||||
{ "nbsp", VERS_ALL, 160 },
|
||||
{ "iexcl", VERS_ALL, 161 },
|
||||
{ "cent", VERS_ALL, 162 },
|
||||
{ "pound", VERS_ALL, 163 },
|
||||
{ "curren", VERS_ALL, 164 },
|
||||
{ "yen", VERS_ALL, 165 },
|
||||
{ "brvbar", VERS_ALL, 166 },
|
||||
{ "sect", VERS_ALL, 167 },
|
||||
{ "uml", VERS_ALL, 168 },
|
||||
{ "copy", VERS_ALL, 169 },
|
||||
{ "ordf", VERS_ALL, 170 },
|
||||
{ "laquo", VERS_ALL, 171 },
|
||||
{ "not", VERS_ALL, 172 },
|
||||
{ "shy", VERS_ALL, 173 },
|
||||
{ "reg", VERS_ALL, 174 },
|
||||
{ "macr", VERS_ALL, 175 },
|
||||
{ "deg", VERS_ALL, 176 },
|
||||
{ "plusmn", VERS_ALL, 177 },
|
||||
{ "sup2", VERS_ALL, 178 },
|
||||
{ "sup3", VERS_ALL, 179 },
|
||||
{ "acute", VERS_ALL, 180 },
|
||||
{ "micro", VERS_ALL, 181 },
|
||||
{ "para", VERS_ALL, 182 },
|
||||
{ "middot", VERS_ALL, 183 },
|
||||
{ "cedil", VERS_ALL, 184 },
|
||||
{ "sup1", VERS_ALL, 185 },
|
||||
{ "ordm", VERS_ALL, 186 },
|
||||
{ "raquo", VERS_ALL, 187 },
|
||||
{ "frac14", VERS_ALL, 188 },
|
||||
{ "frac12", VERS_ALL, 189 },
|
||||
{ "frac34", VERS_ALL, 190 },
|
||||
{ "iquest", VERS_ALL, 191 },
|
||||
{ "Agrave", VERS_ALL, 192 },
|
||||
{ "Aacute", VERS_ALL, 193 },
|
||||
{ "Acirc", VERS_ALL, 194 },
|
||||
{ "Atilde", VERS_ALL, 195 },
|
||||
{ "Auml", VERS_ALL, 196 },
|
||||
{ "Aring", VERS_ALL, 197 },
|
||||
{ "AElig", VERS_ALL, 198 },
|
||||
{ "Ccedil", VERS_ALL, 199 },
|
||||
{ "Egrave", VERS_ALL, 200 },
|
||||
{ "Eacute", VERS_ALL, 201 },
|
||||
{ "Ecirc", VERS_ALL, 202 },
|
||||
{ "Euml", VERS_ALL, 203 },
|
||||
{ "Igrave", VERS_ALL, 204 },
|
||||
{ "Iacute", VERS_ALL, 205 },
|
||||
{ "Icirc", VERS_ALL, 206 },
|
||||
{ "Iuml", VERS_ALL, 207 },
|
||||
{ "ETH", VERS_ALL, 208 },
|
||||
{ "Ntilde", VERS_ALL, 209 },
|
||||
{ "Ograve", VERS_ALL, 210 },
|
||||
{ "Oacute", VERS_ALL, 211 },
|
||||
{ "Ocirc", VERS_ALL, 212 },
|
||||
{ "Otilde", VERS_ALL, 213 },
|
||||
{ "Ouml", VERS_ALL, 214 },
|
||||
{ "times", VERS_ALL, 215 },
|
||||
{ "Oslash", VERS_ALL, 216 },
|
||||
{ "Ugrave", VERS_ALL, 217 },
|
||||
{ "Uacute", VERS_ALL, 218 },
|
||||
{ "Ucirc", VERS_ALL, 219 },
|
||||
{ "Uuml", VERS_ALL, 220 },
|
||||
{ "Yacute", VERS_ALL, 221 },
|
||||
{ "THORN", VERS_ALL, 222 },
|
||||
{ "szlig", VERS_ALL, 223 },
|
||||
{ "agrave", VERS_ALL, 224 },
|
||||
{ "aacute", VERS_ALL, 225 },
|
||||
{ "acirc", VERS_ALL, 226 },
|
||||
{ "atilde", VERS_ALL, 227 },
|
||||
{ "auml", VERS_ALL, 228 },
|
||||
{ "aring", VERS_ALL, 229 },
|
||||
{ "aelig", VERS_ALL, 230 },
|
||||
{ "ccedil", VERS_ALL, 231 },
|
||||
{ "egrave", VERS_ALL, 232 },
|
||||
{ "eacute", VERS_ALL, 233 },
|
||||
{ "ecirc", VERS_ALL, 234 },
|
||||
{ "euml", VERS_ALL, 235 },
|
||||
{ "igrave", VERS_ALL, 236 },
|
||||
{ "iacute", VERS_ALL, 237 },
|
||||
{ "icirc", VERS_ALL, 238 },
|
||||
{ "iuml", VERS_ALL, 239 },
|
||||
{ "eth", VERS_ALL, 240 },
|
||||
{ "ntilde", VERS_ALL, 241 },
|
||||
{ "ograve", VERS_ALL, 242 },
|
||||
{ "oacute", VERS_ALL, 243 },
|
||||
{ "ocirc", VERS_ALL, 244 },
|
||||
{ "otilde", VERS_ALL, 245 },
|
||||
{ "ouml", VERS_ALL, 246 },
|
||||
{ "divide", VERS_ALL, 247 },
|
||||
{ "oslash", VERS_ALL, 248 },
|
||||
{ "ugrave", VERS_ALL, 249 },
|
||||
{ "uacute", VERS_ALL, 250 },
|
||||
{ "ucirc", VERS_ALL, 251 },
|
||||
{ "uuml", VERS_ALL, 252 },
|
||||
{ "yacute", VERS_ALL, 253 },
|
||||
{ "thorn", VERS_ALL, 254 },
|
||||
{ "yuml", VERS_ALL, 255 },
|
||||
|
||||
/*
|
||||
** Extended Entities defined in HTML 4: Symbols
|
||||
*/
|
||||
{ "fnof", VERS_FROM40, 402 },
|
||||
{ "Alpha", VERS_FROM40, 913 },
|
||||
{ "Beta", VERS_FROM40, 914 },
|
||||
{ "Gamma", VERS_FROM40, 915 },
|
||||
{ "Delta", VERS_FROM40, 916 },
|
||||
{ "Epsilon", VERS_FROM40, 917 },
|
||||
{ "Zeta", VERS_FROM40, 918 },
|
||||
{ "Eta", VERS_FROM40, 919 },
|
||||
{ "Theta", VERS_FROM40, 920 },
|
||||
{ "Iota", VERS_FROM40, 921 },
|
||||
{ "Kappa", VERS_FROM40, 922 },
|
||||
{ "Lambda", VERS_FROM40, 923 },
|
||||
{ "Mu", VERS_FROM40, 924 },
|
||||
{ "Nu", VERS_FROM40, 925 },
|
||||
{ "Xi", VERS_FROM40, 926 },
|
||||
{ "Omicron", VERS_FROM40, 927 },
|
||||
{ "Pi", VERS_FROM40, 928 },
|
||||
{ "Rho", VERS_FROM40, 929 },
|
||||
{ "Sigma", VERS_FROM40, 931 },
|
||||
{ "Tau", VERS_FROM40, 932 },
|
||||
{ "Upsilon", VERS_FROM40, 933 },
|
||||
{ "Phi", VERS_FROM40, 934 },
|
||||
{ "Chi", VERS_FROM40, 935 },
|
||||
{ "Psi", VERS_FROM40, 936 },
|
||||
{ "Omega", VERS_FROM40, 937 },
|
||||
{ "alpha", VERS_FROM40, 945 },
|
||||
{ "beta", VERS_FROM40, 946 },
|
||||
{ "gamma", VERS_FROM40, 947 },
|
||||
{ "delta", VERS_FROM40, 948 },
|
||||
{ "epsilon", VERS_FROM40, 949 },
|
||||
{ "zeta", VERS_FROM40, 950 },
|
||||
{ "eta", VERS_FROM40, 951 },
|
||||
{ "theta", VERS_FROM40, 952 },
|
||||
{ "iota", VERS_FROM40, 953 },
|
||||
{ "kappa", VERS_FROM40, 954 },
|
||||
{ "lambda", VERS_FROM40, 955 },
|
||||
{ "mu", VERS_FROM40, 956 },
|
||||
{ "nu", VERS_FROM40, 957 },
|
||||
{ "xi", VERS_FROM40, 958 },
|
||||
{ "omicron", VERS_FROM40, 959 },
|
||||
{ "pi", VERS_FROM40, 960 },
|
||||
{ "rho", VERS_FROM40, 961 },
|
||||
{ "sigmaf", VERS_FROM40, 962 },
|
||||
{ "sigma", VERS_FROM40, 963 },
|
||||
{ "tau", VERS_FROM40, 964 },
|
||||
{ "upsilon", VERS_FROM40, 965 },
|
||||
{ "phi", VERS_FROM40, 966 },
|
||||
{ "chi", VERS_FROM40, 967 },
|
||||
{ "psi", VERS_FROM40, 968 },
|
||||
{ "omega", VERS_FROM40, 969 },
|
||||
{ "thetasym", VERS_FROM40, 977 },
|
||||
{ "upsih", VERS_FROM40, 978 },
|
||||
{ "piv", VERS_FROM40, 982 },
|
||||
{ "bull", VERS_FROM40, 8226 },
|
||||
{ "hellip", VERS_FROM40, 8230 },
|
||||
{ "prime", VERS_FROM40, 8242 },
|
||||
{ "Prime", VERS_FROM40, 8243 },
|
||||
{ "oline", VERS_FROM40, 8254 },
|
||||
{ "frasl", VERS_FROM40, 8260 },
|
||||
{ "weierp", VERS_FROM40, 8472 },
|
||||
{ "image", VERS_FROM40, 8465 },
|
||||
{ "real", VERS_FROM40, 8476 },
|
||||
{ "trade", VERS_FROM40, 8482 },
|
||||
{ "alefsym", VERS_FROM40, 8501 },
|
||||
{ "larr", VERS_FROM40, 8592 },
|
||||
{ "uarr", VERS_FROM40, 8593 },
|
||||
{ "rarr", VERS_FROM40, 8594 },
|
||||
{ "darr", VERS_FROM40, 8595 },
|
||||
{ "harr", VERS_FROM40, 8596 },
|
||||
{ "crarr", VERS_FROM40, 8629 },
|
||||
{ "lArr", VERS_FROM40, 8656 },
|
||||
{ "uArr", VERS_FROM40, 8657 },
|
||||
{ "rArr", VERS_FROM40, 8658 },
|
||||
{ "dArr", VERS_FROM40, 8659 },
|
||||
{ "hArr", VERS_FROM40, 8660 },
|
||||
{ "forall", VERS_FROM40, 8704 },
|
||||
{ "part", VERS_FROM40, 8706 },
|
||||
{ "exist", VERS_FROM40, 8707 },
|
||||
{ "empty", VERS_FROM40, 8709 },
|
||||
{ "nabla", VERS_FROM40, 8711 },
|
||||
{ "isin", VERS_FROM40, 8712 },
|
||||
{ "notin", VERS_FROM40, 8713 },
|
||||
{ "ni", VERS_FROM40, 8715 },
|
||||
{ "prod", VERS_FROM40, 8719 },
|
||||
{ "sum", VERS_FROM40, 8721 },
|
||||
{ "minus", VERS_FROM40, 8722 },
|
||||
{ "lowast", VERS_FROM40, 8727 },
|
||||
{ "radic", VERS_FROM40, 8730 },
|
||||
{ "prop", VERS_FROM40, 8733 },
|
||||
{ "infin", VERS_FROM40, 8734 },
|
||||
{ "ang", VERS_FROM40, 8736 },
|
||||
{ "and", VERS_FROM40, 8743 },
|
||||
{ "or", VERS_FROM40, 8744 },
|
||||
{ "cap", VERS_FROM40, 8745 },
|
||||
{ "cup", VERS_FROM40, 8746 },
|
||||
{ "int", VERS_FROM40, 8747 },
|
||||
{ "there4", VERS_FROM40, 8756 },
|
||||
{ "sim", VERS_FROM40, 8764 },
|
||||
{ "cong", VERS_FROM40, 8773 },
|
||||
{ "asymp", VERS_FROM40, 8776 },
|
||||
{ "ne", VERS_FROM40, 8800 },
|
||||
{ "equiv", VERS_FROM40, 8801 },
|
||||
{ "le", VERS_FROM40, 8804 },
|
||||
{ "ge", VERS_FROM40, 8805 },
|
||||
{ "sub", VERS_FROM40, 8834 },
|
||||
{ "sup", VERS_FROM40, 8835 },
|
||||
{ "nsub", VERS_FROM40, 8836 },
|
||||
{ "sube", VERS_FROM40, 8838 },
|
||||
{ "supe", VERS_FROM40, 8839 },
|
||||
{ "oplus", VERS_FROM40, 8853 },
|
||||
{ "otimes", VERS_FROM40, 8855 },
|
||||
{ "perp", VERS_FROM40, 8869 },
|
||||
{ "sdot", VERS_FROM40, 8901 },
|
||||
{ "lceil", VERS_FROM40, 8968 },
|
||||
{ "rceil", VERS_FROM40, 8969 },
|
||||
{ "lfloor", VERS_FROM40, 8970 },
|
||||
{ "rfloor", VERS_FROM40, 8971 },
|
||||
{ "lang", VERS_FROM40, 9001 },
|
||||
{ "rang", VERS_FROM40, 9002 },
|
||||
{ "loz", VERS_FROM40, 9674 },
|
||||
{ "spades", VERS_FROM40, 9824 },
|
||||
{ "clubs", VERS_FROM40, 9827 },
|
||||
{ "hearts", VERS_FROM40, 9829 },
|
||||
{ "diams", VERS_FROM40, 9830 },
|
||||
|
||||
/*
|
||||
** Extended Entities defined in HTML 4: Special (less Markup at top)
|
||||
*/
|
||||
{ "OElig", VERS_FROM40, 338 },
|
||||
{ "oelig", VERS_FROM40, 339 },
|
||||
{ "Scaron", VERS_FROM40, 352 },
|
||||
{ "scaron", VERS_FROM40, 353 },
|
||||
{ "Yuml", VERS_FROM40, 376 },
|
||||
{ "circ", VERS_FROM40, 710 },
|
||||
{ "tilde", VERS_FROM40, 732 },
|
||||
{ "ensp", VERS_FROM40, 8194 },
|
||||
{ "emsp", VERS_FROM40, 8195 },
|
||||
{ "thinsp", VERS_FROM40, 8201 },
|
||||
{ "zwnj", VERS_FROM40, 8204 },
|
||||
{ "zwj", VERS_FROM40, 8205 },
|
||||
{ "lrm", VERS_FROM40, 8206 },
|
||||
{ "rlm", VERS_FROM40, 8207 },
|
||||
{ "ndash", VERS_FROM40, 8211 },
|
||||
{ "mdash", VERS_FROM40, 8212 },
|
||||
{ "lsquo", VERS_FROM40, 8216 },
|
||||
{ "rsquo", VERS_FROM40, 8217 },
|
||||
{ "sbquo", VERS_FROM40, 8218 },
|
||||
{ "ldquo", VERS_FROM40, 8220 },
|
||||
{ "rdquo", VERS_FROM40, 8221 },
|
||||
{ "bdquo", VERS_FROM40, 8222 },
|
||||
{ "dagger", VERS_FROM40, 8224 },
|
||||
{ "Dagger", VERS_FROM40, 8225 },
|
||||
{ "permil", VERS_FROM40, 8240 },
|
||||
{ "lsaquo", VERS_FROM40, 8249 },
|
||||
{ "rsaquo", VERS_FROM40, 8250 },
|
||||
{ "euro", VERS_FROM40, 8364 },
|
||||
{ NULL, VERS_UNKNOWN, 0 }
|
||||
};
|
||||
|
||||
|
||||
/* Pure static implementation. Trades off lookup speed
|
||||
** for faster setup time (well, none actually).
|
||||
** Optimization of comparing 1st character buys enough
|
||||
** speed that hash doesn't improve things without > 500
|
||||
** items in list.
|
||||
*/
|
||||
/* Find the table entry whose name equals s (text after the '&').
** Returns NULL for a NULL or empty string and when no entry matches.
** The first character is compared before the full string compare so
** most entries are rejected cheaply.
*/
static const entity* entitiesLookup( ctmbstr s )
{
    const entity *cur;

    if ( s == NULL || *s == '\0' )
        return NULL;

    for ( cur = entities; cur->name != NULL; ++cur )
    {
        if ( *cur->name != *s )
            continue;
        if ( TY_(tmbstrcmp)(s, cur->name) == 0 )
            return cur;
    }
    return NULL;
}
|
||||
|
||||
#if 0
|
||||
/* entity starting with "&" returns zero on error */
|
||||
uint EntityCode( ctmbstr name, uint versions )
|
||||
{
|
||||
const entity* np;
|
||||
assert( name && name[0] == '&' );
|
||||
|
||||
/* numeric entity: name = "&#" followed by number */
|
||||
if ( name[1] == '#' )
|
||||
{
|
||||
uint c = 0; /* zero on missing/bad number */
|
||||
Bool isXml = ( (versions & VERS_XML) == VERS_XML );
|
||||
|
||||
/* 'x' prefix denotes hexadecimal number format */
|
||||
if ( name[2] == 'x' || (!isXml && name[2] == 'X') )
|
||||
sscanf( name+3, "%x", &c );
|
||||
else
|
||||
sscanf( name+2, "%u", &c );
|
||||
|
||||
return (uint) c;
|
||||
}
|
||||
|
||||
/* Named entity: name ="&" followed by a name */
|
||||
if ( NULL != (np = entitiesLookup(name+1)) )
|
||||
{
|
||||
/* Only recognize entity name if version supports it. */
|
||||
if ( np->versions & versions )
|
||||
return np->code;
|
||||
}
|
||||
|
||||
return 0; /* zero signifies unknown entity name */
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Resolve an entity reference that starts with '&'.
**
** name      entity text, must begin with '&'
** isXml     when set, only a lower-case 'x' introduces a hex number
** code      receives the code point (0 if unknown or unparsable)
** versions  receives the version mask for the entity
**
** Returns yes for any numeric reference and for a known named entity,
** no for an unknown name.
*/
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions )
{
    const entity* found;

    assert( name && name[0] == '&' );
    assert( code != NULL );
    assert( versions != NULL );

    /* numeric entity: name = "&#" followed by number */
    if ( name[1] == '#' )
    {
        uint value = 0;    /* stays zero on missing/bad number */
        Bool isHex = ( name[2] == 'x' || (!isXml && name[2] == 'X') );

        if ( isHex )
            sscanf( name+3, "%x", &value );
        else
            sscanf( name+2, "%u", &value );

        *code = value;
        *versions = VERS_ALL;
        return yes;
    }

    /* named entity: look up the text following the '&' */
    found = entitiesLookup( name+1 );
    if ( found == NULL )
    {
        *code = 0;
        *versions = ( isXml ? VERS_XML : VERS_PROPRIETARY );
        return no;
    }

    *code = found->code;
    *versions = found->versions;
    return yes;
}
|
||||
|
||||
|
||||
/* Map a code point back to an entity name.
** The search stops at the first table entry carrying the code; its
** name is returned only if the entry's version mask overlaps the
** requested versions, otherwise NULL is returned.
*/
ctmbstr TY_(EntityName)( uint ch, uint versions )
{
    const entity *cur = entities;

    /* advance to the first entry with this code, or to the sentinel */
    while ( cur->name != NULL && cur->code != ch )
        ++cur;

    if ( cur->name != NULL && (cur->versions & versions) )
        return cur->name;

    return NULL;
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
24
src/entities.h
Normal file
24
src/entities.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef __ENTITIES_H__
|
||||
#define __ENTITIES_H__
|
||||
|
||||
/* entities.h -- recognize character entities
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.8 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
/* entity starting with "&" returns zero on error */
|
||||
/* uint EntityCode( ctmbstr name, uint versions ); */
|
||||
ctmbstr TY_(EntityName)( uint charCode, uint versions );
|
||||
Bool TY_(EntityInfo)( ctmbstr name, Bool isXml, uint* code, uint* versions );
|
||||
|
||||
#endif /* __ENTITIES_H__ */
|
106
src/fileio.c
Normal file
106
src/fileio.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* fileio.c -- does standard I/O
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/05/30 16:47:31 $
|
||||
$Revision: 1.17 $
|
||||
|
||||
Default implementations of Tidy input sources
|
||||
and output sinks based on standard C FILE*.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "forward.h"
|
||||
#include "fileio.h"
|
||||
#include "tidy.h"
|
||||
|
||||
typedef struct _fp_input_source
|
||||
{
|
||||
FILE* fp;
|
||||
TidyBuffer unget;
|
||||
} FileSource;
|
||||
|
||||
/* Read the next byte: pushed-back bytes are served first, then the FILE*. */
static int TIDY_CALL filesrc_getByte( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size > 0 )
        return tidyBufPopByte( &src->unget );

    return fgetc( src->fp );
}
|
||||
|
||||
/* End of input is reached only when no pushed-back bytes remain
** and the FILE* reports end-of-file.
*/
static Bool TIDY_CALL filesrc_eof( void* sourceData )
{
    FileSource* src = (FileSource*) sourceData;

    if ( src->unget.size != 0 )
        return no;

    return ( feof( src->fp ) != 0 ) ? yes : no;
}
|
||||
|
||||
/* Push a byte back so the next filesrc_getByte() call returns it. */
static void TIDY_CALL filesrc_ungetByte( void* sourceData, byte bv )
{
    tidyBufPutByte( &((FileSource*) sourceData)->unget, bv );
}
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
#define initFileSource initStdIOFileSource
|
||||
#define freeFileSource freeStdIOFileSource
|
||||
#endif
|
||||
/* Allocate a FileSource for fp and wire it into inp.
** Returns 0 on success, -1 if the allocation fails (inp is then
** left untouched).
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    FileSource* src = (FileSource*) TidyAlloc( allocator, sizeof(FileSource) );

    if ( src == NULL )
        return -1;

    TidyClearMemory( src, sizeof(FileSource) );
    src->fp = fp;
    src->unget.allocator = allocator;

    inp->sourceData = src;
    inp->getByte    = filesrc_getByte;
    inp->ungetByte  = filesrc_ungetByte;
    inp->eof        = filesrc_eof;

    return 0;
}
|
||||
|
||||
/* Release the FileSource attached to inp.
**
** inp      input source whose sourceData was set by initFileSource
** closeIt  when set, the underlying FILE* is closed as well
**
** Does nothing if no source data is attached: the NULL test covers
** every dereference of fin, not just the fclose().
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    FileSource* fin = (FileSource*) inp->sourceData;

    if ( fin == NULL )
        return;

    if ( closeIt && fin->fp )
        fclose( fin->fp );

    tidyBufFree( &fin->unget );
    TidyFree( fin->unget.allocator, fin );
}
|
||||
|
||||
/* Output sink callback: write one byte to the FILE* passed as sinkData. */
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv )
{
    fputc( bv, (FILE*) sinkData );
}
|
||||
|
||||
/* Point an output sink at fp, using filesink_putByte as the writer. */
void TY_(initFileSink)( TidyOutputSink* outp, FILE* fp )
{
    outp->sinkData = fp;
    outp->putByte  = TY_(filesink_putByte);
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
46
src/fileio.h
Normal file
46
src/fileio.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
#ifndef __FILEIO_H__
|
||||
#define __FILEIO_H__
|
||||
|
||||
/** @file fileio.h - does standard C I/O
|
||||
|
||||
Implementation of a FILE* based TidyInputSource and
|
||||
TidyOutputSink.
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/05/30 16:47:31 $
|
||||
$Revision: 1.8 $
|
||||
*/
|
||||
|
||||
#include "buffio.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Allocate and initialize file input source */
|
||||
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
||||
|
||||
/** Free file input source */
|
||||
void TY_(freeFileSource)( TidyInputSource* source, Bool closeIt );
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
/** Allocate and initialize file input source using Standard C I/O */
|
||||
int TY_(initStdIOFileSource)( TidyAllocator *allocator, TidyInputSource* source, FILE* fp );
|
||||
|
||||
/** Free file input source using Standard C I/O */
|
||||
void TY_(freeStdIOFileSource)( TidyInputSource* source, Bool closeIt );
|
||||
#endif
|
||||
|
||||
/** Initialize file output sink */
|
||||
void TY_(initFileSink)( TidyOutputSink* sink, FILE* fp );
|
||||
|
||||
/* Needed for internal declarations */
|
||||
void TIDY_CALL TY_(filesink_putByte)( void* sinkData, byte bv );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* __FILEIO_H__ */
|
69
src/forward.h
Normal file
69
src/forward.h
Normal file
|
@ -0,0 +1,69 @@
|
|||
#ifndef __FORWARD_H__
|
||||
#define __FORWARD_H__
|
||||
|
||||
/* forward.h -- Forward declarations for major Tidy structures
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/02/11 09:45:52 $
|
||||
$Revision: 1.7 $
|
||||
|
||||
Avoids many include file circular dependencies.
|
||||
|
||||
Try to keep this file down to the minimum to avoid
|
||||
cross-talk between modules.
|
||||
|
||||
Header files include this file. C files include tidy-int.h.
|
||||
|
||||
*/
|
||||
|
||||
#include "platform.h"
|
||||
#include "tidy.h"
|
||||
|
||||
/* Internal symbols are prefixed to avoid clashes with other libraries */
|
||||
#define TYDYAPPEND(str1,str2) str1##str2
|
||||
#define TY_(str) TYDYAPPEND(prvTidy,str)
|
||||
|
||||
struct _StreamIn;
|
||||
typedef struct _StreamIn StreamIn;
|
||||
|
||||
struct _StreamOut;
|
||||
typedef struct _StreamOut StreamOut;
|
||||
|
||||
struct _TidyDocImpl;
|
||||
typedef struct _TidyDocImpl TidyDocImpl;
|
||||
|
||||
|
||||
struct _Dict;
|
||||
typedef struct _Dict Dict;
|
||||
|
||||
struct _Attribute;
|
||||
typedef struct _Attribute Attribute;
|
||||
|
||||
struct _AttVal;
|
||||
typedef struct _AttVal AttVal;
|
||||
|
||||
struct _Node;
|
||||
typedef struct _Node Node;
|
||||
|
||||
struct _IStack;
|
||||
typedef struct _IStack IStack;
|
||||
|
||||
struct _Lexer;
|
||||
typedef struct _Lexer Lexer;
|
||||
|
||||
extern TidyAllocator TY_(g_default_allocator);
|
||||
|
||||
/** Wrappers for easy memory allocation using an allocator */
|
||||
#define TidyAlloc(allocator, size) ((allocator)->vtbl->alloc((allocator), (size)))
|
||||
#define TidyRealloc(allocator, block, size) ((allocator)->vtbl->realloc((allocator), (block), (size)))
|
||||
#define TidyFree(allocator, block) ((allocator)->vtbl->free((allocator), (block)))
|
||||
#define TidyPanic(allocator, msg) ((allocator)->vtbl->panic((allocator), (msg)))
|
||||
#define TidyClearMemory(block, size) memset((block), 0, (size))
|
||||
|
||||
|
||||
#endif /* __FORWARD_H__ */
|
105
src/iconvtc.c
Normal file
105
src/iconvtc.c
Normal file
|
@ -0,0 +1,105 @@
|
|||
/* iconvtc.c -- Interface to iconv transcoding routines
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: iconvtc.c,v 1.2 2008/08/09 11:55:27 hoehrmann Exp $
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#include "forward.h"
|
||||
#include "streamio.h"
|
||||
|
||||
#ifdef TIDY_ICONV_SUPPORT
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
/* maximum number of bytes for a single character */
|
||||
#define TC_INBUFSIZE 16
|
||||
|
||||
/* maximum number of characters per byte sequence */
|
||||
#define TC_OUTBUFSIZE 16
|
||||
|
||||
Bool IconvInitInputTranscoder(void)
|
||||
{
|
||||
return no;
|
||||
}
|
||||
|
||||
/* Stub: there is nothing to tear down. */
void IconvUninitInputTranscoder(void)
{
}
|
||||
|
||||
/* Decode one character from the input stream through iconv.
**
** firstByte  byte already consumed by the caller
** in         stream supplying the iconv descriptor and the byte source
** bytesRead  receives the number of input bytes used
**
** Bytes are appended one at a time until iconv() accepts the sequence
** or TC_INBUFSIZE bytes have been collected.  The converted output is
** read as four UTF-32LE octets.
**
** Returns the code point, or EndOfStream when the source runs out or
** no character could be decoded within TC_INBUFSIZE bytes.  In the
** latter case *bytesRead is not written.
*/
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;

    while(inbufsize < TC_INBUFSIZE)
    {
        char * outbufptr = (char*)outbuf;
        char * inbufptr  = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno;

        if (result != (size_t)(-1))
        {
            uint c;

            /* create codepoint from UTF-32LE octets; the arithmetic is
               unsigned and the top octet is shifted by 24, so every
               shift stays within the width of the type */
            c  = (uint)(unsigned char)outbuf[0];
            c |= (uint)(unsigned char)outbuf[1] << 8;
            c |= (uint)(unsigned char)outbuf[2] << 16;
            c |= (uint)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory */
        assert( iconv_errno == EINVAL ); /* incomplete sequence */

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    return EndOfStream;
}
|
||||
|
||||
#endif /* TIDY_ICONV_SUPPORT */
|
15
src/iconvtc.h
Normal file
15
src/iconvtc.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef __ICONVTC_H__
|
||||
#define __ICONVTC_H__
|
||||
#ifdef TIDY_ICONV_SUPPORT
|
||||
|
||||
/* iconvtc.h -- Interface to iconv transcoding routines
|
||||
|
||||
(c) 1998-2003 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: iconvtc.h,v 1.1 2003/04/28 22:59:41 hoehrmann Exp $
|
||||
*/
|
||||
|
||||
|
||||
#endif /* TIDY_ICONV_SUPPORT */
|
||||
#endif /* __ICONVTC_H__ */
|
373
src/istack.c
Normal file
373
src/istack.c
Normal file
|
@ -0,0 +1,373 @@
|
|||
/* istack.c -- inline stack for compatibility with Mosaic
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/29 16:31:08 $
|
||||
$Revision: 1.21 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy-int.h"
|
||||
#include "lexer.h"
|
||||
#include "attrs.h"
|
||||
#include "streamio.h"
|
||||
#include "tmbstr.h"
|
||||
|
||||
/* duplicate attributes */
|
||||
/* Deep-copy an attribute list.
** Each node is copied by value, then its next link, name, value,
** dictionary entry and any asp/php nodes are replaced with fresh
** duplicates.  Returns NULL for an empty list.
*/
AttVal *TY_(DupAttrs)( TidyDocImpl* doc, AttVal *attrs)
{
    AttVal *copy;

    if ( !attrs )
        return NULL;

    copy = TY_(NewAttribute)(doc);
    *copy = *attrs;

    /* the tail is duplicated recursively before this node's own fields */
    copy->next      = TY_(DupAttrs)( doc, attrs->next );
    copy->attribute = TY_(tmbstrdup)(doc->allocator, attrs->attribute);
    copy->value     = TY_(tmbstrdup)(doc->allocator, attrs->value);
    copy->dict      = TY_(FindAttribute)(doc, copy);

    copy->asp = NULL;
    if ( attrs->asp )
        copy->asp = TY_(CloneNode)(doc, attrs->asp);

    copy->php = NULL;
    if ( attrs->php )
        copy->php = TY_(CloneNode)(doc, attrs->php);

    return copy;
}
|
||||
|
||||
/* A node may go on the inline stack only if it has a tag whose
** content model is inline and not object.
*/
static Bool IsNodePushable( Node *node )
{
    if ( node->tag != NULL
         && (node->tag->model & CM_INLINE) != 0
         && (node->tag->model & CM_OBJECT) == 0 )
        return yes;

    return no;
}
|
||||
|
||||
/*
|
||||
push a copy of an inline node onto stack
|
||||
but don't push if implicit or OBJECT or APPLET
|
||||
(implicit tags are ones generated from the istack)
|
||||
|
||||
One issue arises with pushing inlines when
|
||||
the tag is already pushed. For instance:
|
||||
|
||||
<p><em>text
|
||||
<p><em>more text
|
||||
|
||||
Shouldn't be mapped to
|
||||
|
||||
<p><em>text</em></p>
|
||||
<p><em><em>more text</em></em>
|
||||
*/
|
||||
/* Push a copy of an inline node (tag, element name, attributes)
** onto the lexer's inline stack.  Implicit nodes, non-pushable nodes
** and - except for FONT - tags already on the stack are ignored.
*/
void TY_(PushInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    IStack *slot;

    if ( node->implicit || !IsNodePushable(node) )
        return;

    if ( !nodeIsFONT(node) && TY_(IsPushed)(doc, node) )
        return;

    /* grow the stack when it is full */
    if ( lexer->istacksize >= lexer->istacklength )
    {
        /* an empty stack starts from 6 and is doubled like any other */
        if ( lexer->istacklength == 0 )
            lexer->istacklength = 6;
        lexer->istacklength *= 2;

        lexer->istack = (IStack *)TidyDocRealloc(doc, lexer->istack,
                            sizeof(IStack)*(lexer->istacklength));
    }

    slot = lexer->istack + lexer->istacksize;
    slot->tag        = node->tag;
    slot->element    = TY_(tmbstrdup)(doc->allocator, node->element);
    slot->attributes = TY_(DupAttrs)( doc, node->attributes );

    lexer->istacksize += 1;
}
|
||||
|
||||
/* Remove the top entry of the inline stack and release the attribute
** list and element name it owns.  The caller must ensure that the
** stack is not empty.
*/
static void PopIStack( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    IStack *top;
    AttVal *attr, *following;

    --(lexer->istacksize);
    top = &(lexer->istack[lexer->istacksize]);

    for (attr = top->attributes; attr != NULL; attr = following)
    {
        /* unlink before freeing so the list head is never dangling */
        following = attr->next;
        top->attributes = following;
        TY_(FreeAttribute)( doc, attr );
    }

    TidyDocFree(doc, top->element);
}
|
||||
|
||||
/* Pop entries off the inline stack until one whose tag id equals tid
** has been popped, or the stack is empty.
*/
static void PopIStackUntil( TidyDocImpl* doc, TidyTagId tid )
{
    Lexer* lexer = doc->lexer;

    while (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* istacksize now indexes the slot that was just vacated;
        ** its tag pointer is still readable after the pop */
        if ( lexer->istack[lexer->istacksize].tag->id == tid )
            return;
    }
}
|
||||
|
||||
/* pop inline stack */
|
||||
/* Pop the inline stack for an end tag.  A NULL node pops one entry
** unconditionally; a node that is not pushable is ignored; </a> pops
** everything down to and including the nearest <a>.
*/
void TY_(PopInline)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;

    if (node)
    {
        if ( !IsNodePushable(node) )
            return;

        /* if node is </a> then pop until we find an <a> */
        if ( nodeIsA(node) )
        {
            PopIStackUntil( doc, TidyTag_A );

            /* Several entries may have been released: do not leave the
            ** insert pointer aimed at a popped slot (same precaution as
            ** #427822 below).
            */
            if ( lexer->insert != NULL
                 && lexer->insert >= lexer->istack + lexer->istacksize )
                lexer->insert = NULL;
            return;
        }
    }

    if (lexer->istacksize > 0)
    {
        PopIStack( doc );

        /* #427822 - fix by Randy Waki 7 Aug 00 */
        if (lexer->insert >= lexer->istack + lexer->istacksize)
            lexer->insert = NULL;
    }
}
|
||||
|
||||
/* Report whether any entry of the inline stack, searched from the top
** down, carries the same tag definition as node.
*/
Bool TY_(IsPushed)( TidyDocImpl* doc, Node *node )
{
    Lexer* lexer = doc->lexer;
    int idx = lexer->istacksize;

    while (--idx >= 0)
    {
        if (lexer->istack[idx].tag == node->tag)
            return yes;
    }

    return no;
}
|
||||
|
||||
/*
|
||||
Test whether the last element on the stack has the same type as "node".
|
||||
*/
|
||||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node )
{
    Lexer* lexer = doc->lexer;

    /* an element that could never have been pushed rules the match out */
    if ( element && !IsNodePushable(element) )
        return no;

    if ( lexer->istacksize == 0 )
        return no;

    /* compare against the top of the stack only */
    return ( lexer->istack[lexer->istacksize - 1].tag == node->tag ) ? yes : no;
}
|
||||
|
||||
/*
|
||||
This has the effect of inserting "missing" inline
|
||||
elements around the contents of blocklevel elements
|
||||
such as P, TD, TH, DIV, PRE etc. This procedure is
|
||||
called at the start of ParseBlock when the inline
|
||||
stack is not empty, as will be the case in:
|
||||
|
||||
<i><h1>italic heading</h1></i>
|
||||
|
||||
which is then treated as equivalent to
|
||||
|
||||
<h1><i>italic heading</i></h1>
|
||||
|
||||
This is implemented by setting the lexer into a mode
|
||||
where it gets tokens from the inline stack rather than
|
||||
from the input stream.
|
||||
*/
|
||||
int TY_(InlineDup)( TidyDocImpl* doc, Node* node )
{
    Lexer* lexer = doc->lexer;
    /* number of entries in the current frame of the inline stack */
    int pending = lexer->istacksize - lexer->istackbase;

    if (pending > 0)
    {
        /* replay from the bottom of the frame, then deliver node */
        lexer->inode = node;
        lexer->insert = &(lexer->istack[lexer->istackbase]);
    }

    return pending;
}
|
||||
|
||||
/*
|
||||
defer duplicates when entering a table or other
|
||||
element where the inlines shouldn't be duplicated
|
||||
*/
|
||||
void TY_(DeferDup)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;

    /* cancel any pending replay of the inline stack */
    lexer->inode = NULL;
    lexer->insert = NULL;
}
|
||||
|
||||
/* Return the next token while the lexer is replaying the inline stack:
** an implicit start tag copied from the entry at lexer->insert, or,
** once insert is NULL, the deferred node held in lexer->inode.
*/
Node *TY_(InsertedToken)( TidyDocImpl* doc )
{
    Lexer* lexer = doc->lexer;
    Node *token;
    IStack *entry;
    uint nextIndex;

    /* this will only be NULL if inode != NULL */
    if (lexer->insert == NULL)
    {
        token = lexer->inode;
        lexer->inode = NULL;
        return token;
    }

    /*
      If this is the "latest" node then update
      the position, otherwise use current values
    */
    if (lexer->inode == NULL)
    {
        lexer->lines = doc->docIn->curline;
        lexer->columns = doc->docIn->curcol;
    }

    token = TY_(NewNode)(doc->allocator, lexer);
    token->type = StartTag;
    token->implicit = yes;
    token->start = lexer->txtstart;
    /* #431734 [JTidy bug #226261 (was 126261)] - fix by Gary Peskin 20 Dec 00 */
    token->end = lexer->txtend; /* was : lexer->txtstart; */
    entry = lexer->insert;

#if 0 && defined(_DEBUG)
    if ( lexer->istacksize == 0 )
        fprintf( stderr, "0-size istack!\n" );
#endif

    /* the token gets its own copies of the name and attributes */
    token->element = TY_(tmbstrdup)(doc->allocator, entry->element);
    token->tag = entry->tag;
    token->attributes = TY_(DupAttrs)( doc, entry->attributes );

    /* advance to the next stack entry, or end the replay at the top */
    nextIndex = (uint)(lexer->insert - &(lexer->istack[0])) + 1;
    lexer->insert = (nextIndex < lexer->istacksize)
                        ? &(lexer->istack[nextIndex])
                        : NULL;

    return token;
}
|
||||
|
||||
|
||||
/*
|
||||
We have two CM_INLINE elements pushed ... the first is closing,
|
||||
but, like the browser, the second should be retained ...
|
||||
Like <b>bold <i>bold and italics</b> italics only</i>
|
||||
This function switches the tag positions on the stack,
|
||||
returning 'yes' if both were found in the expected order.
|
||||
*/
|
||||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node )
{
    Lexer* lexer = doc->lexer;
    int base, i, j;

    if ( !lexer
         || !element || !element->tag
         || !node || !node->tag
         || !TY_(IsPushed)( doc, element )
         || !TY_(IsPushed)( doc, node )
         || (lexer->istacksize - lexer->istackbase) < 2 )
    {
        return no;
    }

    /* we have a chance of succeeding ...
    **
    ** The current frame is istack[istackbase .. istacksize-1] (see
    ** InlineDup), so the search must use absolute indexes within that
    ** range rather than frame-relative offsets from slot 0.
    */
    base = (int)lexer->istackbase;

    for (i = (int)lexer->istacksize - 1; i >= base; --i)
    {
        if (lexer->istack[i].tag != element->tag)
            continue;

        /* found the element tag - now look below it for the node tag */
        for (j = i - 1; j >= base; --j)
        {
            if (lexer->istack[j].tag == node->tag)
            {
                /* found the node tag - perform the swap */
                IStack tmp_istack = lexer->istack[j];
                lexer->istack[j] = lexer->istack[i];
                lexer->istack[i] = tmp_istack;
                return yes;
            }
        }

        /* only the topmost occurrence of element is considered */
        return no;
    }

    return no;
}
|
||||
|
||||
/*
|
||||
We want to push a specific element on the stack,
|
||||
but it may not be the last element, which InlineDup()
|
||||
would handle. Return yes, if found and inserted.
|
||||
*/
|
||||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element )
{
    Lexer* lexer = doc->lexer;
    int base, i;

    if ( element == NULL || element->tag == NULL )
        return no;

    /* nothing to replay if the current frame is empty */
    if ( (int)(lexer->istacksize - lexer->istackbase) <= 0 )
        return no;

    /* The current frame is istack[istackbase .. istacksize-1] (see
    ** InlineDup); search it from the top with absolute indexes.
    */
    base = (int)lexer->istackbase;

    for ( i = (int)lexer->istacksize - 1; i >= base; --i )
    {
        if (lexer->istack[i].tag == element->tag)
        {
            /* found our element tag - insert it */
            lexer->insert = &(lexer->istack[i]);
            lexer->inode = node;
            return yes;
        }
    }

    return no;
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
3832
src/lexer.c
Normal file
3832
src/lexer.c
Normal file
File diff suppressed because it is too large
Load diff
621
src/lexer.h
Normal file
621
src/lexer.h
Normal file
|
@ -0,0 +1,621 @@
|
|||
#ifndef __LEXER_H__
|
||||
#define __LEXER_H__
|
||||
|
||||
/* lexer.h -- Lexer for html parser
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2008/03/22 21:06:11 $
|
||||
$Revision: 1.41 $
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
Given an input source, it returns a sequence of tokens.
|
||||
|
||||
GetToken(source) gets the next token
|
||||
UngetToken(source) provides one level undo
|
||||
|
||||
The tags include an attribute list:
|
||||
|
||||
- linked list of attribute/value nodes
|
||||
- each node has 2 NULL-terminated strings.
|
||||
- entities are replaced in attribute values
|
||||
|
||||
white space is compacted if not in preformatted mode
|
||||
If not in preformatted mode then leading white space
|
||||
is discarded and subsequent white space sequences
|
||||
compacted to single space characters.
|
||||
|
||||
If XmlTags is no then Tag names are folded to upper
|
||||
case and attribute names to lower case.
|
||||
|
||||
Not yet done:
|
||||
- Doctype subset and marked sections
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
/* lexer character types
|
||||
*/
|
||||
#define digit 1u
|
||||
#define letter 2u
|
||||
#define namechar 4u
|
||||
#define white 8u
|
||||
#define newline 16u
|
||||
#define lowercase 32u
|
||||
#define uppercase 64u
|
||||
#define digithex 128u
|
||||
|
||||
|
||||
/* node->type is one of these values
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
RootNode,
|
||||
DocTypeTag,
|
||||
CommentTag,
|
||||
ProcInsTag,
|
||||
TextNode,
|
||||
StartTag,
|
||||
EndTag,
|
||||
StartEndTag,
|
||||
CDATATag,
|
||||
SectionTag,
|
||||
AspTag,
|
||||
JsteTag,
|
||||
PhpTag,
|
||||
XmlDecl
|
||||
} NodeType;
|
||||
|
||||
|
||||
|
||||
/* lexer GetToken states
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
LEX_CONTENT,
|
||||
LEX_GT,
|
||||
LEX_ENDTAG,
|
||||
LEX_STARTTAG,
|
||||
LEX_COMMENT,
|
||||
LEX_DOCTYPE,
|
||||
LEX_PROCINSTR,
|
||||
LEX_CDATA,
|
||||
LEX_SECTION,
|
||||
LEX_ASP,
|
||||
LEX_JSTE,
|
||||
LEX_PHP,
|
||||
LEX_XMLDECL
|
||||
} LexerState;
|
||||
|
||||
/* ParseDocTypeDecl state constants */
|
||||
typedef enum
|
||||
{
|
||||
DT_INTERMEDIATE,
|
||||
DT_DOCTYPENAME,
|
||||
DT_PUBLICSYSTEM,
|
||||
DT_QUOTEDSTRING,
|
||||
DT_INTSUBSET
|
||||
} ParseDocTypeDeclState;
|
||||
|
||||
/* content model shortcut encoding
|
||||
|
||||
Descriptions are tentative.
|
||||
*/
|
||||
#define CM_UNKNOWN 0
|
||||
/* Elements with no content. Map to HTML specification. */
|
||||
#define CM_EMPTY (1 << 0)
|
||||
/* Elements that appear outside of "BODY". */
|
||||
#define CM_HTML (1 << 1)
|
||||
/* Elements that can appear within HEAD. */
|
||||
#define CM_HEAD (1 << 2)
|
||||
/* HTML "block" elements. */
|
||||
#define CM_BLOCK (1 << 3)
|
||||
/* HTML "inline" elements. */
|
||||
#define CM_INLINE (1 << 4)
|
||||
/* Elements that mark list item ("LI"). */
|
||||
#define CM_LIST (1 << 5)
|
||||
/* Elements that mark definition list item ("DL", "DT"). */
|
||||
#define CM_DEFLIST (1 << 6)
|
||||
/* Elements that can appear inside TABLE. */
|
||||
#define CM_TABLE (1 << 7)
|
||||
/* Used for "THEAD", "TFOOT" or "TBODY". */
|
||||
#define CM_ROWGRP (1 << 8)
|
||||
/* Used for "TD", "TH" */
|
||||
#define CM_ROW (1 << 9)
|
||||
/* Elements whose content must be protected against white space movement.
|
||||
Includes some elements that can be found in forms. */
|
||||
#define CM_FIELD (1 << 10)
|
||||
/* Used to avoid propagating inline emphasis inside some elements
|
||||
such as OBJECT or APPLET. */
|
||||
#define CM_OBJECT (1 << 11)
|
||||
/* Elements that allow "PARAM". */
|
||||
#define CM_PARAM (1 << 12)
|
||||
/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
|
||||
#define CM_FRAMES (1 << 13)
|
||||
/* Heading elements (h1, h2, ...). */
|
||||
#define CM_HEADING (1 << 14)
|
||||
/* Elements with an optional end tag. */
|
||||
#define CM_OPT (1 << 15)
|
||||
/* Elements that use "align" attribute for vertical position. */
|
||||
#define CM_IMG (1 << 16)
|
||||
/* Elements with inline and block model. Used to avoid calling InlineDup. */
|
||||
#define CM_MIXED (1 << 17)
|
||||
/* Elements whose content needs to be indented only if containing one
|
||||
CM_BLOCK element. */
|
||||
#define CM_NO_INDENT (1 << 18)
|
||||
/* Elements that are obsolete (such as "dir", "menu"). */
|
||||
#define CM_OBSOLETE (1 << 19)
|
||||
/* User defined elements. Used to determine how attributes without value
|
||||
should be printed. */
|
||||
#define CM_NEW (1 << 20)
|
||||
/* Elements that cannot be omitted. */
|
||||
#define CM_OMITST (1 << 21)
|
||||
|
||||
/* If the document uses just HTML 2.0 tags and attributes describe
|
||||
** it as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
|
||||
** If there are proprietary tags and attributes then describe it as
|
||||
** HTML Proprietary. If it includes the xml-lang or xmlns attributes
|
||||
** but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
|
||||
** flavors of Voyager (strict, loose or frameset).
|
||||
*/
|
||||
|
||||
/* unknown */
|
||||
#define xxxx 0u
|
||||
|
||||
/* W3C defined HTML/XHTML family document types */
|
||||
#define HT20 1u
|
||||
#define HT32 2u
|
||||
#define H40S 4u
|
||||
#define H40T 8u
|
||||
#define H40F 16u
|
||||
#define H41S 32u
|
||||
#define H41T 64u
|
||||
#define H41F 128u
|
||||
#define X10S 256u
|
||||
#define X10T 512u
|
||||
#define X10F 1024u
|
||||
#define XH11 2048u
|
||||
#define XB10 4096u
|
||||
|
||||
/* proprietary stuff */
|
||||
#define VERS_SUN 8192u
|
||||
#define VERS_NETSCAPE 16384u
|
||||
#define VERS_MICROSOFT 32768u
|
||||
|
||||
/* special flag */
|
||||
#define VERS_XML 65536u
|
||||
|
||||
/* "HTML5" */
|
||||
#define HT50 131072u
|
||||
#define XH50 262144u
|
||||
|
||||
/* compatibility symbols */
|
||||
#define VERS_UNKNOWN (xxxx)
|
||||
#define VERS_HTML20 (HT20)
|
||||
#define VERS_HTML32 (HT32)
|
||||
#define VERS_HTML40_STRICT (H40S|H41S|X10S)
|
||||
#define VERS_HTML40_LOOSE (H40T|H41T|X10T)
|
||||
#define VERS_FRAMESET (H40F|H41F|X10F)
|
||||
#define VERS_XHTML11 (XH11)
|
||||
#define VERS_BASIC (XB10)
|
||||
|
||||
/* meta symbols */
|
||||
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
|
||||
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET)
|
||||
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME)
|
||||
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11)
|
||||
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40)
|
||||
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC)
|
||||
#define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50)
|
||||
|
||||
/* all W3C defined document types */
|
||||
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
|
||||
|
||||
/* all proprietary types */
|
||||
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
|
||||
|
||||
/* Linked list of class names and styles
|
||||
*/
|
||||
struct _Style;
|
||||
typedef struct _Style TagStyle;
|
||||
|
||||
struct _Style
|
||||
{
|
||||
tmbstr tag;
|
||||
tmbstr tag_class;
|
||||
tmbstr properties;
|
||||
TagStyle *next;
|
||||
};
|
||||
|
||||
|
||||
/* Linked list of style properties
|
||||
*/
|
||||
struct _StyleProp;
|
||||
typedef struct _StyleProp StyleProp;
|
||||
|
||||
struct _StyleProp
|
||||
{
|
||||
tmbstr name;
|
||||
tmbstr value;
|
||||
StyleProp *next;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/* Attribute/Value linked list node
|
||||
*/
|
||||
|
||||
struct _AttVal
|
||||
{
|
||||
AttVal* next;
|
||||
const Attribute* dict;
|
||||
Node* asp;
|
||||
Node* php;
|
||||
int delim;
|
||||
tmbstr attribute;
|
||||
tmbstr value;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Mosaic handles inlines via a separate stack from other elements
|
||||
We duplicate this to recover from inline markup errors such as:
|
||||
|
||||
<i>italic text
|
||||
<p>more italic text</b> normal text
|
||||
|
||||
which for compatibility with Mosaic is mapped to:
|
||||
|
||||
<i>italic text</i>
|
||||
<p><i>more italic text</i> normal text
|
||||
|
||||
Note that any inline end tag pops the effect of the current
|
||||
inline start tag, so that </b> pops <i> in the above example.
|
||||
*/
|
||||
struct _IStack
|
||||
{
|
||||
IStack* next;
|
||||
const Dict* tag; /* tag's dictionary definition */
|
||||
tmbstr element; /* name (NULL for text nodes) */
|
||||
AttVal* attributes;
|
||||
};
|
||||
|
||||
|
||||
/* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl,
|
||||
** etc. etc.
|
||||
*/
|
||||
|
||||
struct _Node
|
||||
{
|
||||
Node* parent; /* tree structure */
|
||||
Node* prev;
|
||||
Node* next;
|
||||
Node* content;
|
||||
Node* last;
|
||||
|
||||
AttVal* attributes;
|
||||
const Dict* was; /* old tag when it was changed */
|
||||
const Dict* tag; /* tag's dictionary definition */
|
||||
|
||||
tmbstr element; /* name (NULL for text nodes) */
|
||||
|
||||
uint start; /* start of span onto text array */
|
||||
uint end; /* end of span onto text array */
|
||||
NodeType type; /* TextNode, StartTag, EndTag etc. */
|
||||
|
||||
uint line; /* current line of document */
|
||||
uint column; /* current column of document */
|
||||
|
||||
Bool closed; /* true if closed by explicit end tag */
|
||||
Bool implicit; /* true if inferred */
|
||||
Bool linebreak; /* true if followed by a line break */
|
||||
|
||||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
tmbstr otext;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
The following are private to the lexer
|
||||
Use NewLexer() to create a lexer, and
|
||||
FreeLexer() to free it.
|
||||
*/
|
||||
|
||||
struct _Lexer
|
||||
{
|
||||
#if 0 /* Move to TidyDocImpl */
|
||||
StreamIn* in; /* document content input */
|
||||
StreamOut* errout; /* error output stream */
|
||||
|
||||
uint badAccess; /* for accessibility errors */
|
||||
uint badLayout; /* for bad style errors */
|
||||
uint badChars; /* for bad character encodings */
|
||||
uint badForm; /* for mismatched/mispositioned form tags */
|
||||
uint warnings; /* count of warnings in this document */
|
||||
uint errors; /* count of errors */
|
||||
#endif
|
||||
|
||||
uint lines; /* lines seen */
|
||||
uint columns; /* at start of current token */
|
||||
Bool waswhite; /* used to collapse contiguous white space */
|
||||
Bool pushed; /* true after token has been pushed back */
|
||||
Bool insertspace; /* when space is moved after end tag */
|
||||
Bool excludeBlocks; /* Netscape compatibility */
|
||||
Bool exiled; /* true if moved out of table */
|
||||
Bool isvoyager; /* true if xmlns attribute on html element */
|
||||
uint versions; /* bit vector of HTML versions */
|
||||
uint doctype; /* version as given by doctype (if any) */
|
||||
uint versionEmitted; /* version of doctype emitted */
|
||||
Bool bad_doctype; /* e.g. if html or PUBLIC is missing */
|
||||
uint txtstart; /* start of current node */
|
||||
uint txtend; /* end of current node */
|
||||
LexerState state; /* state of lexer's finite state machine */
|
||||
|
||||
Node* token; /* last token returned by GetToken() */
|
||||
Node* itoken; /* last duplicate inline returned by GetToken() */
|
||||
Node* root; /* remember root node of the document */
|
||||
Node* parent; /* remember parent node for CDATA elements */
|
||||
|
||||
Bool seenEndBody; /* true if a </body> tag has been encountered */
|
||||
Bool seenEndHtml; /* true if a </html> tag has been encountered */
|
||||
|
||||
/*
|
||||
Lexer character buffer
|
||||
|
||||
Parse tree nodes span onto this buffer
|
||||
which contains the concatenated text
|
||||
contents of all of the elements.
|
||||
|
||||
lexsize must be reset for each file.
|
||||
*/
|
||||
tmbstr lexbuf; /* MB character buffer */
|
||||
uint lexlength; /* allocated */
|
||||
uint lexsize; /* used */
|
||||
|
||||
/* Inline stack for compatibility with Mosaic */
|
||||
Node* inode; /* for deferring text node */
|
||||
IStack* insert; /* for inferring inline tags */
|
||||
IStack* istack;
|
||||
uint istacklength; /* allocated */
|
||||
uint istacksize; /* used */
|
||||
uint istackbase; /* start of frame */
|
||||
|
||||
TagStyle *styles; /* used for cleaning up presentation markup */
|
||||
|
||||
TidyAllocator* allocator; /* allocator */
|
||||
|
||||
#if 0
|
||||
TidyDocImpl* doc; /* Pointer back to doc for error reporting */
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* Lexer Functions
|
||||
*/
|
||||
|
||||
/* choose what version to use for new doctype */
|
||||
int TY_(HTMLVersion)( TidyDocImpl* doc );
|
||||
|
||||
/* everything is allowed in proprietary version of HTML */
|
||||
/* this is handled here rather than in the tag/attr dicts */
|
||||
|
||||
void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
|
||||
|
||||
Bool TY_(IsWhite)(uint c);
|
||||
Bool TY_(IsDigit)(uint c);
|
||||
Bool TY_(IsLetter)(uint c);
|
||||
Bool TY_(IsNewline)(uint c);
|
||||
Bool TY_(IsNamechar)(uint c);
|
||||
Bool TY_(IsXMLLetter)(uint c);
|
||||
Bool TY_(IsXMLNamechar)(uint c);
|
||||
|
||||
/* Bool IsLower(uint c); */
|
||||
Bool TY_(IsUpper)(uint c);
|
||||
uint TY_(ToLower)(uint c);
|
||||
uint TY_(ToUpper)(uint c);
|
||||
|
||||
Lexer* TY_(NewLexer)( TidyDocImpl* doc );
|
||||
void TY_(FreeLexer)( TidyDocImpl* doc );
|
||||
|
||||
/* store character c as UTF-8 encoded byte stream */
|
||||
void TY_(AddCharToLexer)( Lexer *lexer, uint c );
|
||||
|
||||
/*
|
||||
Used for elements and text nodes
|
||||
element name is NULL for text nodes
|
||||
start and end are offsets into lexbuf
|
||||
which contains the textual content of
|
||||
all elements in the parse tree.
|
||||
|
||||
parent and content allow traversal
|
||||
of the parse tree in any direction.
|
||||
attributes are represented as a linked
|
||||
list of AttVal nodes which hold the
|
||||
strings for attribute/value pairs.
|
||||
*/
|
||||
Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
|
||||
|
||||
|
||||
/* used to clone heading nodes when split by an <HR> */
|
||||
Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
|
||||
|
||||
/* free node's attributes */
|
||||
void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
/* doesn't repair attribute list linkage */
|
||||
void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
|
||||
|
||||
/* detach attribute from node */
|
||||
void TY_(DetachAttribute)( Node *node, AttVal *attr );
|
||||
|
||||
/* detach attribute from node then free it
|
||||
*/
|
||||
void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
|
||||
|
||||
/*
|
||||
Free document nodes by iterating through peers and recursing
|
||||
through children. Set next to NULL before calling FreeNode()
|
||||
to avoid freeing peer nodes. Doesn't patch up prev/next links.
|
||||
*/
|
||||
void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
Node* TY_(TextToken)( Lexer *lexer );
|
||||
|
||||
/* used for creating preformatted text from Word2000 */
|
||||
Node* TY_(NewLineNode)( Lexer *lexer );
|
||||
|
||||
/* used for adding a non-breaking space (&nbsp;) for Word2000 */
|
||||
Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
|
||||
|
||||
void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
|
||||
/* void AddStringLiteralLen( Lexer* lexer, ctmbstr str, int len ); */
|
||||
|
||||
/* find element */
|
||||
Node* TY_(FindDocType)( TidyDocImpl* doc );
|
||||
Node* TY_(FindHTML)( TidyDocImpl* doc );
|
||||
Node* TY_(FindHEAD)( TidyDocImpl* doc );
|
||||
Node* TY_(FindTITLE)(TidyDocImpl* doc);
|
||||
Node* TY_(FindBody)( TidyDocImpl* doc );
|
||||
Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
|
||||
|
||||
/* Returns containing block element, if any */
|
||||
Node* TY_(FindContainer)( Node* node );
|
||||
|
||||
/* add meta element for Tidy */
|
||||
Bool TY_(AddGenerator)( TidyDocImpl* doc );
|
||||
|
||||
uint TY_(ApparentVersion)( TidyDocImpl* doc );
|
||||
|
||||
ctmbstr TY_(HTMLVersionNameFromCode)( uint vers, Bool isXhtml );
|
||||
|
||||
Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
|
||||
|
||||
Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/* fixup doctype if missing */
|
||||
Bool TY_(FixDocType)( TidyDocImpl* doc );
|
||||
|
||||
/* ensure XML document starts with <?xml version="1.0"?> */
|
||||
/* add encoding attribute if not using ASCII or UTF-8 output */
|
||||
Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
|
||||
|
||||
Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
|
||||
|
||||
void TY_(UngetToken)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/*
|
||||
modes for GetToken()
|
||||
|
||||
MixedContent -- for elements which don't accept PCDATA
|
||||
Preformatted -- white space preserved as is
|
||||
IgnoreMarkup -- for CDATA elements such as script, style
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
IgnoreWhitespace,
|
||||
MixedContent,
|
||||
Preformatted,
|
||||
IgnoreMarkup,
|
||||
CdataContent
|
||||
} GetTokenMode;
|
||||
|
||||
Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
|
||||
|
||||
void TY_(InitMap)(void);
|
||||
|
||||
|
||||
/* create a new attribute */
|
||||
AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
|
||||
|
||||
/* create a new attribute with given name and value */
|
||||
AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
|
||||
int delim );
|
||||
|
||||
/* insert attribute at the end of attribute list of a node */
|
||||
void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
|
||||
|
||||
/* insert attribute at the start of attribute list of a node */
|
||||
void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
|
||||
|
||||
/*************************************
|
||||
In-line Stack functions
|
||||
*************************************/
|
||||
|
||||
|
||||
/* duplicate attributes */
|
||||
AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
|
||||
|
||||
/*
|
||||
push a copy of an inline node onto stack
|
||||
but don't push if implicit or OBJECT or APPLET
|
||||
(implicit tags are ones generated from the istack)
|
||||
|
||||
One issue arises with pushing inlines when
|
||||
the tag is already pushed. For instance:
|
||||
|
||||
<p><em>text
|
||||
<p><em>more text
|
||||
|
||||
Shouldn't be mapped to
|
||||
|
||||
<p><em>text</em></p>
|
||||
<p><em><em>more text</em></em>
|
||||
*/
|
||||
void TY_(PushInline)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
/* pop inline stack */
|
||||
void TY_(PopInline)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
|
||||
Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
|
||||
|
||||
/*
|
||||
This has the effect of inserting "missing" inline
|
||||
elements around the contents of blocklevel elements
|
||||
such as P, TD, TH, DIV, PRE etc. This procedure is
|
||||
called at the start of ParseBlock when the inline
|
||||
stack is not empty, as will be the case in:
|
||||
|
||||
<i><h1>italic heading</h1></i>
|
||||
|
||||
which is then treated as equivalent to
|
||||
|
||||
<h1><i>italic heading</i></h1>
|
||||
|
||||
This is implemented by setting the lexer into a mode
|
||||
where it gets tokens from the inline stack rather than
|
||||
from the input stream.
|
||||
*/
|
||||
int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
/*
|
||||
defer duplicates when entering a table or other
|
||||
element where the inlines shouldn't be duplicated
|
||||
*/
|
||||
void TY_(DeferDup)( TidyDocImpl* doc );
|
||||
Node* TY_(InsertedToken)( TidyDocImpl* doc );
|
||||
|
||||
/* stack manipulation for inline elements */
|
||||
Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
|
||||
Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __LEXER_H__ */
|
1882
src/localize.c
Normal file
1882
src/localize.c
Normal file
File diff suppressed because it is too large
Load diff
329
src/mappedio.c
Executable file
329
src/mappedio.c
Executable file
|
@ -0,0 +1,329 @@
|
|||
/* Interface to mmap style I/O
|
||||
|
||||
(c) 2006-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
Originally contributed by Cory Nelson and Nuno Lopes
|
||||
|
||||
$Id: mappedio.c,v 1.14 2008/03/18 20:19:35 arnaud02 Exp $
|
||||
*/
|
||||
|
||||
/* keep these here to keep file non-empty */
|
||||
#include "forward.h"
|
||||
#include "mappedio.h"
|
||||
|
||||
#if SUPPORT_POSIX_MAPPED_FILES
|
||||
|
||||
#include "fileio.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TidyAllocator *allocator;
|
||||
const byte *base;
|
||||
size_t pos, size;
|
||||
} MappedFileSource;
|
||||
|
||||
/* Deliver the byte at the current offset and advance by one.
** No bounds check is made here; NOTE(review): presumably callers
** consult mapped_eof() first - confirm against the stream layer.
*/
static int TIDY_CALL mapped_getByte( void* sourceData )
{
    MappedFileSource* src = (MappedFileSource*) sourceData;
    const byte value = src->base[src->pos];

    src->pos += 1;
    return value;
}
|
||||
|
||||
/* True once the read offset has reached the end of the mapping */
static Bool TIDY_CALL mapped_eof( void* sourceData )
{
    const MappedFileSource* src = (const MappedFileSource*) sourceData;

    return (src->size <= src->pos);
}
|
||||
|
||||
/* Step the read offset back by one byte.  The byte value itself is not
** needed because the mapping still holds it.  A request made at offset
** zero is ignored: decrementing there would wrap the unsigned offset
** and send the next read far outside the mapping.
*/
static void TIDY_CALL mapped_ungetByte( void* sourceData, byte ARG_UNUSED(bv) )
{
    MappedFileSource* fin = (MappedFileSource*) sourceData;

    if ( fin->pos > 0 )
        fin->pos--;
}
|
||||
|
||||
/* Set up inp to read fp through a read-only memory mapping.
** When the file cannot be stat'ed, is empty, or cannot be mapped, the
** standard I/O file source is used instead.  On the mapped path fp is
** closed here.  Returns -1 if the bookkeeping structure cannot be
** allocated, 0 on the mapped path, otherwise the result of the
** standard I/O initialiser.
*/
int TY_(initFileSource)( TidyAllocator *allocator, TidyInputSource* inp, FILE* fp )
{
    MappedFileSource* fin;
    struct stat sbuf;
    int fd;
    Bool mapped = no;

    fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !fin )
        return -1;

    fd = fileno(fp);
    if ( fstat(fd, &sbuf) != -1 && sbuf.st_size != 0 )
    {
        fin->size = sbuf.st_size;
        fin->base = mmap(0, fin->size, PROT_READ, MAP_SHARED, fd, 0);
        if ( fin->base != MAP_FAILED )
            mapped = yes;
    }

    if ( !mapped )
    {
        TidyFree( allocator, fin );
        /* Fallback on standard I/O */
        return TY_(initStdIOFileSource)( allocator, inp, fp );
    }

    fin->pos = 0;
    fin->allocator = allocator;
    /* the stream is closed here; reads go through the mapping from now on */
    fclose(fp);

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;
    inp->sourceData = fin;

    return 0;
}
|
||||
|
||||
/* Release a source created by initFileSource.  A mapped source is
** recognised by its getByte callback; anything else is handed to the
** standard I/O cleanup along with closeIt.
*/
void TY_(freeFileSource)( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin;

    if ( inp->getByte != mapped_getByte )
    {
        TY_(freeStdIOFileSource)( inp, closeIt );
        return;
    }

    fin = (MappedFileSource*) inp->sourceData;
    munmap( (void*)fin->base, fin->size );
    TidyFree( fin->allocator, fin );
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "streamio.h"
|
||||
#include "tidy-int.h"
|
||||
#include "message.h"
|
||||
|
||||
#include <errno.h>
|
||||
#if _MSC_VER < 1300 /* less than msvc++ 7.0 */
|
||||
#pragma warning(disable:4115) /* named type definition in parentheses in windows headers */
|
||||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
typedef struct _fp_input_mapped_source
|
||||
{
|
||||
TidyAllocator *allocator;
|
||||
LONGLONG size, pos;
|
||||
HANDLE file, map;
|
||||
byte *view, *iter, *end;
|
||||
unsigned int gran;
|
||||
} MappedFileSource;
|
||||
|
||||
/* Map the part of the file that starts at data->pos, at most one
** granule long, releasing any view that is currently open.
** Returns 0 on success and -1 when the view cannot be mapped.
*/
static int mapped_openView( MappedFileSource *data )
{
    DWORD numb;

    /* clamp the view length to what is left of the file */
    if ( ( data->size - data->pos ) > data->gran )
        numb = data->gran;
    else
        numb = (DWORD)( data->size - data->pos );

    if ( data->view )
    {
        UnmapViewOfFile( data->view );
        data->view = NULL;
    }

    /* the 64-bit offset is passed as separate high and low DWORDs */
    data->view = MapViewOfFile( data->map, FILE_MAP_READ,
                                (DWORD)( data->pos >> 32 ),
                                (DWORD)data->pos, numb );
    if ( data->view == NULL )
        return -1;

    data->iter = data->view;
    data->end  = data->view + numb;

    return 0;
}
|
||||
|
||||
/* Deliver the next byte, sliding the view forward by one granule when
** the current one is exhausted.  Returns EndOfStream past the end of
** the file or when the next view cannot be mapped.
*/
static int TIDY_CALL mapped_getByte( void *sourceData )
{
    MappedFileSource *data = sourceData;
    Bool exhausted = ( !data->view || data->iter >= data->end );

    if ( exhausted )
    {
        data->pos += data->gran;

        if ( data->pos >= data->size )
            return EndOfStream;
        if ( mapped_openView(data) != 0 )
            return EndOfStream;
    }

    return *( data->iter++ );
}
|
||||
|
||||
/* TidyInputSource eof callback: true once the recorded file position has
** reached or passed the file size.
*/
static Bool TIDY_CALL mapped_eof( void *sourceData )
{
    MappedFileSource *src = sourceData;

    return !( src->pos < src->size );
}
|
||||
|
||||
/* TidyInputSource ungetByte callback for the mapped file source.
**
** The pushed-back byte value itself is ignored: the data is still present
** in the mapping, so only the read cursor is moved back by one byte.
**
** - Inside the current view the cursor is simply decremented.
** - At the first byte of a view other than the first one, the preceding
**   view is mapped again and the cursor is placed on its last byte.
** - At the very first byte of the file there is nothing to go back to;
**   this is treated as a caller error (assert in debug builds, ignored
**   otherwise).
*/
static void TIDY_CALL mapped_ungetByte( void *sourceData, byte ARG_UNUSED(bt) )
{
    MappedFileSource *data = sourceData;

    /* Strictly greater: when iter == view a decrement would leave the view. */
    if ( data->view && data->iter > data->view )
    {
        --data->iter;
        return;
    }

    if ( data->pos < data->gran )
    {
        assert(0);
        return;
    }

    /* Step back one granule and land on the last byte of that view. */
    data->pos -= data->gran;
    if ( mapped_openView( data ) == 0 )
        data->iter = data->end - 1;
}
|
||||
|
||||
/* Set up inp as a memory-mapped reader over the open file handle fp.
**
** Installs the mapped_* callbacks on inp, allocates the MappedFileSource
** record from allocator, queries the file size, creates a read-only file
** mapping and maps the first view (one allocation granule, or the whole
** file when it is smaller).
**
** Returns 0 on success, with inp->sourceData pointing at the new record.
** Returns -1 on allocation failure, when the size cannot be determined or
** is not positive (an empty file cannot be mapped), or when creating the
** mapping or the first view fails.  On failure everything acquired here is
** released again; fp itself is never closed by this function.
*/
static int initMappedFileSource( TidyAllocator *allocator, TidyInputSource* inp, HANDLE fp )
{
    MappedFileSource* fin = NULL;

    inp->getByte    = mapped_getByte;
    inp->eof        = mapped_eof;
    inp->ungetByte  = mapped_ungetByte;

    fin = (MappedFileSource*) TidyAlloc( allocator, sizeof(MappedFileSource) );
    if ( !fin )
        return -1;

#if _MSC_VER < 1300 /* less than msvc++ 7.0 */
    {
        DWORD high = 0;
        DWORD low;

        /* GetFileSize() reports failure by returning 0xFFFFFFFF, which is
        ** also a legal low dword, so the last-error value has to be
        ** consulted as well.  It is cleared first because a successful
        ** call is not guaranteed to reset it.
        */
        SetLastError( NO_ERROR );
        low = GetFileSize( fp, &high );
        if ( low == (DWORD)0xFFFFFFFF && GetLastError() != NO_ERROR )
        {
            TidyFree(allocator, fin);
            return -1;
        }

        fin->size = ( (LONGLONG)high << 32 ) | (LONGLONG)low;
        if ( fin->size <= 0 )
        {
            TidyFree(allocator, fin);
            return -1;
        }
    }
#else
    if ( !GetFileSizeEx( fp, (LARGE_INTEGER*)&fin->size )
         || fin->size <= 0 )
    {
        TidyFree(allocator, fin);
        return -1;
    }
#endif

    /* Size arguments of 0/0 make the mapping cover the whole file. */
    fin->map = CreateFileMapping( fp, NULL, PAGE_READONLY, 0, 0, NULL );

    if ( !fin->map )
    {
        TidyFree(allocator, fin);
        return -1;
    }

    {
        /* The allocation granularity is used as the size of each view. */
        SYSTEM_INFO info;
        GetSystemInfo( &info );
        fin->gran = info.dwAllocationGranularity;
    }

    fin->allocator = allocator;
    fin->pos       = 0;
    fin->view      = NULL;
    fin->iter      = NULL;
    fin->end       = NULL;

    if ( mapped_openView( fin ) != 0 )
    {
        CloseHandle( fin->map );
        TidyFree( allocator, fin );
        return -1;
    }

    fin->file = fp;
    inp->sourceData = fin;

    return 0;
}
|
||||
|
||||
/* Tear down a mapped file source created by initMappedFileSource().
**
** The current view and the file-mapping handle belong to the source and
** are always released.  The underlying file handle is closed only when
** closeIt is set and the handle is valid.  A source without a record
** (inp->sourceData == NULL) is left untouched.  inp->sourceData is reset
** so that the freed record cannot be reached through inp afterwards.
*/
static void freeMappedFileSource( TidyInputSource* inp, Bool closeIt )
{
    MappedFileSource* fin = (MappedFileSource*) inp->sourceData;

    if ( !fin )
        return;

    if ( fin->view )
        UnmapViewOfFile( fin->view );

    if ( fin->map )
        CloseHandle( fin->map );

    if ( closeIt && fin->file != INVALID_HANDLE_VALUE )
        CloseHandle( fin->file );

    inp->sourceData = NULL;
    TidyFree( fin->allocator, fin );
}
|
||||
|
||||
/* Create an input stream for doc that reads from fp through a file
** mapping, using the given character encoding.
**
** Returns the new stream with its iotype set to FileIO, or NULL (after
** freeing the stream) when the mapped source cannot be initialised.
*/
StreamIn* MappedFileInput ( TidyDocImpl* doc, HANDLE fp, int encoding )
{
    StreamIn *stream = TY_(initStreamIn)( doc, encoding );
    int rc = initMappedFileSource( doc->allocator, &stream->source, fp );

    if ( rc == 0 )
    {
        stream->iotype = FileIO;
        return stream;
    }

    TY_(freeStreamIn)( stream );
    return NULL;
}
|
||||
|
||||
|
||||
/* Parse the file named filnam into doc, reading it through a Win32 file
** mapping.
**
** Returns the status of TY_(DocParseStream) on success, -ENOENT (after
** reporting a file error) when the file cannot be opened, and -ENOMEM when
** the mapped input stream cannot be created.
**
** With PRESERVE_FILE_TIMES, doc->filetimes is cleared and, if the
** TidyKeepFileTimes option is set and the times can be read, filled with
** the file's access and modification times converted from FILETIME units
** (100 ns ticks) to seconds relative to the offset 116444736000000000.
*/
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam ) {
    int status = -ENOENT;
    StreamIn* in;
    HANDLE fin = CreateFileA( filnam, GENERIC_READ, FILE_SHARE_READ, NULL,
                              OPEN_EXISTING, 0, NULL );

#if PRESERVE_FILE_TIMES
    LONGLONG actime, modtime;
    TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );

    if ( fin != INVALID_HANDLE_VALUE && cfgBool(doc,TidyKeepFileTimes) &&
         GetFileTime(fin, NULL, (FILETIME*)&actime, (FILETIME*)&modtime) )
    {
        /* Suffix for 64-bit integer literals depends on the compiler. */
#if _MSC_VER < 1300 && !defined(__GNUC__) /* less than msvc++ 7.0 */
# define TY_I64(str) TYDYAPPEND(str,i64)
#else
# define TY_I64(str) TYDYAPPEND(str,LL)
#endif
        const LONGLONG epochOffset = TY_I64(116444736000000000);

        doc->filetimes.actime  = (time_t)( ( actime  - epochOffset ) / 10000000 );
        doc->filetimes.modtime = (time_t)( ( modtime - epochOffset ) / 10000000 );
    }
#endif

    if ( fin == INVALID_HANDLE_VALUE )
    {
        /* Error message! */
        TY_(FileError)( doc, filnam, TidyError );
        return status;
    }

    in = MappedFileInput( doc, fin, cfg( doc, TidyInCharEncoding ) );
    if ( !in )
    {
        CloseHandle( fin );
        return -ENOMEM;
    }

    status = TY_(DocParseStream)( doc, in );

    /* Releases the view and mapping and closes fin as well. */
    freeMappedFileSource( &in->source, yes );
    TY_(freeStreamIn)( in );

    return status;
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
16
src/mappedio.h
Executable file
16
src/mappedio.h
Executable file
|
@ -0,0 +1,16 @@
|
|||
#ifndef __TIDY_MAPPED_IO_H__
|
||||
#define __TIDY_MAPPED_IO_H__
|
||||
|
||||
/* Interface to mmap style I/O
|
||||
|
||||
(c) 2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: mappedio.h,v 1.2 2006/09/15 16:50:37 arnaud02 Exp $
|
||||
*/
|
||||
|
||||
#if defined(_WIN32)
|
||||
int TY_(DocParseFileWithMappedFile)( TidyDocImpl* doc, ctmbstr filnam );
|
||||
#endif
|
||||
|
||||
#endif /* __TIDY_MAPPED_IO_H__ */
|
207
src/message.h
Normal file
207
src/message.h
Normal file
|
@ -0,0 +1,207 @@
|
|||
#ifndef __MESSAGE_H__
|
||||
#define __MESSAGE_H__
|
||||
|
||||
/* message.h -- general message writing routines
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/05/30 16:47:31 $
|
||||
$Revision: 1.29 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
#include "tidy.h" /* For TidyReportLevel */
|
||||
|
||||
/* General message writing routines.
|
||||
** Each message is a single warning, error, etc.
|
||||
**
|
||||
** This routine will keep track of counts and,
|
||||
** if the caller has set a filter, it will be
|
||||
** called. The new preferred way of handling
|
||||
** Tidy diagnostics output is either a) define
|
||||
** a new output sink or b) install a message
|
||||
** filter routine.
|
||||
**
|
||||
** Keeps track of ShowWarnings, ShowErrors, etc.
|
||||
*/
|
||||
|
||||
ctmbstr TY_(ReleaseDate)(void);
|
||||
|
||||
/* void TY_(ShowVersion)( TidyDocImpl* doc ); */
|
||||
void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option );
|
||||
void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option );
|
||||
void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc );
|
||||
|
||||
/* void TY_(HelloMessage)( TidyDocImpl* doc, ctmbstr date, ctmbstr filename ); */
|
||||
void TY_(ReportMarkupVersion)( TidyDocImpl* doc );
|
||||
void TY_(ReportNumWarnings)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(GeneralInfo)( TidyDocImpl* doc );
|
||||
/* void TY_(UnknownOption)( TidyDocImpl* doc, char c ); */
|
||||
/* void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file ); */
|
||||
void TY_(FileError)( TidyDocImpl* doc, ctmbstr file, TidyReportLevel level );
|
||||
|
||||
void TY_(ErrorSummary)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding);
|
||||
void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded);
|
||||
void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity, int c );
|
||||
void TY_(ReportAttrError)( TidyDocImpl* doc, Node* node, AttVal* av, uint code );
|
||||
void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name );
|
||||
|
||||
#if SUPPORT_ACCESSIBILITY_CHECKS
|
||||
|
||||
void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code );
|
||||
void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code );
|
||||
|
||||
#endif
|
||||
|
||||
void TY_(ReportNotice)(TidyDocImpl* doc, Node *element, Node *node, uint code);
|
||||
void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code);
|
||||
void TY_(ReportError)(TidyDocImpl* doc, Node* element, Node* node, uint code);
|
||||
void TY_(ReportFatal)(TidyDocImpl* doc, Node* element, Node* node, uint code);
|
||||
|
||||
/* error codes for entities/numeric character references */
|
||||
|
||||
#define MISSING_SEMICOLON 1
|
||||
#define MISSING_SEMICOLON_NCR 2
|
||||
#define UNKNOWN_ENTITY 3
|
||||
#define UNESCAPED_AMPERSAND 4
|
||||
#define APOS_UNDEFINED 5
|
||||
|
||||
/* error codes for element messages */
|
||||
|
||||
#define MISSING_ENDTAG_FOR 6
|
||||
#define MISSING_ENDTAG_BEFORE 7
|
||||
#define DISCARDING_UNEXPECTED 8
|
||||
#define NESTED_EMPHASIS 9
|
||||
#define NON_MATCHING_ENDTAG 10
|
||||
#define TAG_NOT_ALLOWED_IN 11
|
||||
#define MISSING_STARTTAG 12
|
||||
#define UNEXPECTED_ENDTAG 13
|
||||
#define USING_BR_INPLACE_OF 14
|
||||
#define INSERTING_TAG 15
|
||||
#define SUSPECTED_MISSING_QUOTE 16
|
||||
#define MISSING_TITLE_ELEMENT 17
|
||||
#define DUPLICATE_FRAMESET 18
|
||||
#define CANT_BE_NESTED 19
|
||||
#define OBSOLETE_ELEMENT 20
|
||||
#define PROPRIETARY_ELEMENT 21
|
||||
#define UNKNOWN_ELEMENT 22
|
||||
#define TRIM_EMPTY_ELEMENT 23
|
||||
#define COERCE_TO_ENDTAG 24
|
||||
#define ILLEGAL_NESTING 25
|
||||
#define NOFRAMES_CONTENT 26
|
||||
#define CONTENT_AFTER_BODY 27
|
||||
#define INCONSISTENT_VERSION 28
|
||||
#define MALFORMED_COMMENT 29
|
||||
#define BAD_COMMENT_CHARS 30
|
||||
#define BAD_XML_COMMENT 31
|
||||
#define BAD_CDATA_CONTENT 32
|
||||
#define INCONSISTENT_NAMESPACE 33
|
||||
#define DOCTYPE_AFTER_TAGS 34
|
||||
#define MALFORMED_DOCTYPE 35
|
||||
#define UNEXPECTED_END_OF_FILE 36
|
||||
#define DTYPE_NOT_UPPER_CASE 37
|
||||
#define TOO_MANY_ELEMENTS 38
|
||||
#define UNESCAPED_ELEMENT 39
|
||||
#define NESTED_QUOTATION 40
|
||||
#define ELEMENT_NOT_EMPTY 41
|
||||
#define ENCODING_IO_CONFLICT 42
|
||||
#define MIXED_CONTENT_IN_BLOCK 43
|
||||
#define MISSING_DOCTYPE 44
|
||||
#define SPACE_PRECEDING_XMLDECL 45
|
||||
#define TOO_MANY_ELEMENTS_IN 46
|
||||
#define UNEXPECTED_ENDTAG_IN 47
|
||||
#define REPLACING_ELEMENT 83
|
||||
#define REPLACING_UNEX_ELEMENT 84
|
||||
#define COERCE_TO_ENDTAG_WARN 85
|
||||
|
||||
/* error codes used for attribute messages */
|
||||
|
||||
#define UNKNOWN_ATTRIBUTE 48
|
||||
#define INSERTING_ATTRIBUTE 49
|
||||
#define MISSING_ATTR_VALUE 50
|
||||
#define BAD_ATTRIBUTE_VALUE 51
|
||||
#define UNEXPECTED_GT 52
|
||||
#define PROPRIETARY_ATTRIBUTE 53
|
||||
#define PROPRIETARY_ATTR_VALUE 54
|
||||
#define REPEATED_ATTRIBUTE 55
|
||||
#define MISSING_IMAGEMAP 56
|
||||
#define XML_ATTRIBUTE_VALUE 57
|
||||
#define UNEXPECTED_QUOTEMARK 58
|
||||
#define MISSING_QUOTEMARK 59
|
||||
#define ID_NAME_MISMATCH 60
|
||||
|
||||
#define BACKSLASH_IN_URI 61
|
||||
#define FIXED_BACKSLASH 62
|
||||
#define ILLEGAL_URI_REFERENCE 63
|
||||
#define ESCAPED_ILLEGAL_URI 64
|
||||
|
||||
#define NEWLINE_IN_URI 65
|
||||
#define ANCHOR_NOT_UNIQUE 66
|
||||
|
||||
#define JOINING_ATTRIBUTE 68
|
||||
#define UNEXPECTED_EQUALSIGN 69
|
||||
#define ATTR_VALUE_NOT_LCASE 70
|
||||
#define XML_ID_SYNTAX 71
|
||||
|
||||
#define INVALID_ATTRIBUTE 72
|
||||
|
||||
#define BAD_ATTRIBUTE_VALUE_REPLACED 73
|
||||
|
||||
#define INVALID_XML_ID 74
|
||||
#define UNEXPECTED_END_OF_FILE_ATTR 75
|
||||
#define MISSING_ATTRIBUTE 86
|
||||
#define WHITE_IN_URI 87
|
||||
|
||||
#define PREVIOUS_LOCATION 88 /* last */
|
||||
|
||||
/* character encoding errors */
|
||||
|
||||
#define VENDOR_SPECIFIC_CHARS 76
|
||||
#define INVALID_SGML_CHARS 77
|
||||
#define INVALID_UTF8 78
|
||||
#define INVALID_UTF16 79
|
||||
#define ENCODING_MISMATCH 80
|
||||
#define INVALID_URI 81
|
||||
#define INVALID_NCR 82
|
||||
|
||||
/* accessibility flaws */
|
||||
|
||||
#define BA_MISSING_IMAGE_ALT 1
|
||||
#define BA_MISSING_LINK_ALT 2
|
||||
#define BA_MISSING_SUMMARY 4
|
||||
#define BA_MISSING_IMAGE_MAP 8
|
||||
#define BA_USING_FRAMES 16
|
||||
#define BA_USING_NOFRAMES 32
|
||||
#define BA_INVALID_LINK_NOFRAMES 64 /* WAI [6.5.1.4] */
|
||||
/* Top bit of the accessibility flag word.  Written with an unsigned operand
** because shifting 1 into the sign bit of a signed int is undefined. */
#define BA_WAI                     (1u << 31)
|
||||
|
||||
/* presentation flaws */
|
||||
|
||||
#define USING_SPACER 1
|
||||
#define USING_LAYER 2
|
||||
#define USING_NOBR 4
|
||||
#define USING_FONT 8
|
||||
#define USING_BODY 16
|
||||
|
||||
#define REPLACED_CHAR 0
|
||||
#define DISCARDED_CHAR 1
|
||||
|
||||
/* badchar bit field */
|
||||
|
||||
#define BC_VENDOR_SPECIFIC_CHARS 1
|
||||
#define BC_INVALID_SGML_CHARS 2
|
||||
#define BC_INVALID_UTF8 4
|
||||
#define BC_INVALID_UTF16 8
|
||||
#define BC_ENCODING_MISMATCH 16 /* fatal error */
|
||||
#define BC_INVALID_URI 32
|
||||
#define BC_INVALID_NCR 64
|
||||
|
||||
#endif /* __MESSAGE_H__ */
|
4408
src/parser.c
Normal file
4408
src/parser.c
Normal file
File diff suppressed because it is too large
Load diff
76
src/parser.h
Normal file
76
src/parser.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
#ifndef __PARSER_H__
|
||||
#define __PARSER_H__
|
||||
|
||||
/* parser.h -- HTML Parser
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/05/30 16:47:31 $
|
||||
$Revision: 1.14 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
Bool TY_(CheckNodeIntegrity)(Node *node);
|
||||
|
||||
Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
|
||||
|
||||
/*
|
||||
used to determine how attributes
|
||||
without values should be printed
|
||||
this was introduced to deal with
|
||||
user defined tags e.g. Cold Fusion
|
||||
*/
|
||||
Bool TY_(IsNewNode)(Node *node);
|
||||
|
||||
void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
|
||||
|
||||
/* extract a node and its children from a markup tree */
|
||||
Node *TY_(RemoveNode)(Node *node);
|
||||
|
||||
/* remove node from markup tree and discard it */
|
||||
Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
|
||||
|
||||
/* insert node into markup tree as the first element
|
||||
of content of element */
|
||||
void TY_(InsertNodeAtStart)(Node *element, Node *node);
|
||||
|
||||
/* insert node into markup tree as the last element
|
||||
of content of "element" */
|
||||
void TY_(InsertNodeAtEnd)(Node *element, Node *node);
|
||||
|
||||
/* insert node into markup tree before element */
|
||||
void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
|
||||
|
||||
/* insert node into markup tree after element */
|
||||
void TY_(InsertNodeAfterElement)(Node *element, Node *node);
|
||||
|
||||
Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
|
||||
Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
|
||||
|
||||
|
||||
/* assumes node is a text node */
|
||||
Bool TY_(IsBlank)(Lexer *lexer, Node *node);
|
||||
|
||||
Bool TY_(IsJavaScript)(Node *node);
|
||||
|
||||
/*
|
||||
HTML is the top level element
|
||||
*/
|
||||
void TY_(ParseDocument)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
|
||||
/*
|
||||
XML documents
|
||||
*/
|
||||
Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
|
||||
|
||||
void TY_(ParseXMLDocument)( TidyDocImpl* doc );
|
||||
|
||||
#endif /* __PARSER_H__ */
|
2276
src/pprint.c
Normal file
2276
src/pprint.c
Normal file
File diff suppressed because it is too large
Load diff
93
src/pprint.h
Normal file
93
src/pprint.h
Normal file
|
@ -0,0 +1,93 @@
|
|||
#ifndef __PPRINT_H__
|
||||
#define __PPRINT_H__
|
||||
|
||||
/* pprint.h -- pretty print parse tree
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info:
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/02/11 09:45:08 $
|
||||
$Revision: 1.9 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
|
||||
/*
|
||||
Block-level and unknown elements are printed on
|
||||
new lines and their contents indented 2 spaces
|
||||
|
||||
Inline elements are printed inline.
|
||||
|
||||
Inline content is wrapped on spaces (except in
|
||||
attribute values or preformatted text, after
|
||||
start tags and before end tags)
|
||||
*/
|
||||
|
||||
#define NORMAL 0u
|
||||
#define PREFORMATTED 1u
|
||||
#define COMMENT 2u
|
||||
#define ATTRIBVALUE 4u
|
||||
#define NOWRAP 8u
|
||||
#define CDATA 16u
|
||||
|
||||
|
||||
/* The pretty printer keeps at most two lines of text in the
|
||||
** buffer before flushing output. We need to capture the
|
||||
** indent state (indent level) at the _beginning_ of _each_
|
||||
** line, not the end of just the second line.
|
||||
**
|
||||
** We must also keep track of the "In Attribute" and "In String"
|
||||
** states at the _end_ of each line.
|
||||
*/
|
||||
|
||||
typedef struct _TidyIndent
|
||||
{
|
||||
int spaces;
|
||||
int attrValStart;
|
||||
int attrStringStart;
|
||||
} TidyIndent;
|
||||
|
||||
typedef struct _TidyPrintImpl
|
||||
{
|
||||
TidyAllocator *allocator; /* Allocator */
|
||||
|
||||
uint *linebuf;
|
||||
uint lbufsize;
|
||||
uint linelen;
|
||||
uint wraphere;
|
||||
|
||||
uint ixInd;
|
||||
TidyIndent indent[2]; /* Two lines worth of indent state */
|
||||
} TidyPrintImpl;
|
||||
|
||||
|
||||
#if 0 && SUPPORT_ASIAN_ENCODINGS
|
||||
/* #431953 - start RJ Wraplen adjusted for smooth international ride */
|
||||
uint CWrapLen( TidyDocImpl* doc, uint ind );
|
||||
#endif
|
||||
|
||||
void TY_(InitPrintBuf)( TidyDocImpl* doc );
|
||||
void TY_(FreePrintBuf)( TidyDocImpl* doc );
|
||||
|
||||
void TY_(PFlushLine)( TidyDocImpl* doc, uint indent );
|
||||
|
||||
|
||||
/* print just the content of the body element.
|
||||
** useful when you want to reuse material from
|
||||
** other documents.
|
||||
**
|
||||
** -- Sebastiano Vigna <vigna@dsi.unimi.it>
|
||||
*/
|
||||
|
||||
void TY_(PrintBody)( TidyDocImpl* doc ); /* you can print an entire document */
|
||||
/* node as body using PPrintTree() */
|
||||
|
||||
void TY_(PPrintTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
|
||||
|
||||
void TY_(PPrintXMLTree)( TidyDocImpl* doc, uint mode, uint indent, Node *node );
|
||||
|
||||
|
||||
#endif /* __PPRINT_H__ */
|
1407
src/streamio.c
Normal file
1407
src/streamio.c
Normal file
File diff suppressed because it is too large
Load diff
222
src/streamio.h
Normal file
222
src/streamio.h
Normal file
|
@ -0,0 +1,222 @@
|
|||
#ifndef __STREAMIO_H__
|
||||
#define __STREAMIO_H__
|
||||
|
||||
/* streamio.h -- handles character stream I/O
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/07/22 09:33:26 $
|
||||
$Revision: 1.21 $
|
||||
|
||||
Wrapper around Tidy input source and output sink
|
||||
that calls appropriate interfaces, and applies
|
||||
necessary char encoding transformations: to/from
|
||||
ISO-10646 and/or UTF-8.
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
#include "buffio.h"
|
||||
#include "fileio.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
typedef enum
|
||||
{
|
||||
FileIO,
|
||||
BufferIO,
|
||||
UserIO
|
||||
} IOType;
|
||||
|
||||
/* states for ISO 2022
|
||||
|
||||
A document in ISO-2022 based encoding uses some ESC sequences called
|
||||
"designator" to switch character sets. The designators defined and
|
||||
used in ISO-2022-JP are:
|
||||
|
||||
"ESC" + "(" + ? for ISO646 variants
|
||||
|
||||
"ESC" + "$" + ? and
|
||||
"ESC" + "$" + "(" + ? for multibyte character sets
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
FSM_ASCII,
|
||||
FSM_ESC,
|
||||
FSM_ESCD,
|
||||
FSM_ESCDP,
|
||||
FSM_ESCP,
|
||||
FSM_NONASCII
|
||||
} ISO2022State;
|
||||
|
||||
/************************
|
||||
** Source
|
||||
************************/
|
||||
|
||||
enum
|
||||
{
|
||||
CHARBUF_SIZE=5,
|
||||
LASTPOS_SIZE=64
|
||||
};
|
||||
|
||||
/* non-raw input is cleaned up*/
|
||||
struct _StreamIn
|
||||
{
|
||||
ISO2022State state; /* FSM for ISO2022 */
|
||||
Bool pushed;
|
||||
TidyAllocator *allocator;
|
||||
tchar* charbuf;
|
||||
uint bufpos;
|
||||
uint bufsize;
|
||||
int tabs;
|
||||
int lastcols[LASTPOS_SIZE];
|
||||
unsigned short curlastpos; /* current last position in lastcols */
|
||||
unsigned short firstlastpos; /* first valid last position in lastcols */
|
||||
int curcol;
|
||||
int curline;
|
||||
int encoding;
|
||||
IOType iotype;
|
||||
|
||||
TidyInputSource source;
|
||||
|
||||
#ifdef TIDY_WIN32_MLANG_SUPPORT
|
||||
void* mlang;
|
||||
#endif
|
||||
|
||||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
tmbstr otextbuf;
|
||||
size_t otextsize;
|
||||
uint otextlen;
|
||||
#endif
|
||||
|
||||
/* Pointer back to document for error reporting */
|
||||
TidyDocImpl* doc;
|
||||
};
|
||||
|
||||
StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
|
||||
void TY_(freeStreamIn)(StreamIn* in);
|
||||
|
||||
StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
|
||||
StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
|
||||
StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
|
||||
|
||||
int TY_(ReadBOMEncoding)(StreamIn *in);
|
||||
uint TY_(ReadChar)( StreamIn* in );
|
||||
void TY_(UngetChar)( uint c, StreamIn* in );
|
||||
Bool TY_(IsEOF)( StreamIn* in );
|
||||
|
||||
|
||||
/************************
|
||||
** Sink
|
||||
************************/
|
||||
|
||||
struct _StreamOut
|
||||
{
|
||||
int encoding;
|
||||
ISO2022State state; /* for ISO 2022 */
|
||||
uint nl;
|
||||
|
||||
#ifdef TIDY_WIN32_MLANG_SUPPORT
|
||||
void* mlang;
|
||||
#endif
|
||||
|
||||
IOType iotype;
|
||||
TidyOutputSink sink;
|
||||
};
|
||||
|
||||
StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
|
||||
StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
|
||||
StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
|
||||
|
||||
StreamOut* TY_(StdErrOutput)(void);
|
||||
/* StreamOut* StdOutOutput(void); */
|
||||
void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
|
||||
|
||||
void TY_(WriteChar)( uint c, StreamOut* out );
|
||||
void TY_(outBOM)( StreamOut *out );
|
||||
|
||||
ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
|
||||
ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
|
||||
int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
|
||||
|
||||
/************************
|
||||
** Misc
|
||||
************************/
|
||||
|
||||
/* character encodings
|
||||
*/
|
||||
#define RAW 0
|
||||
#define ASCII 1
|
||||
#define LATIN0 2
|
||||
#define LATIN1 3
|
||||
#define UTF8 4
|
||||
#define ISO2022 5
|
||||
#define MACROMAN 6
|
||||
#define WIN1252 7
|
||||
#define IBM858 8
|
||||
|
||||
#if SUPPORT_UTF16_ENCODINGS
|
||||
#define UTF16LE 9
|
||||
#define UTF16BE 10
|
||||
#define UTF16 11
|
||||
#endif
|
||||
|
||||
/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints
|
||||
** (i.e., to Unicode) before being recoded into UTF-8. This may be
|
||||
** confusing: usually UTF-8 implies ISO10646 codepoints.
|
||||
*/
|
||||
#if SUPPORT_ASIAN_ENCODINGS
|
||||
#if SUPPORT_UTF16_ENCODINGS
|
||||
#define BIG5 12
|
||||
#define SHIFTJIS 13
|
||||
#else
|
||||
#define BIG5 9
|
||||
#define SHIFTJIS 10
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef TIDY_WIN32_MLANG_SUPPORT
|
||||
/* hack: windows code page numbers start at 37 */
|
||||
#define WIN32MLANG 36
|
||||
#endif
|
||||
|
||||
|
||||
/* char encoding used when replacing illegal SGML chars,
|
||||
** regardless of specified encoding. Set at compile time
|
||||
** to either Windows or Mac.
|
||||
*/
|
||||
extern const int TY_(ReplacementCharEncoding);
|
||||
|
||||
/* Function for conversion from Windows-1252 to Unicode */
|
||||
uint TY_(DecodeWin1252)(uint c);
|
||||
|
||||
/* Function to convert from MacRoman to Unicode */
|
||||
uint TY_(DecodeMacRoman)(uint c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Use numeric constants as opposed to escape chars (\r, \n)
|
||||
** to avoid conflicts with Mac compilers that may re-define these.
|
||||
*/
|
||||
#define CR 0xD
|
||||
#define LF 0xA
|
||||
|
||||
#if defined(MAC_OS_CLASSIC)
|
||||
#define DEFAULT_NL_CONFIG TidyCR
|
||||
#elif defined(_WIN32) || defined(OS2_OS)
|
||||
#define DEFAULT_NL_CONFIG TidyCRLF
|
||||
#else
|
||||
#define DEFAULT_NL_CONFIG TidyLF
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __STREAMIO_H__ */
|
286
src/tagask.c
Normal file
286
src/tagask.c
Normal file
|
@ -0,0 +1,286 @@
|
|||
/* tagask.c -- Interrogate node type
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.6 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy-int.h"
|
||||
#include "tags.h"
|
||||
#include "tidy.h"
|
||||
|
||||
Bool TIDY_CALL tidyNodeIsText( TidyNode tnod )
|
||||
{ return TY_(nodeIsText)( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool tidyNodeCMIsBlock( TidyNode tnod ); /* not exported yet */
|
||||
Bool tidyNodeCMIsBlock( TidyNode tnod )
|
||||
{ return TY_(nodeCMIsBlock)( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool tidyNodeCMIsInline( TidyNode tnod ); /* not exported yet */
|
||||
Bool tidyNodeCMIsInline( TidyNode tnod )
|
||||
{ return TY_(nodeCMIsInline)( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool tidyNodeCMIsEmpty( TidyNode tnod ); /* not exported yet */
|
||||
Bool tidyNodeCMIsEmpty( TidyNode tnod )
|
||||
{ return TY_(nodeCMIsEmpty)( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsHeader( TidyNode tnod )
|
||||
{ return TY_(nodeIsHeader)( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
|
||||
Bool TIDY_CALL tidyNodeIsHTML( TidyNode tnod )
|
||||
{ return nodeIsHTML( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsHEAD( TidyNode tnod )
|
||||
{ return nodeIsHEAD( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTITLE( TidyNode tnod )
|
||||
{ return nodeIsTITLE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBASE( TidyNode tnod )
|
||||
{ return nodeIsBASE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsMETA( TidyNode tnod )
|
||||
{ return nodeIsMETA( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBODY( TidyNode tnod )
|
||||
{ return nodeIsBODY( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsFRAMESET( TidyNode tnod )
|
||||
{ return nodeIsFRAMESET( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsFRAME( TidyNode tnod )
|
||||
{ return nodeIsFRAME( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsIFRAME( TidyNode tnod )
|
||||
{ return nodeIsIFRAME( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsNOFRAMES( TidyNode tnod )
|
||||
{ return nodeIsNOFRAMES( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsHR( TidyNode tnod )
|
||||
{ return nodeIsHR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH1( TidyNode tnod )
|
||||
{ return nodeIsH1( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH2( TidyNode tnod )
|
||||
{ return nodeIsH2( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsPRE( TidyNode tnod )
|
||||
{ return nodeIsPRE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsLISTING( TidyNode tnod )
|
||||
{ return nodeIsLISTING( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsP( TidyNode tnod )
|
||||
{ return nodeIsP( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsUL( TidyNode tnod )
|
||||
{ return nodeIsUL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsOL( TidyNode tnod )
|
||||
{ return nodeIsOL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsDL( TidyNode tnod )
|
||||
{ return nodeIsDL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsDIR( TidyNode tnod )
|
||||
{ return nodeIsDIR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsLI( TidyNode tnod )
|
||||
{ return nodeIsLI( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsDT( TidyNode tnod )
|
||||
{ return nodeIsDT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsDD( TidyNode tnod )
|
||||
{ return nodeIsDD( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTABLE( TidyNode tnod )
|
||||
{ return nodeIsTABLE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsCAPTION( TidyNode tnod )
|
||||
{ return nodeIsCAPTION( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTD( TidyNode tnod )
|
||||
{ return nodeIsTD( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTH( TidyNode tnod )
|
||||
{ return nodeIsTH( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTR( TidyNode tnod )
|
||||
{ return nodeIsTR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsCOL( TidyNode tnod )
|
||||
{ return nodeIsCOL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsCOLGROUP( TidyNode tnod )
|
||||
{ return nodeIsCOLGROUP( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBR( TidyNode tnod )
|
||||
{ return nodeIsBR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsA( TidyNode tnod )
|
||||
{ return nodeIsA( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsLINK( TidyNode tnod )
|
||||
{ return nodeIsLINK( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsB( TidyNode tnod )
|
||||
{ return nodeIsB( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsI( TidyNode tnod )
|
||||
{ return nodeIsI( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSTRONG( TidyNode tnod )
|
||||
{ return nodeIsSTRONG( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsEM( TidyNode tnod )
|
||||
{ return nodeIsEM( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBIG( TidyNode tnod )
|
||||
{ return nodeIsBIG( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSMALL( TidyNode tnod )
|
||||
{ return nodeIsSMALL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsPARAM( TidyNode tnod )
|
||||
{ return nodeIsPARAM( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsOPTION( TidyNode tnod )
|
||||
{ return nodeIsOPTION( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsOPTGROUP( TidyNode tnod )
|
||||
{ return nodeIsOPTGROUP( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsIMG( TidyNode tnod )
|
||||
{ return nodeIsIMG( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsMAP( TidyNode tnod )
|
||||
{ return nodeIsMAP( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsAREA( TidyNode tnod )
|
||||
{ return nodeIsAREA( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsNOBR( TidyNode tnod )
|
||||
{ return nodeIsNOBR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsWBR( TidyNode tnod )
|
||||
{ return nodeIsWBR( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsFONT( TidyNode tnod )
|
||||
{ return nodeIsFONT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsLAYER( TidyNode tnod )
|
||||
{ return nodeIsLAYER( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSPACER( TidyNode tnod )
|
||||
{ return nodeIsSPACER( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsCENTER( TidyNode tnod )
|
||||
{ return nodeIsCENTER( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSTYLE( TidyNode tnod )
|
||||
{ return nodeIsSTYLE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSCRIPT( TidyNode tnod )
|
||||
{ return nodeIsSCRIPT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsNOSCRIPT( TidyNode tnod )
|
||||
{ return nodeIsNOSCRIPT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsFORM( TidyNode tnod )
|
||||
{ return nodeIsFORM( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsTEXTAREA( TidyNode tnod )
|
||||
{ return nodeIsTEXTAREA( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBLOCKQUOTE( TidyNode tnod )
|
||||
{ return nodeIsBLOCKQUOTE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsAPPLET( TidyNode tnod )
|
||||
{ return nodeIsAPPLET( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsOBJECT( TidyNode tnod )
|
||||
{ return nodeIsOBJECT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsDIV( TidyNode tnod )
|
||||
{ return nodeIsDIV( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSPAN( TidyNode tnod )
|
||||
{ return nodeIsSPAN( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsINPUT( TidyNode tnod )
|
||||
{ return nodeIsINPUT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsQ( TidyNode tnod )
|
||||
{ return nodeIsQ( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsLABEL( TidyNode tnod )
|
||||
{ return nodeIsLABEL( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH3( TidyNode tnod )
|
||||
{ return nodeIsH3( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH4( TidyNode tnod )
|
||||
{ return nodeIsH4( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH5( TidyNode tnod )
|
||||
{ return nodeIsH5( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsH6( TidyNode tnod )
|
||||
{ return nodeIsH6( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsADDRESS( TidyNode tnod )
|
||||
{ return nodeIsADDRESS( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsXMP( TidyNode tnod )
|
||||
{ return nodeIsXMP( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSELECT( TidyNode tnod )
|
||||
{ return nodeIsSELECT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBLINK( TidyNode tnod )
|
||||
{ return nodeIsBLINK( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsMARQUEE( TidyNode tnod )
|
||||
{ return nodeIsMARQUEE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsEMBED( TidyNode tnod )
|
||||
{ return nodeIsEMBED( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsBASEFONT( TidyNode tnod )
|
||||
{ return nodeIsBASEFONT( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsISINDEX( TidyNode tnod )
|
||||
{ return nodeIsISINDEX( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsS( TidyNode tnod )
|
||||
{ return nodeIsS( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod )
|
||||
{ return nodeIsSTRIKE( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsU( TidyNode tnod )
|
||||
{ return nodeIsU( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod )
|
||||
{ return nodeIsMENU( tidyNodeToImpl(tnod) );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
1004
src/tags.c
Normal file
1004
src/tags.c
Normal file
File diff suppressed because it is too large
Load diff
235
src/tags.h
Normal file
235
src/tags.h
Normal file
|
@ -0,0 +1,235 @@
|
|||
#ifndef __TAGS_H__
|
||||
#define __TAGS_H__
|
||||
|
||||
/* tags.h -- recognize HTML tags
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/15 10:17:55 $
|
||||
$Revision: 1.20 $
|
||||
|
||||
The HTML tags are stored as 8 bit ASCII strings.
|
||||
Use lookupw() to find a tag given a wide char string.
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
#include "attrdict.h"
|
||||
|
||||
typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
|
||||
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
|
||||
|
||||
/*
|
||||
Tag dictionary node
|
||||
*/
|
||||
|
||||
/* Types of tags that the user can define.
** Every non-null value is a distinct single bit (1, 2, 4, 8).
** NOTE(review): presumably so several types can be OR-ed into one mask - confirm.
** tagtype_null (0) is what TY_(FreeDeclaredTags) takes to mean "free all"
** (see the comment on its prototype).
*/
|
||||
typedef enum
|
||||
{
|
||||
tagtype_null = 0, /* no type / wildcard */
|
||||
tagtype_empty = 1, /* user-declared empty tag */
|
||||
tagtype_inline = 2, /* user-declared inline tag */
|
||||
tagtype_block = 4, /* user-declared block tag */
|
||||
tagtype_pre = 8 /* user-declared pre tag */
|
||||
} UserTagType;
|
||||
|
||||
/* One entry of the tag dictionary: identifies a tag and bundles the
** routines used to parse it and to check its attributes.
*/
struct _Dict
|
||||
{
|
||||
TidyTagId id; /* tag identifier; this is what TagId()/TagIsId() read via node->tag->id */
|
||||
tmbstr name; /* tag name string */
|
||||
uint versions; /* NOTE(review): looks like a bit mask of document versions that allow the tag - confirm */
|
||||
AttrVersion const * attrvers; /* read-only attribute/version data; AttrVersion is declared outside this header */
|
||||
uint model; /* NOTE(review): presumably the content-model bits tested by TY_(nodeHasCM) - confirm */
|
||||
Parser* parser; /* parse routine for this tag (see the Parser typedef above) */
|
||||
CheckAttribs* chkattrs; /* attribute-check routine (see the CheckAttribs typedef above) */
|
||||
Dict* next; /* next entry when entries are chained into a list */
|
||||
};
|
||||
|
||||
#if !defined(ELEMENT_HASH_LOOKUP)
|
||||
#define ELEMENT_HASH_LOOKUP 1
|
||||
#endif
|
||||
|
||||
#if ELEMENT_HASH_LOOKUP
|
||||
enum
|
||||
{
|
||||
ELEMENT_HASH_SIZE=178u
|
||||
};
|
||||
|
||||
/* Node of a singly linked chain of dictionary entries.  TidyTagImpl keeps
** ELEMENT_HASH_SIZE such chain heads in its hashtab[] array.
*/
struct _DictHash
|
||||
{
|
||||
Dict const* tag; /* dictionary entry referenced by this chain node (not modified through it) */
|
||||
struct _DictHash* next; /* next node in the same chain */
|
||||
};
|
||||
|
||||
typedef struct _DictHash DictHash;
|
||||
#endif
|
||||
|
||||
struct _TidyTagImpl
|
||||
{
|
||||
Dict* xml_tags; /* placeholder for all xml tags */
|
||||
Dict* declared_tag_list; /* User declared tags */
|
||||
#if ELEMENT_HASH_LOOKUP
|
||||
DictHash* hashtab[ELEMENT_HASH_SIZE];
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct _TidyTagImpl TidyTagImpl;
|
||||
|
||||
/* interface for finding tag by name */
|
||||
const Dict* TY_(LookupTagDef)( TidyTagId tid );
|
||||
Bool TY_(FindTag)( TidyDocImpl* doc, Node *node );
|
||||
Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
|
||||
void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
|
||||
void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType ); /* tagtype_null to free all */
|
||||
|
||||
TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc );
|
||||
ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
|
||||
TidyIterator* iter );
|
||||
|
||||
void TY_(InitTags)( TidyDocImpl* doc );
|
||||
void TY_(FreeTags)( TidyDocImpl* doc );
|
||||
|
||||
|
||||
/* Parser methods for tags */
|
||||
|
||||
Parser TY_(ParseHTML);
|
||||
Parser TY_(ParseHead);
|
||||
Parser TY_(ParseTitle);
|
||||
Parser TY_(ParseScript);
|
||||
Parser TY_(ParseFrameSet);
|
||||
Parser TY_(ParseNoFrames);
|
||||
Parser TY_(ParseBody);
|
||||
Parser TY_(ParsePre);
|
||||
Parser TY_(ParseList);
|
||||
Parser TY_(ParseDefList);
|
||||
Parser TY_(ParseBlock);
|
||||
Parser TY_(ParseInline);
|
||||
Parser TY_(ParseEmpty);
|
||||
Parser TY_(ParseTableTag);
|
||||
Parser TY_(ParseColGroup);
|
||||
Parser TY_(ParseRowGroup);
|
||||
Parser TY_(ParseRow);
|
||||
Parser TY_(ParseSelect);
|
||||
Parser TY_(ParseOptGroup);
|
||||
Parser TY_(ParseText);
|
||||
|
||||
CheckAttribs TY_(CheckAttributes);
|
||||
|
||||
/* 0 == TidyTag_UNKNOWN */
/* TagId: id of the node's tag, or TidyTag_UNKNOWN when node is null or has
** no tag attached.
** TagIsId: non-zero when node is non-null, has a tag, and the tag's id
** equals tid.  tid is parenthesised so that compound arguments such as
** "a | b" or "c ? x : y" are compared as a whole.
** Both macros evaluate `node` more than once: do not pass an expression
** with side effects.
*/
#define TagId(node)        ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
|
||||
|
||||
Bool TY_(nodeIsText)( Node* node );
|
||||
Bool TY_(nodeIsElement)( Node* node );
|
||||
|
||||
Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
|
||||
|
||||
#if 0
|
||||
/* Compare & result to operand. If equal, then all bits
|
||||
** requested are set.
|
||||
*/
|
||||
Bool nodeMatchCM( Node* node, uint contentModel );
|
||||
#endif
|
||||
|
||||
/* True if any of the bits requested are set.
|
||||
*/
|
||||
Bool TY_(nodeHasCM)( Node* node, uint contentModel );
|
||||
|
||||
Bool TY_(nodeCMIsBlock)( Node* node );
|
||||
Bool TY_(nodeCMIsInline)( Node* node );
|
||||
Bool TY_(nodeCMIsEmpty)( Node* node );
|
||||
|
||||
|
||||
Bool TY_(nodeIsHeader)( Node* node ); /* H1, H2, ..., H6 */
|
||||
uint TY_(nodeHeaderLevel)( Node* node ); /* 1, 2, ..., 6 */
|
||||
|
||||
#define nodeIsHTML( node ) TagIsId( node, TidyTag_HTML )
|
||||
#define nodeIsHEAD( node ) TagIsId( node, TidyTag_HEAD )
|
||||
#define nodeIsTITLE( node ) TagIsId( node, TidyTag_TITLE )
|
||||
#define nodeIsBASE( node ) TagIsId( node, TidyTag_BASE )
|
||||
#define nodeIsMETA( node ) TagIsId( node, TidyTag_META )
|
||||
#define nodeIsBODY( node ) TagIsId( node, TidyTag_BODY )
|
||||
#define nodeIsFRAMESET( node ) TagIsId( node, TidyTag_FRAMESET )
|
||||
#define nodeIsFRAME( node ) TagIsId( node, TidyTag_FRAME )
|
||||
#define nodeIsIFRAME( node ) TagIsId( node, TidyTag_IFRAME )
|
||||
#define nodeIsNOFRAMES( node ) TagIsId( node, TidyTag_NOFRAMES )
|
||||
#define nodeIsHR( node ) TagIsId( node, TidyTag_HR )
|
||||
#define nodeIsH1( node ) TagIsId( node, TidyTag_H1 )
|
||||
#define nodeIsH2( node ) TagIsId( node, TidyTag_H2 )
|
||||
#define nodeIsPRE( node ) TagIsId( node, TidyTag_PRE )
|
||||
#define nodeIsLISTING( node ) TagIsId( node, TidyTag_LISTING )
|
||||
#define nodeIsP( node ) TagIsId( node, TidyTag_P )
|
||||
#define nodeIsUL( node ) TagIsId( node, TidyTag_UL )
|
||||
#define nodeIsOL( node ) TagIsId( node, TidyTag_OL )
|
||||
#define nodeIsDL( node ) TagIsId( node, TidyTag_DL )
|
||||
#define nodeIsDIR( node ) TagIsId( node, TidyTag_DIR )
|
||||
#define nodeIsLI( node ) TagIsId( node, TidyTag_LI )
|
||||
#define nodeIsDT( node ) TagIsId( node, TidyTag_DT )
|
||||
#define nodeIsDD( node ) TagIsId( node, TidyTag_DD )
|
||||
#define nodeIsTABLE( node ) TagIsId( node, TidyTag_TABLE )
|
||||
#define nodeIsCAPTION( node ) TagIsId( node, TidyTag_CAPTION )
|
||||
#define nodeIsTD( node ) TagIsId( node, TidyTag_TD )
|
||||
#define nodeIsTH( node ) TagIsId( node, TidyTag_TH )
|
||||
#define nodeIsTR( node ) TagIsId( node, TidyTag_TR )
|
||||
#define nodeIsCOL( node ) TagIsId( node, TidyTag_COL )
|
||||
#define nodeIsCOLGROUP( node ) TagIsId( node, TidyTag_COLGROUP )
|
||||
#define nodeIsBR( node ) TagIsId( node, TidyTag_BR )
|
||||
#define nodeIsA( node ) TagIsId( node, TidyTag_A )
|
||||
#define nodeIsLINK( node ) TagIsId( node, TidyTag_LINK )
|
||||
#define nodeIsB( node ) TagIsId( node, TidyTag_B )
|
||||
#define nodeIsI( node ) TagIsId( node, TidyTag_I )
|
||||
#define nodeIsSTRONG( node ) TagIsId( node, TidyTag_STRONG )
|
||||
#define nodeIsEM( node ) TagIsId( node, TidyTag_EM )
|
||||
#define nodeIsBIG( node ) TagIsId( node, TidyTag_BIG )
|
||||
#define nodeIsSMALL( node ) TagIsId( node, TidyTag_SMALL )
|
||||
#define nodeIsPARAM( node ) TagIsId( node, TidyTag_PARAM )
|
||||
#define nodeIsOPTION( node ) TagIsId( node, TidyTag_OPTION )
|
||||
#define nodeIsOPTGROUP( node ) TagIsId( node, TidyTag_OPTGROUP )
|
||||
#define nodeIsIMG( node ) TagIsId( node, TidyTag_IMG )
|
||||
#define nodeIsMAP( node ) TagIsId( node, TidyTag_MAP )
|
||||
#define nodeIsAREA( node ) TagIsId( node, TidyTag_AREA )
|
||||
#define nodeIsNOBR( node ) TagIsId( node, TidyTag_NOBR )
|
||||
#define nodeIsWBR( node ) TagIsId( node, TidyTag_WBR )
|
||||
#define nodeIsFONT( node ) TagIsId( node, TidyTag_FONT )
|
||||
#define nodeIsLAYER( node ) TagIsId( node, TidyTag_LAYER )
|
||||
#define nodeIsSPACER( node ) TagIsId( node, TidyTag_SPACER )
|
||||
#define nodeIsCENTER( node ) TagIsId( node, TidyTag_CENTER )
|
||||
#define nodeIsSTYLE( node ) TagIsId( node, TidyTag_STYLE )
|
||||
#define nodeIsSCRIPT( node ) TagIsId( node, TidyTag_SCRIPT )
|
||||
#define nodeIsNOSCRIPT( node ) TagIsId( node, TidyTag_NOSCRIPT )
|
||||
#define nodeIsFORM( node ) TagIsId( node, TidyTag_FORM )
|
||||
#define nodeIsTEXTAREA( node ) TagIsId( node, TidyTag_TEXTAREA )
|
||||
#define nodeIsBLOCKQUOTE( node ) TagIsId( node, TidyTag_BLOCKQUOTE )
|
||||
#define nodeIsAPPLET( node ) TagIsId( node, TidyTag_APPLET )
|
||||
#define nodeIsOBJECT( node ) TagIsId( node, TidyTag_OBJECT )
|
||||
#define nodeIsDIV( node ) TagIsId( node, TidyTag_DIV )
|
||||
#define nodeIsSPAN( node ) TagIsId( node, TidyTag_SPAN )
|
||||
#define nodeIsINPUT( node ) TagIsId( node, TidyTag_INPUT )
|
||||
#define nodeIsQ( node ) TagIsId( node, TidyTag_Q )
|
||||
#define nodeIsLABEL( node ) TagIsId( node, TidyTag_LABEL )
|
||||
#define nodeIsH3( node ) TagIsId( node, TidyTag_H3 )
|
||||
#define nodeIsH4( node ) TagIsId( node, TidyTag_H4 )
|
||||
#define nodeIsH5( node ) TagIsId( node, TidyTag_H5 )
|
||||
#define nodeIsH6( node ) TagIsId( node, TidyTag_H6 )
|
||||
#define nodeIsADDRESS( node ) TagIsId( node, TidyTag_ADDRESS )
|
||||
#define nodeIsXMP( node ) TagIsId( node, TidyTag_XMP )
|
||||
#define nodeIsSELECT( node ) TagIsId( node, TidyTag_SELECT )
|
||||
#define nodeIsBLINK( node ) TagIsId( node, TidyTag_BLINK )
|
||||
#define nodeIsMARQUEE( node ) TagIsId( node, TidyTag_MARQUEE )
|
||||
#define nodeIsEMBED( node ) TagIsId( node, TidyTag_EMBED )
|
||||
#define nodeIsBASEFONT( node ) TagIsId( node, TidyTag_BASEFONT )
|
||||
#define nodeIsISINDEX( node ) TagIsId( node, TidyTag_ISINDEX )
|
||||
#define nodeIsS( node ) TagIsId( node, TidyTag_S )
|
||||
#define nodeIsSTRIKE( node ) TagIsId( node, TidyTag_STRIKE )
|
||||
#define nodeIsSUB( node ) TagIsId( node, TidyTag_SUB )
|
||||
#define nodeIsSUP( node ) TagIsId( node, TidyTag_SUP )
|
||||
#define nodeIsU( node ) TagIsId( node, TidyTag_U )
|
||||
#define nodeIsMENU( node ) TagIsId( node, TidyTag_MENU )
|
||||
#define nodeIsBUTTON( node ) TagIsId( node, TidyTag_BUTTON )
|
||||
|
||||
|
||||
#endif /* __TAGS_H__ */
|
129
src/tidy-int.h
Normal file
129
src/tidy-int.h
Normal file
|
@ -0,0 +1,129 @@
|
|||
#ifndef __TIDY_INT_H__
|
||||
#define __TIDY_INT_H__
|
||||
|
||||
/* tidy-int.h -- internal library declarations
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/02/11 09:45:52 $
|
||||
$Revision: 1.13 $
|
||||
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#include "config.h"
|
||||
#include "lexer.h"
|
||||
#include "tags.h"
|
||||
#include "attrs.h"
|
||||
#include "pprint.h"
|
||||
#include "access.h"
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(a,b) (((a) > (b))?(a):(b))
|
||||
#endif
|
||||
#ifndef MIN
|
||||
#define MIN(a,b) (((a) < (b))?(a):(b))
|
||||
#endif
|
||||
|
||||
struct _TidyDocImpl
|
||||
{
|
||||
/* The Document Tree (and backing store buffer) */
|
||||
Node root; /* This MUST remain the first declared
|
||||
variable in this structure */
|
||||
Lexer* lexer;
|
||||
|
||||
/* Config + Markup Declarations */
|
||||
TidyConfigImpl config;
|
||||
TidyTagImpl tags;
|
||||
TidyAttribImpl attribs;
|
||||
|
||||
#if SUPPORT_ACCESSIBILITY_CHECKS
|
||||
/* Accessibility Checks state */
|
||||
TidyAccessImpl access;
|
||||
#endif
|
||||
|
||||
/* The Pretty Print buffer */
|
||||
TidyPrintImpl pprint;
|
||||
|
||||
/* I/O */
|
||||
StreamIn* docIn;
|
||||
StreamOut* docOut;
|
||||
StreamOut* errout;
|
||||
TidyReportFilter mssgFilt;
|
||||
TidyOptCallback pOptCallback;
|
||||
|
||||
/* Parse + Repair Results */
|
||||
uint optionErrors;
|
||||
uint errors;
|
||||
uint warnings;
|
||||
uint accessErrors;
|
||||
uint infoMessages;
|
||||
uint docErrors;
|
||||
int parseStatus;
|
||||
|
||||
uint badAccess; /* for accessibility errors */
|
||||
uint badLayout; /* for bad style errors */
|
||||
uint badChars; /* for bad char encodings */
|
||||
uint badForm; /* for badly placed form tags */
|
||||
|
||||
/* Memory allocator */
|
||||
TidyAllocator* allocator;
|
||||
|
||||
/* Miscellaneous */
|
||||
void* appData;
|
||||
uint nClassId;
|
||||
Bool inputHadBOM;
|
||||
|
||||
#ifdef TIDY_STORE_ORIGINAL_TEXT
|
||||
Bool storeText;
|
||||
#endif
|
||||
|
||||
#if PRESERVE_FILE_TIMES
|
||||
struct utimbuf filetimes;
|
||||
#endif
|
||||
tmbstr givenDoctype;
|
||||
};
|
||||
|
||||
|
||||
/* Twizzle internal/external types */
|
||||
#ifdef NEVER
|
||||
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc );
|
||||
TidyDoc tidyImplToDoc( TidyDocImpl* impl );
|
||||
|
||||
Node* tidyNodeToImpl( TidyNode tnod );
|
||||
TidyNode tidyImplToNode( Node* node );
|
||||
|
||||
AttVal* tidyAttrToImpl( TidyAttr tattr );
|
||||
TidyAttr tidyImplToAttr( AttVal* attval );
|
||||
|
||||
const TidyOptionImpl* tidyOptionToImpl( TidyOption topt );
|
||||
TidyOption tidyImplToOption( const TidyOptionImpl* option );
|
||||
#else
|
||||
|
||||
#define tidyDocToImpl( tdoc ) ((TidyDocImpl*)(tdoc))
|
||||
#define tidyImplToDoc( doc ) ((TidyDoc)(doc))
|
||||
|
||||
#define tidyNodeToImpl( tnod ) ((Node*)(tnod))
|
||||
#define tidyImplToNode( node ) ((TidyNode)(node))
|
||||
|
||||
#define tidyAttrToImpl( tattr ) ((AttVal*)(tattr))
|
||||
#define tidyImplToAttr( attval ) ((TidyAttr)(attval))
|
||||
|
||||
#define tidyOptionToImpl( topt ) ((const TidyOptionImpl*)(topt))
|
||||
#define tidyImplToOption( option ) ((TidyOption)(option))
|
||||
|
||||
#endif
|
||||
|
||||
/** Wrappers for easy memory allocation using the document's allocator */
|
||||
#define TidyDocAlloc(doc, size) TidyAlloc((doc)->allocator, size)
|
||||
#define TidyDocRealloc(doc, block, size) TidyRealloc((doc)->allocator, block, size)
|
||||
#define TidyDocFree(doc, block) TidyFree((doc)->allocator, block)
|
||||
#define TidyDocPanic(doc, msg) TidyPanic((doc)->allocator, msg)
|
||||
|
||||
int TY_(DocParseStream)( TidyDocImpl* impl, StreamIn* in );
|
||||
|
||||
#endif /* __TIDY_INT_H__ */
|
1751
src/tidylib.c
Normal file
1751
src/tidylib.c
Normal file
File diff suppressed because it is too large
Load diff
306
src/tmbstr.c
Normal file
306
src/tmbstr.c
Normal file
|
@ -0,0 +1,306 @@
|
|||
/* tmbstr.c -- Tidy string utility functions
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/29 16:31:08 $
|
||||
$Revision: 1.13 $
|
||||
|
||||
*/
|
||||
|
||||
#include "forward.h"
|
||||
#include "tmbstr.h"
|
||||
#include "lexer.h"
|
||||
|
||||
/* like strdup but using an allocator */
|
||||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str )
|
||||
{
|
||||
tmbstr s = NULL;
|
||||
if ( str )
|
||||
{
|
||||
uint len = TY_(tmbstrlen)( str );
|
||||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
|
||||
while ( 0 != (*cp++ = *str++) )
|
||||
/**/;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* like strndup but using an allocator */
|
||||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len )
|
||||
{
|
||||
tmbstr s = NULL;
|
||||
if ( str && len > 0 )
|
||||
{
|
||||
tmbstr cp = s = (tmbstr) TidyAlloc( allocator, 1+len );
|
||||
while ( len-- > 0 && (*cp++ = *str++) )
|
||||
/**/;
|
||||
*cp = 0;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* exactly same as strncpy */
|
||||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size )
|
||||
{
|
||||
if ( s1 != NULL && s2 != NULL )
|
||||
{
|
||||
tmbstr cp = s1;
|
||||
while ( *s2 && --size ) /* Predecrement: reserve byte */
|
||||
*cp++ = *s2++; /* for NULL terminator. */
|
||||
*cp = 0;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/* Allows expressions like: cp += tmbstrcpy( cp, "joebob" );
|
||||
*/
|
||||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint ncpy = 0;
|
||||
while (0 != (*s1++ = *s2++) )
|
||||
++ncpy;
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
/* Allows expressions like: cp += tmbstrcat( cp, "joebob" );
|
||||
*/
|
||||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint ncpy = 0;
|
||||
while ( *s1 )
|
||||
++s1;
|
||||
|
||||
while (0 != (*s1++ = *s2++) )
|
||||
++ncpy;
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
/* exactly same as strcmp */
|
||||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
int c;
|
||||
while ((c = *s1) == *s2)
|
||||
{
|
||||
if (c == '\0')
|
||||
return 0;
|
||||
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
|
||||
return (*s1 > *s2 ? 1 : -1);
|
||||
}
|
||||
|
||||
/* returns byte count, not char count */
|
||||
uint TY_(tmbstrlen)( ctmbstr str )
|
||||
{
|
||||
uint len = 0;
|
||||
if ( str )
|
||||
{
|
||||
while ( *str++ )
|
||||
++len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
MS C 4.2 doesn't include strcasecmp.
|
||||
Note that tolower and toupper won't
|
||||
work on chars > 127.
|
||||
|
||||
Neither does ToLower()!
|
||||
*/
|
||||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 )
|
||||
{
|
||||
uint c;
|
||||
|
||||
while (c = (uint)(*s1), TY_(ToLower)(c) == TY_(ToLower)((uint)(*s2)))
|
||||
{
|
||||
if (c == '\0')
|
||||
return 0;
|
||||
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
|
||||
return (*s1 > *s2 ? 1 : -1);
|
||||
}
|
||||
|
||||
/* Compare at most n bytes of s1 and s2.  Returns 0 when the first n
** bytes match or both strings end, identically, before that; otherwise
** 1 or -1 according to the first differing byte, ordered as unsigned
** values.  The count is checked before each byte is read, so nothing
** at index n or beyond is ever touched; n == 0 reads nothing.
*/
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        byte c1 = (byte) *s1;
        byte c2 = (byte) *s2;

        if ( c1 != c2 )
            return ( c1 > c2 ? 1 : -1 );
        if ( c1 == '\0' )        /* both strings ended together */
            return 0;
    }

    return 0;
}
|
||||
|
||||
/* Case-insensitive comparison of at most n bytes, folding each byte
** with ToLower().  Returns 0 when the first n bytes match or both
** strings end, identically, before that; otherwise 1 or -1 taken from
** the folded values of the first differing position.  The count is
** checked before each byte is read, so nothing at index n or beyond is
** ever touched; n == 0 reads nothing.
*/
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n )
{
    for ( ; n > 0; --n, ++s1, ++s2 )
    {
        uint c1 = TY_(ToLower)( (uint)(*s1) );
        uint c2 = TY_(ToLower)( (uint)(*s2) );

        if ( c1 != c2 )
            return ( c1 > c2 ? 1 : -1 );
        if ( *s1 == '\0' )       /* both strings ended together */
            return 0;
    }

    return 0;
}
|
||||
|
||||
#if 0
/* Return the offset of the first occurrence of cc within the first
** maxlen bytes of s1, or -1 if it does not occur.  The scan is bounded
** by maxlen only; it does not stop at a terminator.
*/
int TY_(tmbstrnchr)( ctmbstr s1, uint maxlen, tmbchar cc )
{
    int pos = 0;

    while ( (uint)pos < maxlen )
    {
        if ( s1[pos] == cc )
            return pos;
        ++pos;
    }

    return -1;
}
#endif
|
||||
|
||||
/* Case-sensitive search for s2 inside the first len1 bytes of s1.
** Returns a pointer to the first match, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* highest start offset that still leaves room for s2 */
    int pos = 0;

    while ( pos <= last )
    {
        ctmbstr candidate = s1 + pos;
        if ( TY_(tmbstrncmp)(candidate, s2, len2) == 0 )
            return candidate;
        ++pos;
    }
    return NULL;
}
|
||||
|
||||
#if 0
/* Case-insensitive search for s2 inside the first len1 bytes of s1.
** Returns a pointer to the first match, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 )
{
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* highest start offset that still leaves room for s2 */
    int pos = 0;

    while ( pos <= last )
    {
        ctmbstr candidate = s1 + pos;
        if ( TY_(tmbstrncasecmp)(candidate, s2, len2) == 0 )
            return candidate;
        ++pos;
    }
    return NULL;
}
#endif
|
||||
|
||||
/* Search for s2 inside s1.  Despite the plain name, candidates are
** compared with tmbstrncasecmp, so the match is case-insensitive.
** Returns a pointer to the first match, or NULL when there is none.
*/
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 )
{
    uint len1 = TY_(tmbstrlen)(s1);
    uint len2 = TY_(tmbstrlen)(s2);
    int last = len1 - len2;     /* highest start offset that still leaves room for s2 */
    int pos = 0;

    while ( pos <= last )
    {
        ctmbstr candidate = s1 + pos;
        if ( TY_(tmbstrncasecmp)(candidate, s2, len2) == 0 )
            return candidate;
        ++pos;
    }
    return NULL;
}
|
||||
|
||||
/* Transform ASCII chars in string to lower case */
|
||||
tmbstr TY_(tmbstrtolower)( tmbstr s )
|
||||
{
|
||||
tmbstr cp;
|
||||
for ( cp=s; *cp; ++cp )
|
||||
*cp = (tmbchar) TY_(ToLower)( *cp );
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Transform ASCII chars in string to upper case */
|
||||
tmbstr TY_(tmbstrtoupper)(tmbstr s)
|
||||
{
|
||||
tmbstr cp;
|
||||
|
||||
for (cp = s; *cp; ++cp)
|
||||
*cp = (tmbchar)TY_(ToUpper)(*cp);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
#if 0
/* Decide whether two file names are the same by comparing the name
** strings only: case-sensitively when FILENAMES_CASE_SENSITIVE is set,
** case-insensitively otherwise.
*/
Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 )
{
    int diff;
#if FILENAMES_CASE_SENSITIVE
    diff = TY_(tmbstrcmp)( filename1, filename2 );
#else
    diff = TY_(tmbstrcasecmp)( filename1, filename2 );
#endif
    return ( diff == 0 );
}
#endif
|
||||
|
||||
/* Format into buffer, which holds count bytes, from a va_list.
** With HAS_VSNPRINTF the output is limited to count-1 bytes and
** buffer[count-1] is always set to 0, so the result is terminated.
** A count of 0 writes nothing at all.
** Without HAS_VSNPRINTF the call falls back to vsprintf, which ignores
** count: the caller must then guarantee the buffer is large enough.
** Returns whatever the underlying formatting function returns.
*/
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
{
    int retval;
#if HAS_VSNPRINTF
    if ( count == 0 )
    {
        /* "count - 1" would wrap to the largest size_t value and
        ** buffer[count - 1] would land outside the buffer; a size of 0
        ** makes vsnprintf store nothing.
        */
        return vsnprintf( buffer, 0, format, args );
    }
    retval = vsnprintf(buffer, count - 1, format, args);
    /* todo: conditionally null-terminate the string? */
    buffer[count - 1] = 0;
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    return retval;
}
|
||||
|
||||
/* Variadic front end with the same behaviour as tmbvsnprintf: format
** into buffer, which holds count bytes.
** With HAS_VSNPRINTF the output is limited to count-1 bytes and
** buffer[count-1] is always set to 0.  A count of 0 writes nothing.
** Without HAS_VSNPRINTF the call falls back to vsprintf, which ignores
** count: the caller must then guarantee the buffer is large enough.
** Returns whatever the underlying formatting function returns.
*/
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
{
    int retval;
    va_list args;
    va_start(args, format);
#if HAS_VSNPRINTF
    if ( count == 0 )
    {
        /* "count - 1" would wrap to the largest size_t value and
        ** buffer[count - 1] would land outside the buffer; a size of 0
        ** makes vsnprintf store nothing.
        */
        retval = vsnprintf( buffer, 0, format, args );
    }
    else
    {
        retval = vsnprintf(buffer, count - 1, format, args);
        /* todo: conditionally null-terminate the string? */
        buffer[count - 1] = 0;
    }
#else
    retval = vsprintf(buffer, format, args);
#endif /* HAS_VSNPRINTF */
    va_end(args);
    return retval;
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
92
src/tmbstr.h
Normal file
92
src/tmbstr.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
#ifndef __TMBSTR_H__
|
||||
#define __TMBSTR_H__
|
||||
|
||||
/* tmbstr.h - Tidy string utility functions
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/12/29 16:31:09 $
|
||||
$Revision: 1.11 $
|
||||
|
||||
*/
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/* like strdup but using an allocator */
|
||||
tmbstr TY_(tmbstrdup)( TidyAllocator *allocator, ctmbstr str );
|
||||
|
||||
/* like strndup but using an allocator */
|
||||
tmbstr TY_(tmbstrndup)( TidyAllocator *allocator, ctmbstr str, uint len);
|
||||
|
||||
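/* like strncpy, but s1 is always NUL-terminated and never padded (at most size-1 bytes of s2 are copied); returns what is left of size */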
|
||||
uint TY_(tmbstrncpy)( tmbstr s1, ctmbstr s2, uint size );
|
||||
|
||||
uint TY_(tmbstrcpy)( tmbstr s1, ctmbstr s2 );
|
||||
|
||||
uint TY_(tmbstrcat)( tmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* exactly same as strcmp */
|
||||
int TY_(tmbstrcmp)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* returns byte count, not char count */
|
||||
uint TY_(tmbstrlen)( ctmbstr str );
|
||||
|
||||
/*
|
||||
MS C 4.2 doesn't include strcasecmp.
|
||||
Note that tolower and toupper won't
|
||||
work on chars > 127.
|
||||
|
||||
Neither do Lexer.ToLower() or Lexer.ToUpper()!
|
||||
|
||||
We get away with this because, except for XML tags,
|
||||
we are always comparing to ascii element and
|
||||
attribute names defined by HTML specs.
|
||||
*/
|
||||
int TY_(tmbstrcasecmp)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
int TY_(tmbstrncmp)( ctmbstr s1, ctmbstr s2, uint n );
|
||||
|
||||
int TY_(tmbstrncasecmp)( ctmbstr s1, ctmbstr s2, uint n );
|
||||
|
||||
/* return offset of cc from beginning of s1,
|
||||
** -1 if not found.
|
||||
*/
|
||||
/* int TY_(tmbstrnchr)( ctmbstr s1, uint len1, tmbchar cc ); */
|
||||
|
||||
ctmbstr TY_(tmbsubstrn)( ctmbstr s1, uint len1, ctmbstr s2 );
|
||||
/* ctmbstr TY_(tmbsubstrncase)( ctmbstr s1, uint len1, ctmbstr s2 ); */
|
||||
ctmbstr TY_(tmbsubstr)( ctmbstr s1, ctmbstr s2 );
|
||||
|
||||
/* transform string to lower case */
|
||||
tmbstr TY_(tmbstrtolower)( tmbstr s );
|
||||
|
||||
/* Transform ASCII chars in string to upper case */
|
||||
tmbstr TY_(tmbstrtoupper)( tmbstr s );
|
||||
|
||||
/* Bool TY_(tmbsamefile)( ctmbstr filename1, ctmbstr filename2 ); */
|
||||
|
||||
int TY_(tmbvsnprintf)(tmbstr buffer, size_t count, ctmbstr format, va_list args)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 3, 0)))
|
||||
#endif
|
||||
;
|
||||
int TY_(tmbsnprintf)(tmbstr buffer, size_t count, ctmbstr format, ...)
|
||||
#ifdef __GNUC__
|
||||
__attribute__((format(printf, 3, 4)))
|
||||
#endif
|
||||
;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* __TMBSTR_H__ */
|
539
src/utf8.c
Normal file
539
src/utf8.c
Normal file
|
@ -0,0 +1,539 @@
|
|||
/* utf8.c -- convert characters to/from UTF-8
|
||||
|
||||
(c) 1998-2007 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2007/05/30 16:47:31 $
|
||||
$Revision: 1.10 $
|
||||
|
||||
Uses public interfaces to abstract input source and output
|
||||
sink, which may be user supplied or either FILE* or memory
|
||||
based Tidy implementations. Encoding support is uniform
|
||||
regardless of I/O mechanism.
|
||||
|
||||
Note, UTF-8 encoding, by itself, does not affect the actual
|
||||
"codepoints" of the underlying character encoding. In the
|
||||
cases of ASCII, Latin1, Unicode (16-bit, BMP), these all
|
||||
refer to ISO-10646 "codepoints". For anything else, they
|
||||
refer to some other "codepoint" set.
|
||||
|
||||
Put another way, UTF-8 is a variable length method to
|
||||
represent any non-negative integer value. The glyph
|
||||
that an integer value represents is unchanged and defined
|
||||
externally (e.g. by ISO-10646, Big5, Win1252, MacRoman,
|
||||
Latin2-9, and so on).
|
||||
|
||||
Put still another way, UTF-8 is more of a _transfer_ encoding
|
||||
than a _character_ encoding, per se.
|
||||
*/
|
||||
|
||||
#include "tidy.h"
|
||||
#include "forward.h"
|
||||
#include "utf8.h"
|
||||
|
||||
/*
|
||||
UTF-8 encoding/decoding functions
|
||||
Return # of bytes in UTF-8 sequence; result < 0 if illegal sequence
|
||||
|
||||
Also see below for UTF-16 encoding/decoding functions
|
||||
|
||||
References :
|
||||
|
||||
1) UCS Transformation Format 8 (UTF-8):
|
||||
ISO/IEC 10646-1:1996 Amendment 2 or ISO/IEC 10646-1:2000 Annex D
|
||||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335>
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html>
|
||||
|
||||
Table 4 - Mapping from UCS-4 to UTF-8
|
||||
|
||||
2) Unicode standards:
|
||||
<http://www.unicode.org/unicode/standard/standard.html>
|
||||
|
||||
3) Legal UTF-8 byte sequences:
|
||||
<http://www.unicode.org/unicode/uni2errata/UTF-8_Corrigendum.html>
|
||||
|
||||
Code point 1st byte 2nd byte 3rd byte 4th byte
|
||||
---------- -------- -------- -------- --------
|
||||
U+0000..U+007F 00..7F
|
||||
U+0080..U+07FF C2..DF 80..BF
|
||||
U+0800..U+0FFF E0 A0..BF 80..BF
|
||||
U+1000..U+FFFF E1..EF 80..BF 80..BF
|
||||
U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
||||
U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
||||
U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
||||
|
||||
The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also
|
||||
allows for the use of five- and six-byte sequences to encode
|
||||
characters that are outside the range of the Unicode character
|
||||
set; those five- and six-byte sequences are illegal for the use
|
||||
of UTF-8 as a transformation of Unicode characters. ISO/IEC 10646
|
||||
does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF
|
||||
(but it does allow other noncharacters).
|
||||
|
||||
4) RFC 2279: UTF-8, a transformation format of ISO 10646:
|
||||
<http://www.ietf.org/rfc/rfc2279.txt>
|
||||
|
||||
5) UTF-8 and Unicode FAQ:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/unicode.html>
|
||||
|
||||
6) Markus Kuhn's UTF-8 decoder stress test file:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt>
|
||||
|
||||
7) UTF-8 Demo:
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt>
|
||||
|
||||
8) UTF-8 Sampler:
|
||||
<http://www.columbia.edu/kermit/utf8.html>
|
||||
|
||||
9) Transformation Format for 16 Planes of Group 00 (UTF-16):
|
||||
ISO/IEC 10646-1:1996 Amendment 1 or ISO/IEC 10646-1:2000 Annex C
|
||||
<http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n2005/n2005.pdf>
|
||||
<http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-16.html>
|
||||
|
||||
10) RFC 2781: UTF-16, an encoding of ISO 10646:
|
||||
<http://www.ietf.org/rfc/rfc2781.txt>
|
||||
|
||||
11) UTF-16 invalid surrogate pairs:
|
||||
<http://www.unicode.org/unicode/faq/utf_bom.html#16>
|
||||
|
||||
UTF-16 UTF-8 UCS-4
|
||||
D83F DFF* F0 9F BF B* 0001FFF*
|
||||
D87F DFF* F0 AF BF B* 0002FFF*
|
||||
D8BF DFF* F0 BF BF B* 0003FFF*
|
||||
D8FF DFF* F1 8F BF B* 0004FFF*
|
||||
D93F DFF* F1 9F BF B* 0005FFF*
|
||||
D97F DFF* F1 AF BF B* 0006FFF*
|
||||
...
|
||||
DBBF DFF* F3 BF BF B* 000FFFF*
|
||||
DBFF DFF* F4 8F BF B* 0010FFF*
|
||||
|
||||
* = E or F
|
||||
|
||||
1010 A
|
||||
1011 B
|
||||
1100 C
|
||||
1101 D
|
||||
1110 E
|
||||
1111 F
|
||||
|
||||
*/
|
||||
|
||||
#define kNumUTF8Sequences 7
|
||||
#define kMaxUTF8Bytes 4
|
||||
|
||||
#define kUTF8ByteSwapNotAChar 0xFFFE
|
||||
#define kUTF8NotAChar 0xFFFF
|
||||
|
||||
#define kMaxUTF8FromUCS4 0x10FFFF
|
||||
|
||||
#define kUTF16SurrogatesBegin 0x10000
|
||||
#define kMaxUTF16FromUCS4 0x10FFFF
|
||||
|
||||
/* UTF-16 surrogate pair areas */
|
||||
#define kUTF16LowSurrogateBegin 0xD800
|
||||
#define kUTF16LowSurrogateEnd 0xDBFF
|
||||
#define kUTF16HighSurrogateBegin 0xDC00
|
||||
#define kUTF16HighSurrogateEnd 0xDFFF
|
||||
|
||||
|
||||
/* offsets into validUTF8 table below.
** Entry k is the index of the first validUTF8 row that describes
** sequences of k+1 bytes.  The extra final entry is the number of rows,
** so the rows for a b-byte sequence run from offsetUTF8Sequences[b-1]
** to offsetUTF8Sequences[b]-1 inclusive.
*/
|
||||
static const int offsetUTF8Sequences[kMaxUTF8Bytes + 1] =
|
||||
{
|
||||
0, /* 1 byte */
|
||||
1, /* 2 bytes */
|
||||
2, /* 3 bytes */
|
||||
4, /* 4 bytes */
|
||||
kNumUTF8Sequences /* must be last */
|
||||
};
|
||||
|
||||
static const struct validUTF8Sequence
|
||||
{
|
||||
uint lowChar;
|
||||
uint highChar;
|
||||
int numBytes;
|
||||
byte validBytes[8];
|
||||
} validUTF8[kNumUTF8Sequences] =
|
||||
{
|
||||
/* low high #bytes byte 1 byte 2 byte 3 byte 4 */
|
||||
{0x0000, 0x007F, 1, {0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
|
||||
{0x0080, 0x07FF, 2, {0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}},
|
||||
{0x0800, 0x0FFF, 3, {0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
|
||||
{0x1000, 0xFFFF, 3, {0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}},
|
||||
{0x10000, 0x3FFFF, 4, {0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
|
||||
{0x40000, 0xFFFFF, 4, {0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}},
|
||||
{0x100000, 0x10FFFF, 4, {0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}}
|
||||
};
|
||||
|
||||
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
|
||||
TidyInputSource* inp, int* count )
|
||||
{
|
||||
byte tempbuf[10];
|
||||
byte *buf = &tempbuf[0];
|
||||
uint ch = 0, n = 0;
|
||||
int i, bytes = 0;
|
||||
Bool hasError = no;
|
||||
|
||||
if ( successorBytes )
|
||||
buf = (byte*) successorBytes;
|
||||
|
||||
/* special check if we have been passed an EOF char */
|
||||
if ( firstByte == EndOfStream )
|
||||
{
|
||||
/* at present */
|
||||
*c = firstByte;
|
||||
*count = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ch = firstByte; /* first byte is passed in separately */
|
||||
|
||||
if (ch <= 0x7F) /* 0XXX XXXX one byte */
|
||||
{
|
||||
n = ch;
|
||||
bytes = 1;
|
||||
}
|
||||
else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */
|
||||
{
|
||||
n = ch & 31;
|
||||
bytes = 2;
|
||||
}
|
||||
else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
|
||||
{
|
||||
n = ch & 15;
|
||||
bytes = 3;
|
||||
}
|
||||
else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
|
||||
{
|
||||
n = ch & 7;
|
||||
bytes = 4;
|
||||
}
|
||||
else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */
|
||||
{
|
||||
n = ch & 3;
|
||||
bytes = 5;
|
||||
hasError = yes;
|
||||
}
|
||||
else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */
|
||||
{
|
||||
n = ch & 1;
|
||||
bytes = 6;
|
||||
hasError = yes;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* not a valid first byte of a UTF-8 sequence */
|
||||
n = ch;
|
||||
bytes = 1;
|
||||
hasError = yes;
|
||||
}
|
||||
|
||||
/* successor bytes should have the form 10XX XXXX */
|
||||
|
||||
/* If caller supplied buffer, use it. Else see if caller
|
||||
** supplied an input source, use that.
|
||||
*/
|
||||
if ( successorBytes )
|
||||
{
|
||||
for ( i=0; i < bytes-1; ++i )
|
||||
{
|
||||
if ( !buf[i] || (buf[i] & 0xC0) != 0x80 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = i+1;
|
||||
break;
|
||||
}
|
||||
n = (n << 6) | (buf[i] & 0x3F);
|
||||
}
|
||||
}
|
||||
else if ( inp )
|
||||
{
|
||||
for ( i=0; i < bytes-1 && !inp->eof(inp->sourceData); ++i )
|
||||
{
|
||||
int b = inp->getByte( inp->sourceData );
|
||||
buf[i] = (tmbchar) b;
|
||||
|
||||
/* End of data or illegal successor byte value */
|
||||
if ( b == EOF || (buf[i] & 0xC0) != 0x80 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = i+1;
|
||||
if ( b != EOF )
|
||||
inp->ungetByte( inp->sourceData, buf[i] );
|
||||
break;
|
||||
}
|
||||
n = (n << 6) | (buf[i] & 0x3F);
|
||||
}
|
||||
}
|
||||
else if ( bytes > 1 )
|
||||
{
|
||||
hasError = yes;
|
||||
bytes = 1;
|
||||
}
|
||||
|
||||
if (!hasError && ((n == kUTF8ByteSwapNotAChar) || (n == kUTF8NotAChar)))
|
||||
hasError = yes;
|
||||
|
||||
if (!hasError && (n > kMaxUTF8FromUCS4))
|
||||
hasError = yes;
|
||||
|
||||
#if 0 /* Breaks Big5 D8 - DF */
|
||||
if (!hasError && (n >= kUTF16LowSurrogateBegin) && (n <= kUTF16HighSurrogateEnd))
|
||||
/* unpaired surrogates not allowed */
|
||||
hasError = yes;
|
||||
#endif
|
||||
|
||||
if (!hasError)
|
||||
{
|
||||
int lo, hi;
|
||||
|
||||
lo = offsetUTF8Sequences[bytes - 1];
|
||||
hi = offsetUTF8Sequences[bytes] - 1;
|
||||
|
||||
/* check for overlong sequences */
|
||||
if ((n < validUTF8[lo].lowChar) || (n > validUTF8[hi].highChar))
|
||||
hasError = yes;
|
||||
else
|
||||
{
|
||||
hasError = yes; /* assume error until proven otherwise */
|
||||
|
||||
for (i = lo; i <= hi; i++)
|
||||
{
|
||||
int tempCount;
|
||||
byte theByte;
|
||||
|
||||
for (tempCount = 0; tempCount < bytes; tempCount++)
|
||||
{
|
||||
if (!tempCount)
|
||||
theByte = (tmbchar) firstByte;
|
||||
else
|
||||
theByte = buf[tempCount - 1];
|
||||
|
||||
if ( theByte >= validUTF8[i].validBytes[(tempCount * 2)] &&
|
||||
theByte <= validUTF8[i].validBytes[(tempCount * 2) + 1] )
|
||||
hasError = no;
|
||||
if (hasError)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 1 && defined(_DEBUG)
|
||||
if ( hasError )
|
||||
{
|
||||
/* debug */
|
||||
fprintf( stderr, "UTF-8 decoding error of %d bytes : ", bytes );
|
||||
fprintf( stderr, "0x%02x ", firstByte );
|
||||
for (i = 1; i < bytes; i++)
|
||||
fprintf( stderr, "0x%02x ", buf[i - 1] );
|
||||
fprintf( stderr, " = U+%04ulx\n", n );
|
||||
}
|
||||
#endif
|
||||
|
||||
*count = bytes;
|
||||
*c = n;
|
||||
if ( hasError )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
|
||||
TidyOutputSink* outp, int* count )
|
||||
{
|
||||
byte tempbuf[10] = {0};
|
||||
byte* buf = &tempbuf[0];
|
||||
int bytes = 0;
|
||||
Bool hasError = no;
|
||||
|
||||
if ( encodebuf )
|
||||
buf = (byte*) encodebuf;
|
||||
|
||||
if (c <= 0x7F) /* 0XXX XXXX one byte */
|
||||
{
|
||||
buf[0] = (tmbchar) c;
|
||||
bytes = 1;
|
||||
}
|
||||
else if (c <= 0x7FF) /* 110X XXXX two bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) ( 0xC0 | (c >> 6) );
|
||||
buf[1] = (tmbchar) ( 0x80 | (c & 0x3F) );
|
||||
bytes = 2;
|
||||
}
|
||||
else if (c <= 0xFFFF) /* 1110 XXXX three bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xE0 | (c >> 12));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 3;
|
||||
if ( c == kUTF8ByteSwapNotAChar || c == kUTF8NotAChar )
|
||||
hasError = yes;
|
||||
#if 0 /* Breaks Big5 D8 - DF */
|
||||
else if ( c >= kUTF16LowSurrogateBegin && c <= kUTF16HighSurrogateEnd )
|
||||
/* unpaired surrogates not allowed */
|
||||
hasError = yes;
|
||||
#endif
|
||||
}
|
||||
else if (c <= 0x1FFFFF) /* 1111 0XXX four bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xF0 | (c >> 18));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 4;
|
||||
if (c > kMaxUTF8FromUCS4)
|
||||
hasError = yes;
|
||||
}
|
||||
else if (c <= 0x3FFFFFF) /* 1111 10XX five bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xF8 | (c >> 24));
|
||||
buf[1] = (tmbchar) (0x80 | (c >> 18));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[4] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 5;
|
||||
hasError = yes;
|
||||
}
|
||||
else if (c <= 0x7FFFFFFF) /* 1111 110X six bytes */
|
||||
{
|
||||
buf[0] = (tmbchar) (0xFC | (c >> 30));
|
||||
buf[1] = (tmbchar) (0x80 | ((c >> 24) & 0x3F));
|
||||
buf[2] = (tmbchar) (0x80 | ((c >> 18) & 0x3F));
|
||||
buf[3] = (tmbchar) (0x80 | ((c >> 12) & 0x3F));
|
||||
buf[4] = (tmbchar) (0x80 | ((c >> 6) & 0x3F));
|
||||
buf[5] = (tmbchar) (0x80 | (c & 0x3F));
|
||||
bytes = 6;
|
||||
hasError = yes;
|
||||
}
|
||||
else
|
||||
hasError = yes;
|
||||
|
||||
/* don't output invalid UTF-8 byte sequence to a stream */
|
||||
if ( !hasError && outp != NULL )
|
||||
{
|
||||
int ix;
|
||||
for ( ix=0; ix < bytes; ++ix )
|
||||
outp->putByte( outp->sinkData, buf[ix] );
|
||||
}
|
||||
|
||||
#if 1 && defined(_DEBUG)
|
||||
if ( hasError )
|
||||
{
|
||||
int i;
|
||||
fprintf( stderr, "UTF-8 encoding error for U+%x : ", c );
|
||||
for (i = 0; i < bytes; i++)
|
||||
fprintf( stderr, "0x%02x ", buf[i] );
|
||||
fprintf( stderr, "\n" );
|
||||
}
|
||||
#endif
|
||||
|
||||
*count = bytes;
|
||||
if (hasError)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* return one less than the number of bytes used by the UTF-8 byte sequence */
|
||||
/* str points to the UTF-8 byte sequence */
|
||||
/* the Unicode char is returned in *ch */
|
||||
uint TY_(GetUTF8)( ctmbstr str, uint *ch )
|
||||
{
|
||||
uint n;
|
||||
int bytes;
|
||||
|
||||
int err;
|
||||
|
||||
bytes = 0;
|
||||
|
||||
/* first byte "str[0]" is passed in separately from the */
|
||||
/* rest of the UTF-8 byte sequence starting at "str[1]" */
|
||||
err = TY_(DecodeUTF8BytesToChar)( &n, str[0], str+1, NULL, &bytes );
|
||||
if (err)
|
||||
{
|
||||
#if 1 && defined(_DEBUG)
|
||||
fprintf(stderr, "pprint UTF-8 decoding error for U+%x : ", n);
|
||||
#endif
|
||||
n = 0xFFFD; /* replacement char */
|
||||
}
|
||||
|
||||
*ch = n;
|
||||
return bytes - 1;
|
||||
}
|
||||
|
||||
/* store char c as UTF-8 encoded byte stream */
|
||||
tmbstr TY_(PutUTF8)( tmbstr buf, uint c )
|
||||
{
|
||||
int err, count = 0;
|
||||
|
||||
err = TY_(EncodeCharToUTF8Bytes)( c, buf, NULL, &count );
|
||||
if (err)
|
||||
{
|
||||
#if 1 && defined(_DEBUG)
|
||||
fprintf(stderr, "pprint UTF-8 encoding error for U+%x : ", c);
|
||||
#endif
|
||||
/* replacement char 0xFFFD encoded as UTF-8 */
|
||||
buf[0] = (byte) 0xEF;
|
||||
buf[1] = (byte) 0xBF;
|
||||
buf[2] = (byte) 0xBD;
|
||||
count = 3;
|
||||
}
|
||||
|
||||
buf += count;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* True when ucs4 does not exceed kMaxUTF16FromUCS4. */
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 )
{
    return !( ucs4 > kMaxUTF16FromUCS4 );
}
|
||||
|
||||
/* True when ch lies in the inclusive range
** kUTF16HighSurrogateBegin .. kUTF16HighSurrogateEnd.
*/
Bool TY_(IsHighSurrogate)( tchar ch )
{
    return !( ch < kUTF16HighSurrogateBegin || ch > kUTF16HighSurrogateEnd );
}
|
||||
/* True when ch lies in the inclusive range
** kUTF16LowSurrogateBegin .. kUTF16LowSurrogateEnd.
*/
Bool TY_(IsLowSurrogate)( tchar ch )
{
    return !( ch < kUTF16LowSurrogateBegin || ch > kUTF16LowSurrogateEnd );
}
|
||||
|
||||
/* Merge a surrogate pair into a single value.
**
** The offset of `low` within its surrogate range is scaled by 0x400,
** the offset of `high` within its range is added, and the result is
** biased by 0x10000.  Both arguments are checked with assert().
*/
tchar TY_(CombineSurrogatePair)( tchar high, tchar low )
{
    tchar lowOffset;
    tchar highOffset;

    assert( TY_(IsHighSurrogate)(high) && TY_(IsLowSurrogate)(low) );

    lowOffset  = low - kUTF16LowSurrogateBegin;
    highOffset = high - kUTF16HighSurrogateBegin;

    return ( lowOffset * 0x400 + highOffset + 0x10000 );
}
|
||||
|
||||
/* Split utf16 into a surrogate pair.
**
** Nothing is written unless both output pointers are non-NULL and
** TY_(IsValidCombinedChar)(utf16) holds.  On success *low receives the
** quotient part (offset / 0x400) and *high the remainder part
** (offset % 0x400), each biased by the start of its surrogate range.
** Returns whether the split was performed.
*/
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high )
{
    Bool ok = ( low && high && TY_(IsValidCombinedChar)( utf16 ) );

    if ( ok )
    {
        tchar offset = utf16 - kUTF16SurrogatesBegin;

        *low  = offset / 0x400 + kUTF16LowSurrogateBegin;
        *high = offset % 0x400 + kUTF16HighSurrogateBegin;
    }

    return ok;
}
|
||||
|
||||
/* True when ch is at least kUTF16SurrogatesBegin and its low 16 bits
** are neither 0xFFFE nor 0xFFFF.
*/
Bool TY_(IsValidCombinedChar)( tchar ch )
{
    tchar low16 = ch & 0x0000FFFF;

    return ( !(ch < kUTF16SurrogatesBegin) &&
             (low16 & 0x0000FFFE) != 0x0000FFFE &&
             low16 != 0x0000FFFF );
}
|
||||
|
||||
/* True when ch is at least kUTF16SurrogatesBegin. */
Bool TY_(IsCombinedChar)( tchar ch )
{
    return !( ch < kUTF16SurrogatesBegin );
}
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
52
src/utf8.h
Normal file
52
src/utf8.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
/* utf8.h -- convert characters to/from UTF-8
|
||||
|
||||
(c) 1998-2006 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2006/09/12 15:14:44 $
|
||||
$Revision: 1.5 $
|
||||
|
||||
*/
|
||||
|
||||
#include "platform.h"
|
||||
#include "buffio.h"
|
||||
|
||||
/* UTF-8 encoding/decoding support
|
||||
** Does not convert character "codepoints", i.e. to/from 10646.
|
||||
*/
|
||||
|
||||
int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
|
||||
TidyInputSource* inp, int* count );
|
||||
|
||||
int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
|
||||
TidyOutputSink* outp, int* count );
|
||||
|
||||
|
||||
uint TY_(GetUTF8)( ctmbstr str, uint *ch );
|
||||
tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
|
||||
|
||||
#define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
|
||||
#define UNICODE_BOM UNICODE_BOM_BE
|
||||
#define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
|
||||
#define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
|
||||
|
||||
|
||||
Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
|
||||
Bool TY_(IsHighSurrogate)( tchar ch );
|
||||
Bool TY_(IsLowSurrogate)( tchar ch );
|
||||
|
||||
Bool TY_(IsCombinedChar)( tchar ch );
|
||||
Bool TY_(IsValidCombinedChar)( tchar ch );
|
||||
|
||||
tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
|
||||
/* out-parameter order is (low, high), matching the definition */
Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* low, tchar* high );
|
||||
|
||||
|
||||
|
||||
#endif /* __UTF8_H__ */
|
14
src/version.h
Executable file
14
src/version.h
Executable file
|
@ -0,0 +1,14 @@
|
|||
/* version information
|
||||
|
||||
(c) 2007-2009 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2009/03/25 21:37:11 $
|
||||
$Revision: 1.46 $
|
||||
|
||||
*/
|
||||
|
||||
static const char TY_(release_date)[] = "16 November 2011";
|
14
src/version.h~
Executable file
14
src/version.h~
Executable file
|
@ -0,0 +1,14 @@
|
|||
/* version information
|
||||
|
||||
(c) 2007-2009 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
CVS Info :
|
||||
|
||||
$Author: arnaud02 $
|
||||
$Date: 2009/03/25 21:37:11 $
|
||||
$Revision: 1.46 $
|
||||
|
||||
*/
|
||||
|
||||
static const char TY_(release_date)[] = "25 March 2009";
|
795
src/win32tc.c
Normal file
795
src/win32tc.c
Normal file
|
@ -0,0 +1,795 @@
|
|||
/* win32tc.c -- Interface to Win32 transcoding routines
|
||||
|
||||
(c) 1998-2008 (W3C) MIT, ERCIM, Keio University
|
||||
See tidy.h for the copyright notice.
|
||||
|
||||
$Id: win32tc.c,v 1.12 2008/08/09 11:55:27 hoehrmann Exp $
|
||||
*/
|
||||
|
||||
/* keep these here to keep file non-empty */
|
||||
#include "tidy.h"
|
||||
#include "forward.h"
|
||||
#include "streamio.h"
|
||||
#include "tmbstr.h"
|
||||
#include "utf8.h"
|
||||
|
||||
#ifdef TIDY_WIN32_MLANG_SUPPORT
|
||||
|
||||
#define VC_EXTRALEAN
|
||||
#define CINTERFACE
|
||||
#define COBJMACROS
|
||||
|
||||
#include <windows.h>
|
||||
#include <mlang.h>
|
||||
|
||||
#undef COBJMACROS
|
||||
#undef CINTERFACE
|
||||
#undef VC_EXTRALEAN
|
||||
|
||||
/* maximum number of bytes for a single character */
|
||||
#define TC_INBUFSIZE 16
|
||||
|
||||
/* maximum number of characters per byte sequence */
|
||||
#define TC_OUTBUFSIZE 16
|
||||
|
||||
#define CreateMLangObject(p) \
|
||||
CoCreateInstance( \
|
||||
&CLSID_CMLangConvertCharset, \
|
||||
NULL, \
|
||||
CLSCTX_ALL, \
|
||||
&IID_IMLangConvertCharset, \
|
||||
(VOID **)&p);
|
||||
|
||||
|
||||
/* Character Set to Microsoft Windows Codepage Identifier map, */
|
||||
/* from <rotor/sscli/clr/src/classlibnative/nls/encodingdata.cpp>. */
|
||||
|
||||
/* note: the 'safe' field indicates whether this encoding can be */
|
||||
/* read/written character-by-character; this does not apply to */
|
||||
/* various stateful encodings such as ISO-2022 or UTF-7, these */
|
||||
/* must be read/written as a complete stream. It is possible that */
|
||||
/* some 'unsafe' encodings are marked as 'safe'. */
|
||||
|
||||
/* todo: cleanup; Tidy should use only a single mapping table to */
|
||||
/* circumvent unsupported aliases in other transcoding libraries, */
|
||||
/* enable reverse lookup of encoding names and ease maintenance. */
|
||||
|
||||
static struct _nameWinCPMap
|
||||
{
|
||||
tmbstr name;
|
||||
uint wincp;
|
||||
Bool safe;
|
||||
} const NameWinCPMap[] = {
|
||||
{ "cp037", 37, yes },
|
||||
{ "csibm037", 37, yes },
|
||||
{ "ebcdic-cp-ca", 37, yes },
|
||||
{ "ebcdic-cp-nl", 37, yes },
|
||||
{ "ebcdic-cp-us", 37, yes },
|
||||
{ "ebcdic-cp-wt", 37, yes },
|
||||
{ "ibm037", 37, yes },
|
||||
{ "cp437", 437, yes },
|
||||
{ "cspc8codepage437", 437, yes },
|
||||
{ "ibm437", 437, yes },
|
||||
{ "cp500", 500, yes },
|
||||
{ "csibm500", 500, yes },
|
||||
{ "ebcdic-cp-be", 500, yes },
|
||||
{ "ebcdic-cp-ch", 500, yes },
|
||||
{ "ibm500", 500, yes },
|
||||
{ "asmo-708", 708, yes },
|
||||
{ "dos-720", 720, yes },
|
||||
{ "ibm737", 737, yes },
|
||||
{ "ibm775", 775, yes },
|
||||
{ "cp850", 850, yes },
|
||||
{ "ibm850", 850, yes },
|
||||
{ "cp852", 852, yes },
|
||||
{ "ibm852", 852, yes },
|
||||
{ "cp855", 855, yes },
|
||||
{ "ibm855", 855, yes },
|
||||
{ "cp857", 857, yes },
|
||||
{ "ibm857", 857, yes },
|
||||
{ "ccsid00858", 858, yes },
|
||||
{ "cp00858", 858, yes },
|
||||
{ "cp858", 858, yes },
|
||||
{ "ibm00858", 858, yes },
|
||||
{ "pc-multilingual-850+euro", 858, yes },
|
||||
{ "cp860", 860, yes },
|
||||
{ "ibm860", 860, yes },
|
||||
{ "cp861", 861, yes },
|
||||
{ "ibm861", 861, yes },
|
||||
{ "cp862", 862, yes },
|
||||
{ "dos-862", 862, yes },
|
||||
{ "ibm862", 862, yes },
|
||||
{ "cp863", 863, yes },
|
||||
{ "ibm863", 863, yes },
|
||||
{ "cp864", 864, yes },
|
||||
{ "ibm864", 864, yes },
|
||||
{ "cp865", 865, yes },
|
||||
{ "ibm865", 865, yes },
|
||||
{ "cp866", 866, yes },
|
||||
{ "ibm866", 866, yes },
|
||||
{ "cp869", 869, yes },
|
||||
{ "ibm869", 869, yes },
|
||||
{ "cp870", 870, yes },
|
||||
{ "csibm870", 870, yes },
|
||||
{ "ebcdic-cp-roece", 870, yes },
|
||||
{ "ebcdic-cp-yu", 870, yes },
|
||||
{ "ibm870", 870, yes },
|
||||
{ "dos-874", 874, yes },
|
||||
{ "iso-8859-11", 874, yes },
|
||||
{ "tis-620", 874, yes },
|
||||
{ "windows-874", 874, yes },
|
||||
{ "cp875", 875, yes },
|
||||
{ "csshiftjis", 932, yes },
|
||||
{ "cswindows31j", 932, yes },
|
||||
{ "ms_kanji", 932, yes },
|
||||
{ "shift-jis", 932, yes },
|
||||
{ "shift_jis", 932, yes },
|
||||
{ "sjis", 932, yes },
|
||||
{ "x-ms-cp932", 932, yes },
|
||||
{ "x-sjis", 932, yes },
|
||||
{ "chinese", 936, yes },
|
||||
{ "cn-gb", 936, yes },
|
||||
{ "csgb2312", 936, yes },
|
||||
{ "csgb231280", 936, yes },
|
||||
{ "csiso58gb231280", 936, yes },
|
||||
{ "gb2312", 936, yes },
|
||||
{ "gb2312-80", 936, yes },
|
||||
{ "gb231280", 936, yes },
|
||||
{ "gb_2312-80", 936, yes },
|
||||
{ "gbk", 936, yes },
|
||||
{ "iso-ir-58", 936, yes },
|
||||
{ "csksc56011987", 949, yes },
|
||||
{ "iso-ir-149", 949, yes },
|
||||
{ "korean", 949, yes },
|
||||
{ "ks-c-5601", 949, yes },
|
||||
{ "ks-c5601", 949, yes },
|
||||
{ "ks_c_5601", 949, yes },
|
||||
{ "ks_c_5601-1987", 949, yes },
|
||||
{ "ks_c_5601-1989", 949, yes },
|
||||
{ "ks_c_5601_1987", 949, yes },
|
||||
{ "ksc5601", 949, yes },
|
||||
{ "ksc_5601", 949, yes },
|
||||
{ "big5", 950, yes },
|
||||
{ "big5-hkscs", 950, yes },
|
||||
{ "cn-big5", 950, yes },
|
||||
{ "csbig5", 950, yes },
|
||||
{ "x-x-big5", 950, yes },
|
||||
{ "cp1026", 1026, yes },
|
||||
{ "csibm1026", 1026, yes },
|
||||
{ "ibm1026", 1026, yes },
|
||||
{ "ibm01047", 1047, yes },
|
||||
{ "ccsid01140", 1140, yes },
|
||||
{ "cp01140", 1140, yes },
|
||||
{ "ebcdic-us-37+euro", 1140, yes },
|
||||
{ "ibm01140", 1140, yes },
|
||||
{ "ccsid01141", 1141, yes },
|
||||
{ "cp01141", 1141, yes },
|
||||
{ "ebcdic-de-273+euro", 1141, yes },
|
||||
{ "ibm01141", 1141, yes },
|
||||
{ "ccsid01142", 1142, yes },
|
||||
{ "cp01142", 1142, yes },
|
||||
{ "ebcdic-dk-277+euro", 1142, yes },
|
||||
{ "ebcdic-no-277+euro", 1142, yes },
|
||||
{ "ibm01142", 1142, yes },
|
||||
{ "ccsid01143", 1143, yes },
|
||||
{ "cp01143", 1143, yes },
|
||||
{ "ebcdic-fi-278+euro", 1143, yes },
|
||||
{ "ebcdic-se-278+euro", 1143, yes },
|
||||
{ "ibm01143", 1143, yes },
|
||||
{ "ccsid01144", 1144, yes },
|
||||
{ "cp01144", 1144, yes },
|
||||
{ "ebcdic-it-280+euro", 1144, yes },
|
||||
{ "ibm01144", 1144, yes },
|
||||
{ "ccsid01145", 1145, yes },
|
||||
{ "cp01145", 1145, yes },
|
||||
{ "ebcdic-es-284+euro", 1145, yes },
|
||||
{ "ibm01145", 1145, yes },
|
||||
{ "ccsid01146", 1146, yes },
|
||||
{ "cp01146", 1146, yes },
|
||||
{ "ebcdic-gb-285+euro", 1146, yes },
|
||||
{ "ibm01146", 1146, yes },
|
||||
{ "ccsid01147", 1147, yes },
|
||||
{ "cp01147", 1147, yes },
|
||||
{ "ebcdic-fr-297+euro", 1147, yes },
|
||||
{ "ibm01147", 1147, yes },
|
||||
{ "ccsid01148", 1148, yes },
|
||||
{ "cp01148", 1148, yes },
|
||||
{ "ebcdic-international-500+euro", 1148, yes },
|
||||
{ "ibm01148", 1148, yes },
|
||||
{ "ccsid01149", 1149, yes },
|
||||
{ "cp01149", 1149, yes },
|
||||
{ "ebcdic-is-871+euro", 1149, yes },
|
||||
{ "ibm01149", 1149, yes },
|
||||
{ "iso-10646-ucs-2", 1200, yes },
|
||||
{ "ucs-2", 1200, yes },
|
||||
{ "unicode", 1200, yes },
|
||||
{ "utf-16", 1200, yes },
|
||||
{ "utf-16le", 1200, yes },
|
||||
{ "unicodefffe", 1201, yes },
|
||||
{ "utf-16be", 1201, yes },
|
||||
{ "windows-1250", 1250, yes },
|
||||
{ "x-cp1250", 1250, yes },
|
||||
{ "windows-1251", 1251, yes },
|
||||
{ "x-cp1251", 1251, yes },
|
||||
{ "windows-1252", 1252, yes },
|
||||
{ "x-ansi", 1252, yes },
|
||||
{ "windows-1253", 1253, yes },
|
||||
{ "windows-1254", 1254, yes },
|
||||
{ "windows-1255", 1255, yes },
|
||||
{ "cp1256", 1256, yes },
|
||||
{ "windows-1256", 1256, yes },
|
||||
{ "windows-1257", 1257, yes },
|
||||
{ "windows-1258", 1258, yes },
|
||||
{ "johab", 1361, yes },
|
||||
{ "macintosh", 10000, yes },
|
||||
{ "x-mac-japanese", 10001, yes },
|
||||
{ "x-mac-chinesetrad", 10002, yes },
|
||||
{ "x-mac-korean", 10003, yes },
|
||||
{ "x-mac-arabic", 10004, yes },
|
||||
{ "x-mac-hebrew", 10005, yes },
|
||||
{ "x-mac-greek", 10006, yes },
|
||||
{ "x-mac-cyrillic", 10007, yes },
|
||||
{ "x-mac-chinesesimp", 10008, yes },
|
||||
{ "x-mac-romanian", 10010, yes },
|
||||
{ "x-mac-ukrainian", 10017, yes },
|
||||
{ "x-mac-thai", 10021, yes },
|
||||
{ "x-mac-ce", 10029, yes },
|
||||
{ "x-mac-icelandic", 10079, yes },
|
||||
{ "x-mac-turkish", 10081, yes },
|
||||
{ "x-mac-croatian", 10082, yes },
|
||||
{ "x-chinese-cns", 20000, yes },
|
||||
{ "x-cp20001", 20001, yes },
|
||||
{ "x-chinese-eten", 20002, yes },
|
||||
{ "x-cp20003", 20003, yes },
|
||||
{ "x-cp20004", 20004, yes },
|
||||
{ "x-cp20005", 20005, yes },
|
||||
{ "irv", 20105, yes },
|
||||
{ "x-ia5", 20105, yes },
|
||||
{ "din_66003", 20106, yes },
|
||||
{ "german", 20106, yes },
|
||||
{ "x-ia5-german", 20106, yes },
|
||||
{ "sen_850200_b", 20107, yes },
|
||||
{ "swedish", 20107, yes },
|
||||
{ "x-ia5-swedish", 20107, yes },
|
||||
{ "norwegian", 20108, yes },
|
||||
{ "ns_4551-1", 20108, yes },
|
||||
{ "x-ia5-norwegian", 20108, yes },
|
||||
{ "ansi_x3.4-1968", 20127, yes },
|
||||
{ "ansi_x3.4-1986", 20127, yes },
|
||||
{ "ascii", 20127, yes },
|
||||
{ "cp367", 20127, yes },
|
||||
{ "csascii", 20127, yes },
|
||||
{ "ibm367", 20127, yes },
|
||||
{ "iso-ir-6", 20127, yes },
|
||||
{ "iso646-us", 20127, yes },
|
||||
{ "iso_646.irv:1991", 20127, yes },
|
||||
{ "us", 20127, yes },
|
||||
{ "us-ascii", 20127, yes },
|
||||
{ "x-cp20261", 20261, yes },
|
||||
{ "x-cp20269", 20269, yes },
|
||||
{ "cp273", 20273, yes },
|
||||
{ "csibm273", 20273, yes },
|
||||
{ "ibm273", 20273, yes },
|
||||
{ "csibm277", 20277, yes },
|
||||
{ "ebcdic-cp-dk", 20277, yes },
|
||||
{ "ebcdic-cp-no", 20277, yes },
|
||||
{ "ibm277", 20277, yes },
|
||||
{ "cp278", 20278, yes },
|
||||
{ "csibm278", 20278, yes },
|
||||
{ "ebcdic-cp-fi", 20278, yes },
|
||||
{ "ebcdic-cp-se", 20278, yes },
|
||||
{ "ibm278", 20278, yes },
|
||||
{ "cp280", 20280, yes },
|
||||
{ "csibm280", 20280, yes },
|
||||
{ "ebcdic-cp-it", 20280, yes },
|
||||
{ "ibm280", 20280, yes },
|
||||
{ "cp284", 20284, yes },
|
||||
{ "csibm284", 20284, yes },
|
||||
{ "ebcdic-cp-es", 20284, yes },
|
||||
{ "ibm284", 20284, yes },
|
||||
{ "cp285", 20285, yes },
|
||||
{ "csibm285", 20285, yes },
|
||||
{ "ebcdic-cp-gb", 20285, yes },
|
||||
{ "ibm285", 20285, yes },
|
||||
{ "cp290", 20290, yes },
|
||||
{ "csibm290", 20290, yes },
|
||||
{ "ebcdic-jp-kana", 20290, yes },
|
||||
{ "ibm290", 20290, yes },
|
||||
{ "cp297", 20297, yes },
|
||||
{ "csibm297", 20297, yes },
|
||||
{ "ebcdic-cp-fr", 20297, yes },
|
||||
{ "ibm297", 20297, yes },
|
||||
{ "cp420", 20420, yes },
|
||||
{ "csibm420", 20420, yes },
|
||||
{ "ebcdic-cp-ar1", 20420, yes },
|
||||
{ "ibm420", 20420, yes },
|
||||
{ "cp423", 20423, yes },
|
||||
{ "csibm423", 20423, yes },
|
||||
{ "ebcdic-cp-gr", 20423, yes },
|
||||
{ "ibm423", 20423, yes },
|
||||
{ "cp424", 20424, yes },
|
||||
{ "csibm424", 20424, yes },
|
||||
{ "ebcdic-cp-he", 20424, yes },
|
||||
{ "ibm424", 20424, yes },
|
||||
{ "x-ebcdic-koreanextended", 20833, yes },
|
||||
{ "csibmthai", 20838, yes },
|
||||
{ "ibm-thai", 20838, yes },
|
||||
{ "cskoi8r", 20866, yes },
|
||||
{ "koi", 20866, yes },
|
||||
{ "koi8", 20866, yes },
|
||||
{ "koi8-r", 20866, yes },
|
||||
{ "koi8r", 20866, yes },
|
||||
{ "cp871", 20871, yes },
|
||||
{ "csibm871", 20871, yes },
|
||||
{ "ebcdic-cp-is", 20871, yes },
|
||||
{ "ibm871", 20871, yes },
|
||||
{ "cp880", 20880, yes },
|
||||
{ "csibm880", 20880, yes },
|
||||
{ "ebcdic-cyrillic", 20880, yes },
|
||||
{ "ibm880", 20880, yes },
|
||||
{ "cp905", 20905, yes },
|
||||
{ "csibm905", 20905, yes },
|
||||
{ "ebcdic-cp-tr", 20905, yes },
|
||||
{ "ibm905", 20905, yes },
|
||||
{ "ccsid00924", 20924, yes },
|
||||
{ "cp00924", 20924, yes },
|
||||
{ "ebcdic-latin9--euro", 20924, yes },
|
||||
{ "ibm00924", 20924, yes },
|
||||
{ "x-cp20936", 20936, yes },
|
||||
{ "x-cp20949", 20949, yes },
|
||||
{ "cp1025", 21025, yes },
|
||||
{ "x-cp21027", 21027, yes },
|
||||
{ "koi8-ru", 21866, yes },
|
||||
{ "koi8-u", 21866, yes },
|
||||
{ "cp819", 28591, yes },
|
||||
{ "csisolatin1", 28591, yes },
|
||||
{ "ibm819", 28591, yes },
|
||||
{ "iso-8859-1", 28591, yes },
|
||||
{ "iso-ir-100", 28591, yes },
|
||||
{ "iso8859-1", 28591, yes },
|
||||
{ "iso_8859-1", 28591, yes },
|
||||
{ "iso_8859-1:1987", 28591, yes },
|
||||
{ "l1", 28591, yes },
|
||||
{ "latin1", 28591, yes },
|
||||
{ "csisolatin2", 28592, yes },
|
||||
{ "iso-8859-2", 28592, yes },
|
||||
{ "iso-ir-101", 28592, yes },
|
||||
{ "iso8859-2", 28592, yes },
|
||||
{ "iso_8859-2", 28592, yes },
|
||||
{ "iso_8859-2:1987", 28592, yes },
|
||||
{ "l2", 28592, yes },
|
||||
{ "latin2", 28592, yes },
|
||||
{ "csisolatin3", 28593, yes },
|
||||
{ "iso-8859-3", 28593, yes },
|
||||
{ "iso-ir-109", 28593, yes },
|
||||
{ "iso_8859-3", 28593, yes },
|
||||
{ "iso_8859-3:1988", 28593, yes },
|
||||
{ "l3", 28593, yes },
|
||||
{ "latin3", 28593, yes },
|
||||
{ "csisolatin4", 28594, yes },
|
||||
{ "iso-8859-4", 28594, yes },
|
||||
{ "iso-ir-110", 28594, yes },
|
||||
{ "iso_8859-4", 28594, yes },
|
||||
{ "iso_8859-4:1988", 28594, yes },
|
||||
{ "l4", 28594, yes },
|
||||
{ "latin4", 28594, yes },
|
||||
{ "csisolatincyrillic", 28595, yes },
|
||||
{ "cyrillic", 28595, yes },
|
||||
{ "iso-8859-5", 28595, yes },
|
||||
{ "iso-ir-144", 28595, yes },
|
||||
{ "iso_8859-5", 28595, yes },
|
||||
{ "iso_8859-5:1988", 28595, yes },
|
||||
{ "arabic", 28596, yes },
|
||||
{ "csisolatinarabic", 28596, yes },
|
||||
{ "ecma-114", 28596, yes },
|
||||
{ "iso-8859-6", 28596, yes },
|
||||
{ "iso-ir-127", 28596, yes },
|
||||
{ "iso_8859-6", 28596, yes },
|
||||
{ "iso_8859-6:1987", 28596, yes },
|
||||
{ "csisolatingreek", 28597, yes },
|
||||
{ "ecma-118", 28597, yes },
|
||||
{ "elot_928", 28597, yes },
|
||||
{ "greek", 28597, yes },
|
||||
{ "greek8", 28597, yes },
|
||||
{ "iso-8859-7", 28597, yes },
|
||||
{ "iso-ir-126", 28597, yes },
|
||||
{ "iso_8859-7", 28597, yes },
|
||||
{ "iso_8859-7:1987", 28597, yes },
|
||||
{ "csisolatinhebrew", 28598, yes },
|
||||
{ "hebrew", 28598, yes },
|
||||
{ "iso-8859-8", 28598, yes },
|
||||
{ "iso-ir-138", 28598, yes },
|
||||
{ "iso_8859-8", 28598, yes },
|
||||
{ "iso_8859-8:1988", 28598, yes },
|
||||
{ "logical", 28598, yes },
|
||||
{ "visual", 28598, yes },
|
||||
{ "csisolatin5", 28599, yes },
|
||||
{ "iso-8859-9", 28599, yes },
|
||||
{ "iso-ir-148", 28599, yes },
|
||||
{ "iso_8859-9", 28599, yes },
|
||||
{ "iso_8859-9:1989", 28599, yes },
|
||||
{ "l5", 28599, yes },
|
||||
{ "latin5", 28599, yes },
|
||||
{ "iso-8859-13", 28603, yes },
|
||||
{ "csisolatin9", 28605, yes },
|
||||
{ "iso-8859-15", 28605, yes },
|
||||
{ "iso_8859-15", 28605, yes },
|
||||
{ "l9", 28605, yes },
|
||||
{ "latin9", 28605, yes },
|
||||
{ "x-europa", 29001, yes },
|
||||
{ "iso-8859-8-i", 38598, yes },
|
||||
{ "iso-2022-jp", 50220, no },
|
||||
{ "csiso2022jp", 50221, no },
|
||||
{ "csiso2022kr", 50225, no },
|
||||
{ "iso-2022-kr", 50225, no },
|
||||
{ "iso-2022-kr-7", 50225, no },
|
||||
{ "iso-2022-kr-7bit", 50225, no },
|
||||
{ "cp50227", 50227, no },
|
||||
{ "x-cp50227", 50227, no },
|
||||
{ "cp930", 50930, yes },
|
||||
{ "x-ebcdic-japaneseanduscanada", 50931, yes },
|
||||
{ "cp933", 50933, yes },
|
||||
{ "cp935", 50935, yes },
|
||||
{ "cp937", 50937, yes },
|
||||
{ "cp939", 50939, yes },
|
||||
{ "cseucpkdfmtjapanese", 51932, yes },
|
||||
{ "euc-jp", 51932, yes },
|
||||
{ "extended_unix_code_packed_format_for_japanese", 51932, yes },
|
||||
{ "iso-2022-jpeuc", 51932, yes },
|
||||
{ "x-euc", 51932, yes },
|
||||
{ "x-euc-jp", 51932, yes },
|
||||
{ "euc-cn", 51936, yes },
|
||||
{ "x-euc-cn", 51936, yes },
|
||||
{ "cseuckr", 51949, yes },
|
||||
{ "euc-kr", 51949, yes },
|
||||
{ "iso-2022-kr-8", 51949, yes },
|
||||
{ "iso-2022-kr-8bit", 51949, yes },
|
||||
{ "hz-gb-2312", 52936, no },
|
||||
{ "gb18030", 54936, yes },
|
||||
{ "x-iscii-de", 57002, yes },
|
||||
{ "x-iscii-be", 57003, yes },
|
||||
{ "x-iscii-ta", 57004, yes },
|
||||
{ "x-iscii-te", 57005, yes },
|
||||
{ "x-iscii-as", 57006, yes },
|
||||
{ "x-iscii-or", 57007, yes },
|
||||
{ "x-iscii-ka", 57008, yes },
|
||||
{ "x-iscii-ma", 57009, yes },
|
||||
{ "x-iscii-gu", 57010, yes },
|
||||
{ "x-iscii-pa", 57011, yes },
|
||||
{ "csunicode11utf7", 65000, no },
|
||||
{ "unicode-1-1-utf-7", 65000, no },
|
||||
{ "unicode-2-0-utf-7", 65000, no },
|
||||
{ "utf-7", 65000, no },
|
||||
{ "x-unicode-1-1-utf-7", 65000, no },
|
||||
{ "x-unicode-2-0-utf-7", 65000, no },
|
||||
{ "unicode-1-1-utf-8", 65001, yes },
|
||||
{ "unicode-2-0-utf-8", 65001, yes },
|
||||
{ "utf-8", 65001, yes },
|
||||
{ "x-unicode-1-1-utf-8", 65001, yes },
|
||||
{ "x-unicode-2-0-utf-8", 65001, yes },
|
||||
|
||||
/* final entry */
|
||||
{ NULL, 0, no }
|
||||
};
|
||||
|
||||
/* Map an encoding name to a Windows code page identifier.
**
**   allocator - used for the temporary lower-cased copy of the name
**   encoding  - encoding name; compared after conversion to lower case
**
** Returns the code page from NameWinCPMap when the name is known, the
** entry is marked 'safe', and MLang can set up a conversion from that
** code page to code page 1200.  Returns 0 in every other case.
*/
uint TY_(Win32MLangGetCPFromName)(TidyAllocator *allocator, ctmbstr encoding)
{
    uint i;
    tmbstr enc;

    /* ensure name is in lower case */
    enc = TY_(tmbstrdup)(allocator,encoding);
    enc = TY_(tmbstrtolower)(enc);

    for (i = 0; NameWinCPMap[i].name; ++i)
    {
        if (TY_(tmbstrcmp)(NameWinCPMap[i].name, enc) == 0)
        {
            IMLangConvertCharset * p = NULL;
            uint wincp = NameWinCPMap[i].wincp;
            HRESULT hrInit;
            HRESULT hr;

            TidyFree(allocator, enc);

            /* currently no support for unsafe encodings */
            if (!NameWinCPMap[i].safe)
                return 0;

            /* hack for config.c */
            hrInit = CoInitialize(NULL);
            hr = CreateMLangObject(p);

            if (hr != S_OK || !p)
            {
                wincp = 0;
            }
            else
            {
                /* probe only: check that MLang accepts this code page,
                ** then drop the converter again */
                hr = IMLangConvertCharset_Initialize(p, wincp, 1200, 0);

                if (hr != S_OK)
                    wincp = 0;

                IMLangConvertCharset_Release(p);
                p = NULL;
            }

            /* only a successful CoInitialize (S_OK or S_FALSE) may be
            ** balanced; after a failed one, CoUninitialize would undo
            ** an initialization that belongs to someone else */
            if (SUCCEEDED(hrInit))
                CoUninitialize();

            return wincp;
        }
    }

    TidyFree(allocator, enc);
    return 0;
}
|
||||
|
||||
/* Attach an MLang converter (code page `wincp` to code page 1200) to
** the input stream.
**
**   in    - stream that receives the converter in in->mlang; must not
**           be NULL
**   wincp - source code page; 0 means "no code page found"
**
** Returns yes when the converter was created and stored, no otherwise.
** in->mlang is left untouched on failure.
*/
Bool TY_(Win32MLangInitInputTranscoder)(StreamIn * in, uint wincp)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;

    assert( in != NULL );

    /* NOTE(review): COM is initialized before any check and is not
    ** uninitialized on the failure paths below; presumably callers
    ** balance it via TY_(Win32MLangUninitInputTranscoder) - confirm */
    CoInitialize(NULL);

    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    hr = IMLangConvertCharset_Initialize(p, wincp, 1200, 0);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* the object is not stored anywhere, so release it here */
        IMLangConvertCharset_Release(p);
        return no;
    }

    in->mlang = p;

    return yes;
}
|
||||
|
||||
/* Release the MLang converter stored in in->mlang (if any), clear the
** field, and call CoUninitialize().  `in` must not be NULL.
*/
void TY_(Win32MLangUninitInputTranscoder)(StreamIn * in)
{
    IMLangConvertCharset * converter;

    assert( in != NULL );

    converter = (IMLangConvertCharset *)in->mlang;

    if (converter != NULL)
    {
        IMLangConvertCharset_Release(converter);
        in->mlang = NULL;
    }

    /* runs whether or not a converter was attached */
    CoUninitialize();
}
|
||||
|
||||
#if 0
|
||||
/* Attach an MLang converter (code page 1200 to the code page named by
** `encoding`) to the output stream.
**
**   allocator - passed on to TY_(Win32MLangGetCPFromName)
**   out       - stream that receives the converter in out->mlang; must
**               not be NULL
**   encoding  - name of the target encoding
**
** Returns yes when the converter was created and stored, no otherwise.
*/
Bool Win32MLangInitOutputTranscoder(TidyAllocator *allocator, StreamOut * out, tmbstr encoding)
{
    IMLangConvertCharset * p = NULL;
    HRESULT hr;
    uint wincp;

    assert( out != NULL );

    CoInitialize(NULL);

    wincp = TY_(Win32MLangGetCPFromName)(allocator, encoding);
    if (wincp == 0)
    {
        /* no codepage found for this encoding */
        return no;
    }

    hr = CreateMLangObject(p);

    if (hr != S_OK || !p)
    {
        /* MLang not supported */
        return no;
    }

    /* keep the result: the check below must test this call, not the
    ** earlier object creation */
    hr = IMLangConvertCharset_Initialize(p, 1200, wincp, MLCONVCHARF_NOBESTFITCHARS);

    if (hr != S_OK)
    {
        /* encoding not supported, insufficient memory, etc. */
        /* the object is not stored anywhere, so release it here */
        IMLangConvertCharset_Release(p);
        return no;
    }

    out->mlang = p;

    return yes;
}
|
||||
|
||||
/* Release the MLang converter stored in out->mlang (if any), clear the
** field, and call CoUninitialize().  `out` must not be NULL.
*/
void Win32MLangUninitOutputTranscoder(StreamOut * out)
{
    IMLangConvertCharset * converter;

    assert( out != NULL );

    converter = (IMLangConvertCharset *)out->mlang;

    if (converter != NULL)
    {
        IMLangConvertCharset_Release(converter);
        out->mlang = NULL;
    }

    /* runs whether or not a converter was attached */
    CoUninitialize();
}
|
||||
#endif
|
||||
|
||||
/* Read one character from the stream through the MLang converter.
**
**   firstByte - byte already taken from the input
**   in        - stream with an attached converter (in->mlang) and the
**               byte source used to fetch further bytes
**   bytesRead - receives the byte count reported by the converter for
**               the last conversion attempt; it is not written when the
**               function gives up after TC_INBUFSIZE bytes
**
** Bytes are accumulated one at a time and the whole buffer is handed to
** the converter again after each byte, until it yields one or two
** UTF-16 units.  Two units are merged with TY_(CombineSurrogatePair).
** Returns the character, or EndOfStream when the source runs dry or no
** character was produced from TC_INBUFSIZE bytes.
*/
int TY_(Win32MLangGetChar)(byte firstByte, StreamIn * in, uint * bytesRead)
{
    IMLangConvertCharset * converter;
    TidyInputSource * src;
    CHAR pending[TC_INBUFSIZE] = { 0 };
    WCHAR units[TC_OUTBUFSIZE] = { 0 };
    HRESULT hr = S_OK;
    size_t pendingLen = 0;

    assert( in != NULL );
    assert( &in->source != NULL );
    assert( bytesRead != NULL );
    assert( in->mlang != NULL );

    converter = (IMLangConvertCharset *)in->mlang;
    src = &in->source;

    pending[0] = (CHAR)firstByte;
    pendingLen = 1;

    for ( ; pendingLen < TC_INBUFSIZE; ++pendingLen )
    {
        UINT produced = TC_OUTBUFSIZE;
        UINT consumed = pendingLen;
        int fetched;

        hr = IMLangConvertCharset_DoConversionToUnicode(converter, pending, &consumed, units, &produced);

        assert( hr == S_OK );
        assert( produced <= 2 );

        switch (produced)
        {
        case 2:
            {
                /* U+10000-U+10FFFF are returned as a pair of surrogates */
                tchar first = (tchar)units[0];
                tchar second = (tchar)units[1];
                assert( TY_(IsHighSurrogate)(second) && TY_(IsLowSurrogate)(first) );
                *bytesRead = consumed;
                return (int)TY_(CombineSurrogatePair)(second, first);
            }

        case 1:
            /* a complete character: report its length and hand it back */
            *bytesRead = consumed;
            return (int)units[0];

        default:
            break;
        }

        /* nothing produced yet, so another byte is needed */
        fetched = src->getByte(src->sourceData);

        if (fetched == EndOfStream)
        {
            /* todo: error message for broken stream? */
            *bytesRead = consumed;
            return EndOfStream;
        }

        /* the loop increment accounts for the byte stored here */
        pending[pendingLen] = (CHAR)fetched;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    return EndOfStream;
}
|
||||
|
||||
/* Asks the output stream's MLang converter whether it can encode c.
**
** c    character to test; must be non-zero and at most 0x10FFFF
** out  output stream whose out->mlang holds the converter
**
** Characters above 0xFFFF are split into two UTF-16 units with
** SplitSurrogatePair before being submitted.  No destination buffer is
** supplied to the converter, so only its HRESULT is consulted.
**
** Returns yes when the converter reports S_OK, no otherwise.
*/
Bool Win32MLangIsConvertible(tchar c, StreamOut * out)
{
    IMLangConvertCharset * p;
    HRESULT hr;
    WCHAR inbuf[2] = { 0 };
    UINT inbufsize = 0;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( out != NULL );
    assert( out->mlang != NULL );

    if (c > 0xFFFF)
    {
        tchar high = 0;
        tchar low = 0;

        TY_(SplitSurrogatePair)(c, &low, &high);

        /* the unit returned through "low" goes first in the buffer */
        inbuf[inbufsize++] = (WCHAR)low;
        inbuf[inbufsize++] = (WCHAR)high;
    }
    else
        inbuf[inbufsize++] = (WCHAR)c;

    p = (IMLangConvertCharset *)out->mlang;
    hr = IMLangConvertCharset_DoConversionFromUnicode(p, inbuf, &inbufsize, NULL, NULL);

    return hr == S_OK ? yes : no;
}
|
||||
|
||||
/* Encodes one character through MLang and writes it to the sink.
**
** c             character to write; must be non-zero and at most 0x10FFFF
** out           output stream; out->mlang holds the converter and
**               out->sink receives the encoded bytes
** bytesWritten  receives the number of bytes the converter produced
**
** Characters above 0xFFFF are split into two UTF-16 units with
** SplitSurrogatePair before conversion.  Conversion problems are only
** checked through assert().
*/
void Win32MLangPutChar(tchar c, StreamOut * out, uint * bytesWritten)
{
    IMLangConvertCharset * converter;
    TidyOutputSink * dest;
    CHAR encoded[TC_OUTBUFSIZE] = { 0 };
    UINT encodedLen = TC_OUTBUFSIZE;
    HRESULT hr = S_OK;
    WCHAR units[2] = { 0 };
    UINT unitCount = 0;
    uint pos = 0;

    assert( c != 0 );
    assert( c <= 0x10FFFF );
    assert( bytesWritten != NULL );
    assert( out != NULL );
    assert( &out->sink != NULL );
    assert( out->mlang != NULL );

    converter = (IMLangConvertCharset *)out->mlang;
    dest = &out->sink;

    if (c <= 0xFFFF)
    {
        units[unitCount++] = (WCHAR)c;
    }
    else
    {
        tchar high = 0;
        tchar low = 0;

        TY_(SplitSurrogatePair)(c, &low, &high);

        /* the unit returned through "low" goes first in the buffer */
        units[unitCount++] = (WCHAR)low;
        units[unitCount++] = (WCHAR)high;
    }

    hr = IMLangConvertCharset_DoConversionFromUnicode(converter, units, &unitCount, encoded, &encodedLen);

    assert( hr == S_OK );
    assert( encodedLen > 0 );
    assert( unitCount == 1 || unitCount == 2 );

    /* hand every produced byte to the sink, in order */
    while (pos < encodedLen)
    {
        dest->putByte(dest->sinkData, (byte)(encoded[pos]));
        ++pos;
    }

    *bytesWritten = encodedLen;
}
|
||||
|
||||
#endif /* TIDY_WIN32_MLANG_SUPPORT */
|
||||
|
||||
/*
|
||||
* local variables:
|
||||
* mode: c
|
||||
* indent-tabs-mode: nil
|
||||
* c-basic-offset: 4
|
||||
* eval: (c-set-offset 'substatement-open 0)
|
||||
* end:
|
||||
*/
|
19
src/win32tc.h
Normal file
19
src/win32tc.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef __WIN32TC_H__
#define __WIN32TC_H__
#ifdef TIDY_WIN32_MLANG_SUPPORT

/* win32tc.h -- Interface to Win32 transcoding routines

   (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
   See tidy.h for the copyright notice.

   $Id: win32tc.h,v 1.3 2006/12/29 16:31:09 arnaud02 Exp $
*/

/* Looks up the Windows code page for an encoding name; a result of 0
** is treated as "no codepage found" by the code in win32tc.c.
*/
uint TY_(Win32MLangGetCPFromName)( TidyAllocator *allocator, ctmbstr encoding );

/* Set-up and tear-down of the MLang transcoder of an input stream.
** NOTE(review): semantics inferred from the names; see win32tc.c.
*/
Bool TY_(Win32MLangInitInputTranscoder)( StreamIn * in, uint wincp );
void TY_(Win32MLangUninitInputTranscoder)( StreamIn * in );

/* Decodes one character that starts with firstByte, fetching further
** bytes from the stream's source as needed.  Returns the character or
** EndOfStream; *bytesRead receives the byte count reported by MLang.
*/
int TY_(Win32MLangGetChar)( byte firstByte, StreamIn * in, uint * bytesRead );

#endif /* TIDY_WIN32_MLANG_SUPPORT */
#endif /* __WIN32TC_H__ */
|
9
test/accessTest/1-1-1-1.html
Normal file
9
test/accessTest/1-1-1-1.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.1</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="noAlt.jpg">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-1-10.html
Normal file
9
test/accessTest/1-1-1-10.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.1</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="hasAlt.gif" alt="0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-1-2.html
Normal file
9
test/accessTest/1-1-1-2.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.1</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="gifimage.gif" alt="gifimage.gif">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-1-3.html
Normal file
9
test/accessTest/1-1-1-3.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.1</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="bytesImage.gif" alt="34K bytes">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-1-4.html
Normal file
9
test/accessTest/1-1-1-4.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.1</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="animage.gif" alt="{short description of image}">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-10-1.html
Normal file
9
test/accessTest/1-1-10-1.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.10</title>
|
||||
</head>
|
||||
<body>
|
||||
<script><!-- do nothing --></script>
|
||||
</body>
|
||||
</html>
|
23
test/accessTest/1-1-12-1.html
Normal file
23
test/accessTest/1-1-12-1.html
Normal file
|
@ -0,0 +1,23 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.12</title>
|
||||
</head>
|
||||
<body>
|
||||
<pre>
|
||||
% __ __ __ __ __ __ __ __ __ __ __ __ __ __
|
||||
100 | * |
|
||||
90 | * * |
|
||||
80 | * * |
|
||||
70 | @ * |
|
||||
60 | @ * |
|
||||
50 | * @ * |
|
||||
40 | @ * |
|
||||
30 | * @ @ @ * |
|
||||
20 | |
|
||||
10 | @ @ @ @ @ |
|
||||
0 5 10 15 20 25 30 35 40 45 50 55 60 65 70
|
||||
Flash frequency (Hertz)
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-2-1.html
Normal file
9
test/accessTest/1-1-2-1.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.2</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="pie-chart.jpg" alt="Pie chart of federal expenditures">
|
||||
</body>
|
||||
</html>
|
9
test/accessTest/1-1-2-2.html
Normal file
9
test/accessTest/1-1-2-2.html
Normal file
|
@ -0,0 +1,9 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>bobby/g13</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="pie-chart.jpg" longdesc="pie-chart.html" alt="Pie chart of federal expenditures">
|
||||
</body>
|
||||
</html>
|
10
test/accessTest/1-1-2-3.html
Normal file
10
test/accessTest/1-1-2-3.html
Normal file
|
@ -0,0 +1,10 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.2</title>
|
||||
</head>
|
||||
<body>
|
||||
<img src="pie-chart.jpg" alt="Pie chart of federal expenditures">
|
||||
<a href="pie-chart.html">D</a>
|
||||
</body>
|
||||
</html>
|
11
test/accessTest/1-1-3-1.html
Normal file
11
test/accessTest/1-1-3-1.html
Normal file
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>aert1.0/1.1.3</title>
|
||||
</head>
|
||||
<body>
|
||||
<form action="">
|
||||
<input type="image" name="submit" src="submit.jpg">
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue