From: Klaus Ethgen Date: Thu, 5 May 2016 11:26:05 +0000 (+0100) Subject: Removed some obsolete tools X-Git-Tag: v1.3~31 X-Git-Url: http://geeqie.org/cgi-bin/gitweb.cgi?p=geeqie.git;a=commitdiff_plain;h=82a1755fd2387f5b400df78949fe67e0d031ad65 Removed some obsolete tools --- diff --git a/doc/download.sh b/doc/download.sh deleted file mode 100755 index 4d915462..00000000 --- a/doc/download.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -unset LANG -PAGES=`curl "http://sourceforge.net/apps/trac/geeqie/wiki/TitleIndex" | \ - sed -e "s|>|>\n|g" |grep 'href=.*/geeqie/wiki/Guide'|sed -e 's|.*/wiki/Guide\([a-zA-Z0-9]*\).*|Guide\1|'` - -mkdir wiki - -for p in $PAGES ; do - curl "http://sourceforge.net/apps/trac/geeqie/wiki/$p?format=txt" > wiki/$p -done - - - \ No newline at end of file diff --git a/doc/wiki2docbook.py b/doc/wiki2docbook.py deleted file mode 100755 index 398a9151..00000000 --- a/doc/wiki2docbook.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/python -# -# This script converts trac wiki to docbook -# wiki pages must be in wiki/ directory and their names must start with "Guide" -# the first page is named GuideIndex -# output is written to docbook/ directory -# -# based on the following scripts: -# -# http://trac-hacks.org/wiki/Page2DocbookPlugin -# http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py -# -# see the links above for a list of requirements - - -import sys -import os -from trac.test import EnvironmentStub, Mock, MockPerm -from trac.mimeview import Context -from trac.wiki.formatter import HtmlFormatter -from trac.wiki.model import WikiPage -from trac.web.href import Href - -import urllib -from tidy import parseString -import libxml2 -import libxslt -import re - -datadir = os.getcwd() + "/wiki2docbook" - - -xhtml2dbXsl = u""" - - - - - - -""" - -normalizedHeadingsXsl = u""" - - - - - - -""" - -normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl) -normalizedHeadingsXsl_xsldoc = 
libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc) - -xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl) -xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc) - -def html2docbook(html): - - options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0, input_encoding='utf8', output_encoding='utf8', doctype='auto', wrap=0, char_encoding='utf8') - xhtml = parseString(html.encode("utf-8"), **options) - - xhtml_xmldoc = libxml2.parseDoc(str(xhtml)) - - xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None) - - nhstring = normalizedHeadingsXsl_xsldoc.saveResultToString(xhtml2_xmldoc) - - docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None) - - dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc) - - xhtml_xmldoc.freeDoc() - xhtml2_xmldoc.freeDoc() - docbook_xmldoc.freeDoc() - return dbstring.decode('utf-8') - - -text = {} #wiki text -depth = {} #document depth, 0 for index, leaf documents have depth 1 or 2 -parent = {}#parent document (if depth > 0) -inner = {} #defined for documents that are parents - -#top element indexed by depth -top_element = [ 'book', 'chapter', 'section', 'section', 'section', 'section', 'section', 'section', 'section', 'section' ] - -env = EnvironmentStub() -req = Mock(href=Href('/'), abs_href=Href('http://www.example.com/'), - authname='anonymous', perm=MockPerm(), args={}) -context = Context.from_request(req, 'wiki') - - -def read_file(name): - text[name] = file("wiki/" + name).read().decode('utf-8') - page = WikiPage(env) - page.name = name - page.text = '--' - page.save('', '', '::1', 0) - - -def read_index(): - index_name = "GuideIndex" - read_file(index_name) - index_text = text[index_name] - depth[index_name] = 0 - inner[index_name] = 1 - - stack = [ index_name , '', '', '' ] - - for line in index_text.splitlines() : - match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line) - if match: - name = match.group(2) - d = len(match.group(1)) / 2 - if (d > 
0): - depth[name] = d - parent[name] = stack[d - 1] - inner[stack[d - 1]] = 1 - stack[d] = name - read_file(name) - -# exclude links with depth > 1 from wiki text, they will be included indirectly -def filter_out_indirect(text): - out = "" - for line in text.splitlines() : - match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line) - d = 1 - if match: - d = len(match.group(1)) / 2 - if (d == 1): - out = out + line + "\n" - return out - -def process_pages(): - for name in text.keys(): - txt = text[name] - - if name in inner: - txt = filter_out_indirect(txt) - - html = HtmlFormatter(env, context, txt).generate() - - html = html.replace("/wiki/Guide", "#Guide") - - top = top_element[depth[name]] - db = html2docbook(html) - - if name in inner: - # replace list items with XIncludes, FIXME: this is ugly - r = re.compile(']*>') - db = r.sub(r'', db); - - r = re.compile('') - db = r.sub(r'', db); - - r = re.compile('\s*\s*[^<]*\s*\s*') - db = r.sub(r'\n', db); - - - db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">") - db = db.replace("", "") - - open("docbook/" + name + ".xml", "w").write(db.encode('utf-8')) - - -read_index() -process_pages() - - - diff --git a/doc/wiki2docbook/headingsNormalizer.about.txt b/doc/wiki2docbook/headingsNormalizer.about.txt deleted file mode 100644 index 72e58470..00000000 --- a/doc/wiki2docbook/headingsNormalizer.about.txt +++ /dev/null @@ -1,8 +0,0 @@ -Credits: Filipe Correia - -This stylesheet can be applied to xhtml documents. It ensures one one -h1 element exists per document. If the input document has only one h1 -element it just copies all nodes to the output, otherwise it adds a -new top level (h1) heading and depromotes every existing heading to a -lower level (ie, h1s turn into h2s, h2s turn into h3s, etc). 
- diff --git a/doc/wiki2docbook/headingsNormalizer/headingsNormalizer.xsl b/doc/wiki2docbook/headingsNormalizer/headingsNormalizer.xsl deleted file mode 100644 index cd807ed2..00000000 --- a/doc/wiki2docbook/headingsNormalizer/headingsNormalizer.xsl +++ /dev/null @@ -1,90 +0,0 @@ - - - - - - - - - - - - - - - -

- - -
- - -

- -

-
- - -

- -

-
- - -

- -

-
- - -
- -
-
- - -
- -
-
- - -
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/doc/wiki2docbook/html2db.about.txt b/doc/wiki2docbook/html2db.about.txt deleted file mode 100644 index d9b8efdb..00000000 --- a/doc/wiki2docbook/html2db.about.txt +++ /dev/null @@ -1,5 +0,0 @@ -Credits: Oliver Steele -http://osteele.com/projects/ -http://osteele.com/software/xslt/html2db/ - -html2db.xsl converts an XHTML source document into a Docbook output document. It provides features for customizing the generation of the output, so that the output can be tuned by annotating the source, rather than hand-editing the output. This makes it useful in a processing pipeline where the source documents are maintained in HTML, although it can be used as a one-time conversion tool too. diff --git a/doc/wiki2docbook/html2db/LICENSE.txt b/doc/wiki2docbook/html2db/LICENSE.txt deleted file mode 100644 index 858c2ab3..00000000 --- a/doc/wiki2docbook/html2db/LICENSE.txt +++ /dev/null @@ -1,133 +0,0 @@ -Preamble --------- -The intent of this document is to state the conditions under which a -Package may be copied, such that the Copyright Holder maintains some -semblance of artistic control over the development of the package, -while giving the users of the package the right to use and distribute -the Package in a more-or-less customary fashion, plus the right to -make reasonable modifications. - -Definitions ------------ -"Package" refers to the collection of files distributed by the -Copyright Holder, and derivatives of that collection of files created -through textual modification. - -"Standard Version" refers to such a Package if it has not been -modified, or has been modified in accordance with the wishes of the -Copyright Holder as specified below. - -"Copyright Holder" is whoever is named in the copyright or copyrights -for the package. - -"You" is you, if you're thinking about copying or distributing this -Package. - -"Reasonable copying fee" is whatever you can justify on the basis of -media cost, duplication charges, time of people involved, and so -on. 
(You will not be required to justify it to the Copyright Holder, -but only to the computing community at large as a market that must -bear the fee.) - -"Freely Available" means that no fee is charged for the item itself, -though there may be fees involved in handling the item. It also means -that recipients of the item may redistribute it under the same -conditions they received it. - - -You may make and give away verbatim copies of the source form of the -Standard Version of this Package without restriction, provided that -you duplicate all of the original copyright notices and associated -disclaimers. - - -You may apply bug fixes, portability fixes and other modifications -derived from the Public Domain or from the Copyright Holder. A Package -modified in such a way shall still be considered the Standard Version. - - -You may otherwise modify your copy of this Package in any way, -provided that you insert a prominent notice in each changed file -stating how and when you changed that file, and provided that you do -at least ONE of the following: - -- place your modifications in the Public Domain or otherwise make them - Freely Available, such as by posting said modifications to Usenet or - an equivalent medium, or placing the modifications on a major - archive site such as uunet.uu.net, or by allowing the Copyright - Holder to include your modifications in the Standard Version of the - Package. -- use the modified Package only within your corporation or - organization. -- rename any non-standard executables so the names do not conflict - with standard executables, which must also be provided, and provide - a separate manual page for each non-standard executable that clearly - documents how it differs from the Standard Version. -- make other distribution arrangements with the Copyright Holder. 
- - -You may distribute the programs of this Package in object code or -executable form, provided that you do at least ONE of the following: - -- distribute a Standard Version of the executables and library files, - together with instructions (in the manual page or equivalent) on - where to get the Standard Version. -- accompany the distribution with the machine-readable source of the - Package with your modifications. -- give non-standard executables non-standard names, and clearly - document the differences in manual pages (or equivalent), together - with instructions on where to get the Standard Version. -- make other distribution arrangements with the Copyright Holder. - - -You may charge a reasonable copying fee for any distribution of this -Package. You may charge any fee you choose for support of this -Package. You may not charge a fee for this Package itself. However, -you may distribute this Package in aggregate with other (possibly -commercial) programs as part of a larger (possibly commercial) -software distribution provided that you do not advertise this Package -as a product of your own. You may embed this Package's interpreter -within an executable of yours (by linking); this shall be construed as -a mere form of aggregation, provided that the complete Standard -Version of the interpreter is so embedded. - - -The scripts and library files supplied as input to or produced as -output from the programs of this Package do not automatically fall -under the copyright of this Package, but belong to whomever generated -them, and may be sold commercially, and may be aggregated with this -Package. 
If such scripts or library files are aggregated with this -Package via the so-called "undump" or "unexec" methods of producing a -binary executable image, then distribution of such an image shall -neither be construed as a distribution of this Package nor shall it -fall under the restrictions of Paragraphs 3 and 4, provided that you -do not represent such an executable image as a Standard Version of -this Package. - - -C subroutines (or comparably compiled subroutines in other languages) -supplied by you and linked into this Package in order to emulate -subroutines and variables of the language defined by this Package -shall not be considered part of this Package, but are the equivalent -of input as in Paragraph 6, provided these subroutines do not change -the language in any way that would cause it to fail the regression -tests for the language. - - -Aggregation of this Package with a commercial distribution is always -permitted provided that the use of this Package is embedded; that is, -when no overt attempt is made to make this Package's interfaces -visible to the end user of the commercial distribution. Such use shall -not be construed as a distribution of this Package. - - -The name of the Copyright Holder may not be used to endorse or promote -products derived from this software without specific prior written -permission. - - -THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -The End diff --git a/doc/wiki2docbook/html2db/ar01s02.html b/doc/wiki2docbook/html2db/ar01s02.html deleted file mode 100644 index 246b0000..00000000 --- a/doc/wiki2docbook/html2db/ar01s02.html +++ /dev/null @@ -1,10 +0,0 @@ -Features

Features

XSLT implementation

This tool is designed to be embedded within an XSLT processing -pipeline. html2html.xslt can be used in a custom -stylesheet or integrated into a larger system. See Overriding.

Customizable

The output can be customized by means of additional markup in -the XHTML source. See the section on customization.

Creates outline structure

h1, h2, etc. are turned into nested -section and title elements (as opposed to -bridge heads).

Accepts a wide variety of XHTML

In particular, html2db.xsl automatically wraps naked item -text (text that is not enclosed in a <p>) -inside a table cell or list item. Naked text is a common property of -XHTML documents, but needs to be clothed to create valid -Docbook.[1]



[1] This feature is limited. See Implicit Blocks.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s03.html b/doc/wiki2docbook/html2db/ar01s03.html deleted file mode 100644 index 9ad1ad9c..00000000 --- a/doc/wiki2docbook/html2db/ar01s03.html +++ /dev/null @@ -1,3 +0,0 @@ -Requirements

Requirements

  • Java: JRE or JDK 1.3 or greater.

  • Xalan 2.5.0.

  • Familiarity with installing and running JAR files.

html2db.xsl might work with earlier versions of Java and Xalan, and -it might work with other XSLT processors such as Saxon and -xsltproc.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s04.html b/doc/wiki2docbook/html2db/ar01s04.html deleted file mode 100644 index dacfed27..00000000 --- a/doc/wiki2docbook/html2db/ar01s04.html +++ /dev/null @@ -1 +0,0 @@ -License

License

This software is released under the Open Source Artistic License.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s05.html b/doc/wiki2docbook/html2db/ar01s05.html deleted file mode 100644 index 807fa7ea..00000000 --- a/doc/wiki2docbook/html2db/ar01s05.html +++ /dev/null @@ -1 +0,0 @@ -Installation

Installation

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s06.html b/doc/wiki2docbook/html2db/ar01s06.html deleted file mode 100644 index 1b40f538..00000000 --- a/doc/wiki2docbook/html2db/ar01s06.html +++ /dev/null @@ -1,9 +0,0 @@ -Usage

Usage

Use Xalan to process an XHTML source file into a Docbook file:

-java org.apache.xalan.xslt.Process -XSL html2dbk.xsl -IN doc.html > doc.xml
-

See index.src.html for an -example of an input file.

If your source files are in HTML, not XHTML, you may find the Tidy tool useful. This is a -tool that converts from HTML to XHTML, and can be added to the front -of your processing pipeline.

(If you need to process HTML and you don't know or can't figure out -from context what a processing pipeline is, html2db.xsl is probably not -the right tool for you, and you should look for a local XML or Java -guru or for a commercially supported product.)

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s07.html b/doc/wiki2docbook/html2db/ar01s07.html deleted file mode 100644 index 455313a0..00000000 --- a/doc/wiki2docbook/html2db/ar01s07.html +++ /dev/null @@ -1,73 +0,0 @@ -Specification

Specification

XHTML Elements

code/i stands for "an i element -immediately within a code element". This notation is -from XPath.

XHTML elements must be in the XHTML Transitional namespace, -http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd.

XHTMLDocbookNotes
b, i, em, strongemphasisThe role attribute is the original tag name
dfnglossitem, and also primaryindexterm 
code/i, tt/i, pre/ireplaceableIn practice, i within a monospace content is usually used to mean replaceable text. If you're using it for emphasis, use em instead.
pre, body/codeprogramlisting 
imginlinemediaobject/imageobject/imagedataIn an inline context.
img[informal]figure/mediaobject/imageobject/imagedataIf it has a title attribute or db:title it's wrapped in a figure. Otherwise it's wrapped in an informalfigure.
table[informal]tableXHTML table becomes Docbook table if it has a summary attribute; informaltable otherwise.
ulitemizedlistBut see the processing instruction below.

Links

Table 1. Link Translation

XHTMLDocbookNotes
<a name="name"><anchor id="{$anchor-id-prefix}name">An anchor within a hn element is attached to the enclosing section as an id attribute instead.
<a href="#name"><link linkend="{$anchor-id-prefix}name"> 
<a href="url"><ulink url="name"> 
<a name="mailto:address"><email>address</email> 

Tables

XHTML table support is minimal. html2db.xsl changes the -element names and counts the columns (this is necessary to get table -footnotes to span all the columns), but it does not attempt to deal -with tables in their full generality.

An XHTML table with a summary attribute -generates a table, whose title is the value -of that summary. An XHTML table without a -summary generates an informaltable.

Any trs that contain ths are pulled to -the top of the table, and placed inside a thead. Other -trs are placed inside a tbody. This matches -the common XHTML table pattern, where the first row is -a header row.

Implicit Blocks

XHTML allows li, dd, and td -elements to contain either inline text (for instance, -<li>a list item</li>) or block structure -(<li><p>a block</p></li>). The -corresponding Docbook elements require block structure, such as -para.

html2db.xsl provides limited support for wrapping naked text in -these positions in para elements. If a list item or -table cell item directly contains text, all text up to the position of -the first element (or all text, if there is no element) is wrapped in -para. This handles the simple case of an item that -directly contains text, and also the case of an item that contains -text followed by blocks such as paragraphs.

Note that this algorithm is easily confused. It doesn't -distinguish between block and inline XHTML elements, so it will only -wrap the first word in <li>some <b>bold</b> -text</li>, leading to badly formatted output. The -workaround is to wrap troublesome content in explicit -<p> tags.

Docbook Elements

Elements from the Docbook namespace are passed through as is. -There are two ways to include a Docbook element in your XHTML -source:

Global prefix

A fake Docbook namespace[2] - -declaration may be added to the document root element. Anywhere in -the document, the prefix from this namespace declaration may be used -to include a Docbook element. This is useful if a document contains -many Docbook elements, such as footnote or -glossterm, interspersed with XHTML. (In this case it may -be more convenient to allow these elements in the XHTML namespace and -add a customization layer that translates them to docbook elements, -however. See Customization.)

-<html xmlns="http://www.w3.org/1999/xhtml"
-      xmlns:db="urn:docbook">
-  ...
-  <p>Some text<db:footnote>and a footnote</db:footnote>.</p>
-
Local namespace

A Docbook element may be introduced along with a prefix-less -namespace declaration. This is useful for embedding a Docbook -document fragment (a hierarchy of elements that all use Docbook tags) -within an XHTML document.

-  ...
-  <articleinfo xmlns="urn:docbook">
-    <author>
-      <firstname>...</firstname>
-  ...
-

The source to this document -illustrates both of these techniques.

Note

Both these techniques will cause your document to be -invalid as XHTML. In order to validate an XHTML document that -contains Docbook elements, you will need to create a custom schema. -Technically, you then ought to place your document in a different -namespace, but this will cause html2db.xsl not to recognize it!

Output Processing Instructions

html2db.xsl adds a few processing instructions to the output file. -The Docbook XSL stylesheets ignore these, but if you write a -customization layer for Docbook XSL, you can use the information in -these processing instructions to customize the HTML output. This can -be used, for example, to set the a onclick -and target attributes in the HTML files that Docbook XSL -creates to the same values they had in the input document.

<?html2db attribute="name" value="value"?>

Placed inside a link element to capture the value of the a target and onclick attributes. name is the name of the attribute (target or onclick), and value is its value, with " and \ replaced by \" and \\, respectively.

<?html2db element="br"?>

Represents the location of an XHTML br element in the -source document.

You can also include <?db2html?> processing -instructions in the HTML source document, and they will be copied -through to the Docbook output file unchanged (as will all other -processing instructions).



[2] The fake -Docbook namespace is urn:docbook. Docbook doesn't really -have a namespace, and if it did, it wouldn't be this one. See Docbook namespace for a discussion of -this issue.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s08.html b/doc/wiki2docbook/html2db/ar01s08.html deleted file mode 100644 index 80c12a9c..00000000 --- a/doc/wiki2docbook/html2db/ar01s08.html +++ /dev/null @@ -1,22 +0,0 @@ -Customization

Customization

XSLT Parameters

<xsl:param name="anchor-id-prefix" select="''"/>

Prefixed to every id generated from <a name=> - and <a href="#">. This is useful to avoid - collisions between multiple documents that are compiled into the - same book. For instance, if a number of XHTML sources are assembled - into chapters of a book, you style each source file with a prefix of - docid. where docid is a unique id - for each source file.

<xsl:param name="document-root" select="'article'"/>

The default document root. This can be overridden by - <?html2db class="name"> within the - document itself, and defaults to article.

Processing instructions

Use the <?html2db?> processing instruction to -customize the transformation of the XHTML source to Docbook:

Processing instructionContentEffect
<?html2db class="xxx"?>bodySets the output document root to xxx. Useful for -translating to prefix, appendix, or chapter; the default is -$document-root.
<?html2db class="simplelist"?>ulCreates a vertical simplelist.[a]
<?html2db rowsep="1"?>[informal]tableSets the rowsep attribute on the generated table.[b]

[a] Note that the -current implementation simply checks for the presence of any -html2db processing instruction.

[b] Note that the current implementation simply checks for the presence of any html2db processing instruction that begins with rowsep, and assumes the value is 1.

Overriding the built-in templates

For cases where the previous techniques don't allow for enough -customization, you can override the builtin templates. You will need -to know XSLT in order to do this, and you will need to write a new -stylesheet that uses the xsl:import element to import -html2db.xsl.

The example.xsl stylesheet -is an example customization layer. It recognizes the <div -class="abstract"> and <p class="note"> -classes in the source for this document, -and generates the corresponding Docbook elements.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s09.html b/doc/wiki2docbook/html2db/ar01s09.html deleted file mode 100644 index 4fbe8af5..00000000 --- a/doc/wiki2docbook/html2db/ar01s09.html +++ /dev/null @@ -1,36 +0,0 @@ -FAQ

FAQ

Why generate Docbook?

The primary reason to use Docbook as an output format is -to take advantage of the Docbook XSL stylesheets. These are a -well-designed, well-documented set of XSL stylesheets that provide a -variety of publishing features that would be difficult to recreate -from scratch for HTML:

  • Automatic Table-of-Contents generation

  • Automatic part, chapter, and section numbering.

  • Creation of single-page, multi-page, PDF, and WinHelp files from the same source document.

  • Navigation headers, footers, and metadata for multi-page HTML -documents.

  • Link resolution and link target text insertion across multiple pages and numbered targets.

  • Figure, example, and table numbering, and tables of these.

  • Index and glossary tools.

Why write in XHTML?

Given that Docbook is so great, why not write in it?

Where there are not legacy concerns, Docbook is probably a better -choice for structured or technical documentation.

Where the only legacy concern is the documents themselves, and not -the tools and skill sets of documentation contributors, you should -consider using an (X)HTML convertor to perform a one-time conversion -of your documentation source into Docbook, and then switching -development to the result files. You can use this stylesheet to -perform this conversion, or evaluate other tools, many of which are -probably appropriate for this purpose.

Often there are other legacy concerns: the availability of cheap -(including free) and usable HTML editors and editing modes; and the -fact that it's easier to teach people XHTML than Docbook. If either -of these is an issue in your organization, you may want to maintain -documentation sources in XHTML instead of Docbook.

For example, at Laszlo, -most developers contribute directly to the documentation. Requiring -that developers learn Docbook, or that they wait on the doc team to -get content into the docs, would discourage this.

Why not use an existing convertor?

This isn't the first (X)HTML to Docbook convertor. Why not use one -of the existing ones?

Each of the HTML to Docbook convertors that I could find had at least some -of the following limitations, some of which stemmed from their -intended use as one-time-only convertors for legacy documents:

  • Many only operated on a subset of HTML, and relied upon hand -editing of the output to clean up mistakes. This made them impossible -to use as part of a processing pipeline, where the source is -maintained in XHTML.

  • There was no way to customize the output, except by (1) hand -editing, or (2) writing a post-processing stylesheet, which didn't -have access to the information in the XHTML source document.

  • Many of them were difficult or impossible to customize and -extend. They were closed-source, or written in Java or Perl (which I -find to be difficult languages to use for customizing this kind of -thing) and embedded in a larger system.

  • They didn't take full advantage of the Docbook tag set and content -model to represent document structure. For instance, they didn't -generate nested section elements to represent -h1 h2 sequences, or table to -represent tables with summary attributes.

I got this error. What does it mean?

Q. Fatal Error! The element type "br" must be terminated by the matching end-tag "</br>". -

A. Your document is HTML, not XHTML. You need to fix it, or run it through Tidy first.

Q. My output document is empty except for the <?xml version="1.0" encoding="UTF-8"?> line.

A. The document is missing a namespace declaration. See the example for an example.

Q. Some of the headers and document sections are repeated multiple times.

A. The document has out-of-sequence headers, such as h1 followed by h3 (instead of h2). This won't work.

Q. Fatal Error! The prefix "db" for element "db:footnote" is not bound.

A. You haven't declared the db namespace prefix. See the example for an example.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/ar01s10.html b/doc/wiki2docbook/html2db/ar01s10.html deleted file mode 100644 index 29db1cfd..00000000 --- a/doc/wiki2docbook/html2db/ar01s10.html +++ /dev/null @@ -1,45 +0,0 @@ -Implementation Notes

Implementation Notes

Bugs

  • Improperly sequenced hn (for example -h1 followed by h3, instead of -h2) will result in duplicate text.

Limitations

  • The id attribute is only preserved for certain -elements (at least hn, images, paragraphs, and -tables). It ought to be preserved for all of them.

  • Only the very simplest table format is -implemented.

  • Always uses compact lists.

  • The string matching for <?html2db -class="classname"?> requires an exact match -(spaces and all).

  • The implicit blocks code is easily -confused, as documented in that section. This is -easy to fix now that I understand the difference between block and -inline elements (I didn't when I was implementing this), but I -probably won't do so until I run into the problem again.

Wishlist

  • Allow <?html2db attribute-name="name" -value="value"?> at any position, to set arbitrary -Docbook attributes on the generated element.

  • Use a different technique from the fake -namespace prefix to name Docbook elements in the source, that -preserves the XHTML validity of the source file. For example, an -option to transform <div class="db:footnote"> into -<footnote>, or to use a processing attribute -(<div><?html2db classname="footnote"?>).

  • Parse DC metadata from XHTML html/head/meta.

  • Add an option to use html/head/title instead of -html/body/h1[1] for top title.

  • Allow an id on every element.

  • Add an option to translate the XHTML class into a -Docbook role.

  • Preserve more of the whitespace from the source document especially within lists and tables in order to make it easier to debug the output document.

Design Notes

The Docbook Namespace

html2db.xsl accepts elements in the "Docbook namespace" in XHTML -source. This namespace is urn:docbook.

This isn't technically correct. Docbook doesn't really have a -namespace, and if it did, it wouldn't be this one. RFC 3151 suggests -urn:publicid:-:OASIS:DTD+DocBook+XML+V4.1.2:EN as the -Docbook namespace.

There are two problems with the RFC 3151 namespace. First, it's long -and hard to remember. Second, it's limited to Docbook v4.1.2 -but html2db.xsl works with other versions of Docbook too, which would -presumably have other namespaces. I think it's more useful to -underspecify the Docbook version in the spec for this tool. -Docbook itself underspecifies the version completely, by avoiding a -namespace at all, but when mixing Docbook and XHTML elements I find it -useful to be more specific than that.

History

The original version of html2db.xsl was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation -effort. We had a set of custom stylesheets that formatted and added -linking information to programming-language elements such as -classname and tagname, and added -Table-of-Contents to chapter documentation and numbers examples.

As the documentation set grew, the doc team (John Sundman) -requested features such as inter-chapter navigation, callouts, and -index and glossary elements. I was able to beat all of these back -except for navigation, which seemed critical. After a few days trying -to implement this, I decided it would be simpler to convert the subset -of XHTML that we used into a subset of Docbook, and use the latter to -add navigation. (Once this was done, the other features came for -free.)

During my August 2004 "sabbatical", I factored the general html2db -code out from the Laszlo-specific code, refactored and otherwise -cleaned it up, and wrote this documentation.

Credits

html2db.xsl was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation effort.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/build.xml b/doc/wiki2docbook/html2db/build.xml deleted file mode 100644 index 1804cae6..00000000 --- a/doc/wiki2docbook/html2db/build.xml +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/wiki2docbook/html2db/example.xsl b/doc/wiki2docbook/html2db/example.xsl deleted file mode 100644 index 21be297f..00000000 --- a/doc/wiki2docbook/html2db/example.xsl +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/wiki2docbook/html2db/extract-section.xsl b/doc/wiki2docbook/html2db/extract-section.xsl deleted file mode 100644 index 0a4e0215..00000000 --- a/doc/wiki2docbook/html2db/extract-section.xsl +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - diff --git a/doc/wiki2docbook/html2db/extract-toc.xsl b/doc/wiki2docbook/html2db/extract-toc.xsl deleted file mode 100644 index b2f2db24..00000000 --- a/doc/wiki2docbook/html2db/extract-toc.xsl +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/wiki2docbook/html2db/html2db-utils.xsl b/doc/wiki2docbook/html2db/html2db-utils.xsl deleted file mode 100644 index 2839be13..00000000 --- a/doc/wiki2docbook/html2db/html2db-utils.xsl +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ - - - - - - - - - - - - - - - diff --git a/doc/wiki2docbook/html2db/html2db.xsl b/doc/wiki2docbook/html2db/html2db.xsl deleted file mode 100644 index fc50797b..00000000 --- a/doc/wiki2docbook/html2db/html2db.xsl +++ /dev/null @@ -1,565 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Unknown element - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - Text must be inside a <p> tag. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - -
-
-
- - - - - <xsl:apply-templates mode="skip-anchors" select="node()"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
-
- - - - - - - - - - - - - - - - - informal - - - - - - <xsl:value-of select="@title"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - attribute name=" - - " value= - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - element=" - - " - - - - - - - - - - - - - - - - - - informal - - - - - - - - - - - - - - - 1 - - - - <xsl:value-of select="@summary"/> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/doc/wiki2docbook/html2db/index.html b/doc/wiki2docbook/html2db/index.html deleted file mode 100644 index 2791e9ee..00000000 --- a/doc/wiki2docbook/html2db/index.html +++ /dev/null @@ -1,12 +0,0 @@ -html2db.xsl

html2db.xsl

Oliver Steele

Revision History
Revision 12004-07-30
Revision 1.0.12004-08-01

Editorial changes to the - readme.


Overview

html2db.xsl converts an XHTML source document into a Docbook output -document. It provides features for customizing the generation of the -output, so that the output can be tuned by annotating -the source, rather than hand-editing the output. This makes it useful -in a processing pipeline where the source documents are maintained in -HTML, although it can be used as a one-time conversion tool -too.

This document is an example of html2db.xsl used in conjunction with -the Docbook XSL stylesheets. The source -file is an XHTML file with some embedded Docbook elements and -processing instructions. html2db.xsl compiles it into a Docbook document, which can be used to generate -this output file (which includes a Table of Contents), a chunked HTML file, a PDF, or other formats.

\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/index.src.html b/doc/wiki2docbook/html2db/index.src.html deleted file mode 100644 index 8f6d8b31..00000000 --- a/doc/wiki2docbook/html2db/index.src.html +++ /dev/null @@ -1,620 +0,0 @@ -html2db.xsl"> -]> - - -This title is ignored - - - -

html2db.xsl

- - - - - Oliver - Steele - - - - 1 - 2004-07-30 - - - 1.0.1 - 2004-08-01 - Editorial changes to the - readme. - - - 2004-07-30 - - -

Overview

- -

&html2db; converts an XHTML source document into a Docbook output -document. It provides features for customizing the generation of the -output, so that the output can be tuned by annotating -the source, rather than hand-editing the output. This makes it useful -in a processing pipeline where the source documents are maintained in -HTML, although it can be used as a one-time conversion tool -too.

- -

This document is an example of &html2db; used in conjunction with -the Docbook XSL stylesheets. The source -file is an XHTML file with some embedded Docbook elements and -processing instructions. &html2db; compiles it into a Docbook document, which can be used to generate -this output file (which includes a Table of Contents), a chunked HTML file, a PDF, or other formats.

- -

Features

-
-
XSLT implementation
-
This tool is designed to be embedded within an XSLT processing -pipeline. html2html.xslt can be used in a custom -stylesheet or integrated into a larger system. See Overriding.
- -
Customizable
-
The output can be customized by the means of additional markup in -the XHTML source. See the section on customization.
- -
Creates outline structure
-
h1, h2, etc. are turned into nested -section and title elements (as opposed to -bridge heads).
- -
Accepts a wide variety of XHTML
-
In particular, &html2db; automatically wraps naked item -text (text that is not enclosed in a <p>) -inside a table cell or list item. Naked text is a common property of -XHTML documents, but needs to be clothed to create valid -Docbook.

(This feature is limited. See Implicit Blocks.)

- -
- -

Requirements

-
    -
  • Java: JRE or JDK 1.3 or greater.
  • -
  • Xalan 2.5.0.
  • -
  • Familiarity with installing and running JAR files.
  • -
- -

&html2db; might work with earlier versions of Java and Xalan, and -it might work with other XSLT processors such as Saxon and -xsltproc.

- -

License

-

This software is released under the Open Source Artistic License.

- -

Installation

- - -

Usage

-

Use Xalan to process an XHTML source file into a Docbook file:

- -
-java org.apache.xalan.xslt.Process -XSL html2dbk.xsl -IN doc.html > doc.xml
-
- -

See index.src.html for an -example of an input file.

- -

If your source files are in HTML, not XHTML, you may find the Tidy tool useful. This is a -tool that converts from HTML to XHTML, and can be added to the front -of your processing pipeline.

- -

(If you need to process HTML and you don't know or can't figure out -from context what a processing pipeline is, &html2db; is probably not -the right tool for you, and you should look for a local XML or Java -guru or for a commercially supported product.)

- -

Specification

- -

XHTML Elements

-

code/i stands for "an i element -immediately within a code element". This notation is -from XPath.

- -

XHTML elements must be in the XHTML Transitional namespace, -http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
XHTMLDocbookNotes
b, i, em, strongemphasisThe role attribute is the original tag name
dfnglossitem, and also primary indexterm
code/i, tt/i, pre/ireplaceableIn practice, i within a monospace content is usually used to mean replaceable text. If you're using it for emphasis, use em instead.
pre, body/codeprogramlisting
imginlinemediaobject/imageobject/imagedataIn an inline context.
img[informal]figure/mediaobject/imageobject/imagedataIf it has a title attribute or db:title it's wrapped in a figure. Otherwise it's wrapped in an informalfigure.
table[informal]tableXHTML table becomes Docbook table if it has a summary attribute; informaltable otherwise.
ulitemizedlistBut see the processing instruction below.
- - - -

Links

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
XHTMLDocbookNotes
<a name="name"><anchor id="{$anchor-id-prefix}name">An anchor within a hn element is attached to the enclosing section as an id attribute instead.
<a href="#name"><link linkend="{$anchor-id-prefix}name">
<a href="url"><ulink url="name">
<a name="mailto:address"><email>address</email>
- -

Tables

- -

XHTML table support is minimal. &html2db; changes the -element names and counts the columns (this is necessary to get table -footnotes to span all the columns), but it does not attempt to deal -with tables in their full generality.

- -

An XHTML table with a summary attribute -generates a table, whose title is the value -of that summary. An XHTML table without a -summary generates an informaltable.

- -

Any trs that contain ths are pulled to -the top of the table, and placed inside a thead. Other -trs are placed inside a tbody. This matches -the common XHTML table pattern, where the first row is -a header row.

- -

Implicit Blocks

-

XHTML allows li, dd, and td -elements to contain either inline text (for instance, -<li>a list item</li>) or block structure -(<li><p>a block</p></li>). The -corresponding Docbook elements require block structure, such as -para.

- -

&html2db; provides limited support for wrapping naked text in -these positions in para elements. If a list item or -table cell item directly contains text, all text up to the position of -the first element (or all text, if there is no element) is wrapped in -para. This handles the simple case of an item that -directly contains text, and also the case of an item that contains -text followed by blocks such as paragraphs.

- -

Note that this algorithm is easily confused. It doesn't -distinguish between block and inline XHTML elements, so it will only -wrap the first word in <li>some <b>bold</b> -text</li>, leading to badly formatted output. The -workaround is to wrap troublesome content in explicit -<p> tags.

- -

Docbook Elements

- -

Elements from the Docbook namespace are passed through as is. -There are two ways to include a Docbook element in your XHTML -source:

- -
-
Global prefix
-

A fake Docbook namespace

The fake -Docbook namespace is urn:docbook. Docbook doesn't really -have a namespace, and if it did, it wouldn't be this one. See Docbook namespace for a discussion of -this issue.

- -declaration may be added to the document root element. Anywhere in -the document, the prefix from this namespace declaration may be used -to include a Docbook element. This is useful if a document contains -many Docbook elements, such as footnote or -glossterm, interspersed with XHTML. (In this case it may -be more convenient to allow these elements in the XHTML namespace and -add a customization layer that translates them to docbook elements, -however. See Customization.)

- -

-  ...
-  

Some textand a footnote.

-]]>
- -
Local namespace
-

A Docbook element may be introduced along with a prefix-less -namespace declaration. This is useful for embedding a Docbook -document fragment (a hierarchy of elements that all use Docbook tags) -within an XHTML document.

- -

-    
-      ...
-  ...
-]]>
-
- -

The source to this document -illustrates both of these techniques.

- -

Both these techniques will cause your document to be -invalid as XHTML. In order to validate an XHTML document that -contains Docbook elements, you will need to create a custom schema. -Technically, you then ought to place your document in a different -namespace, but this will cause &html2db; not to recognize it!

- - -

Output Processing Instructions

- -

&html2db; adds a few processing instructions to the output file. -The Docbook XSL stylesheets ignore these, but if you write a -customization layer for Docbook XSL, you can use the information in -these processing instructions to customize the HTML output. This can -be used, for example, to set the a onclick -and target attributes in the HTML files that Docbook XSL -creates to the same values they had in the input document.

- -
-
<?html2db attribute="name" value="value"?>
-
Placed inside a link element to capture the value of the a target and onclick attributes. name is the name of the attribute (target or onclick), and value is its value, with " and \ replaced by \" and \\, respectively.
- -
<?html2db element="br"?>
-
Represents the location of an XHTML br element in the -source document.
- -
- -

You can also include <?db2html?> processing -instructions in the HTML source document, and they will be copied -through to the Docbook output file unchanged (as will all other -processing instructions).

- - -

Customization

-

XSLT Parameters

-
-
<xsl:param name="anchor-id-prefix" select="''/>
-
Prefixed to every id generated from <a name=> - and <a href="#">. This is useful to avoid - collisions between multiple documents that are compiled into the - same book. For instance, if a number of XHTML sources are assembled - into chapters of a book, you style each source file with a prefix of - docid. where docid is a unique id - for each source file.
- -
<xsl:param name="document-root" select="'article'"/>
-
The default document root. This can be overridden by - <?html2db class="name"> within the - document itself, and defaults to article.
-
- -

Processing instructions

-

Use the <?html2db?> processing instruction to -customize the transformation of the XHTML source to Docbook:

- - - - - - - - - - - - - - - - - - - - - - - - - - -
Processing instructionContentEffect
<?html2db class="xxx"?>bodySets the output document root to xxx. Useful for -translating to prefix, appendix, or chapter; the default is -$document-root.
<?html2db class="simplelist"?>ulCreates a vertical simplelist.Note that the -current implementation simply checks for the presence of any -html2db processing instruction.
<?html2db rowsep="1"?>[informal]tableSets the rowsep attribute on the generated table.Note that the current implementation simply checks for the presence of any html2db processing instruction that begins with rowsep, and assumes the value is 1.
- -

Overriding the built-in templates

-

For cases where the previous techniques don't allow for enough -customization, you can override the builtin templates. You will need -to know XSLT in order to do this, and you will need to write a new -stylesheet that uses the xsl:import element to import -html2db.xsl.

- -

The example.xsl stylesheet -is an example customization layer. It recognizes the <div -class="abstract"> and <p class="note"> -classes in the source for this document, -and generates the corresponding Docbook elements.

- - -

FAQ

-

Why generate Docbook?

-

The primary reason to use Docbook as an output format is -to take advantage of the Docbook XSL stylesheets. These are a -well-designed, well-documented set of XSL stylesheets that provide a -variety of publishing features that would be difficult to recreate -from scratch for HTML:

- -
    -
  • Automatic Table-of-Contents generation
  • -
  • Automatic part, chapter, and section numbering.
  • -
  • Creation of single-page, multi-page, PDF, and WinHelp files from the same source document.
  • -
  • Navigation headers, footers, and metadata for multi-page HTML -documents.
  • -
  • Link resolution and link target text insertion across multiple pages and numbered targets.
  • -
  • Figure, example, and table numbering, and tables of these.
  • -
  • Index and glossary tools.
  • -
- -

Why write in XHTML?

- -

Given that Docbook is so great, why not write in it?

- -

Where there are not legacy concerns, Docbook is probably a better -choice for structured or technical documentation.

- -

Where the only legacy concern is the documents themselves, and not -the tools and skill sets of documentation contributors, you should -consider using an (X)HTML convertor to perform a one-time conversion -of your documentation source into Docbook, and then switching -development to the result files. You can use this stylesheet to -perform this conversion, or evaluate other tools, many of which are -probably appropriate for this purpose.

- -

Often there are other legacy concerns: the availability of cheap -(including free) and usable HTML editors and editing modes; and the -fact that it's easier to teach people XHTML than Docbook. If either -of these is an issue in your organization, you may want to maintain -documentation sources in XHTML instead of Docbook.

- -

For example, at Laszlo, -most developers contribute directly to the documentation. Requiring -that developers learn Docbook, or that they wait on the doc team to -get content into the docs, would discourage this.

- -

Why not use an existing convertor?

- -

This isn't the first (X)HTML to Docbook convertor. Why not use one -of the existing ones?

- -

Each of the HTML to Docbook convertors that I could find had at least some -of the following limitations, some of which stemmed from their -intended use as one-time-only convertors for legacy documents:

- -
    -
  • Many only operated on a subset of HTML, and relied upon hand -editing of the output to clean up mistakes. This made them impossible -to use as part of a processing pipeline, where the source is -maintained in XHTML.
  • - -
  • There was no way to customize the output, except by (1) hand -editing, or (2) writing a post-processing stylesheet, which didn't -have access to the information in the XHTML source document.
  • - -
  • Many of them were difficult or impossible to customize and -extend. They were closed-source, or written in Java or Perl (which I -find to be difficult languages to use for customizing this kind of -thing) and embedded in a larger system.
  • - -
  • They didn't take full advantage of the Docbook tag set and content -model to represent document structure. For instance, they didn't -generate nested section elements to represent -h1 h2 sequences, or table to -represent tables with summary attributes.
  • -
- -

I got this error. What does it mean?

-
-
Q. Fatal Error! The element type "br" must be terminated by the matching end-tag "</br>". -
-
A. Your document is HTML, not XHTML. You need to fix it, or run it through Tidy first.
- -
Q. My output document is empty except for the <?xml version="1.0" encoding="UTF-8"?> line.
-
A. The document is missing a namespace declaration. See the example for an example.
- -
Q. Some of the headers and document sections are repeated multiple times.
-
A. The document has out-of-sequence headers, such as h1 followed by h3 (instead of h2). This won't work.
- -
Q. Fatal Error! The prefix "db" for element "db:footnote" is not bound.
-
A. You haven't declared the db namespace prefix. See the example for an example.
- -
- - -

Implementation Notes

- -

Bugs

-
    -
  • Improperly sequenced hn (for example -h1 followed by h3, instead of -h2) will result in duplicate text.
  • -
- - -

Limitations

-
    -
  • The id attribute is only preserved for certain -elements (at least hn, images, paragraphs, and -tables). It ought to be preserved for all of them.
  • -
  • Only the very simplest table format is -implemented.
  • -
  • Always uses compact lists.
  • -
  • The string matching for <?html2db -class="classname"?> requires an exact match -(spaces and all).
  • -
  • The implicit blocks code is easily -confused, as documented in that section. This is -easy to fix now that I understand the difference between block and -inline elements (I didn't when I was implementing this), but I -probably won't do so until I run into the problem again.
  • - -
- - - - -

Wishlist

-
    -
  • Allow <?html2db attribute-name="name" -value="value"?> at any position, to set arbitrary -Docbook attributes on the generated element.
  • - -
  • Use different technique from the fake -namespace prefix to name Docbook elements in the source, that -preserves the XHTML validity of the source file. For example, an -option transform <div class="db:footnote"> into -<footnote>, or to use a processing attribute -(<div><?html2db classname="footnote"?>).
  • - -
  • Parse DC metadata from XHTML html/head/meta.
  • - -
  • Add an option to use html/head/title instead of -html/body/h1[1] for top title.
  • - -
  • Allow an id on every element.
  • - -
  • Add an option to translate the XHTML class into a -Docbook role.
  • - -
  • Preserve more of the whitespace from the source document &emdash; especially within lists and tables &emdash; in order to make it easier to debug the output document.
  • - -

    Support

    -

    This is a work in progress. It serves my needs, but doesn't -attempt to be much more general than that. If you run into anything -it can't handle, please send a note, or better yet, a patch, to steele@osteele.com. I can't -promise to address problems (I have a day job too), but knowing what -people have run into will help me prioritize my work when I do have -time to work on this.

    - - -
- - -

Design Notes

-

The Docbook Namespace

-

&html2db; accepts elements in the "Docbook namespace" in XHTML -source. This namespace is urn:docbook.

- -

This isn't technically correct. Docbook doesn't really have a -namespace, and if it did, it wouldn't be this one. RFC 3151 suggests -urn:publicid:-:OASIS:DTD+DocBook+XML+V4.1.2:EN as the -Docbook namespace.

- -

There are two problems with the RFC 3151 namespace. First, it's long -and hard to remember. Second, it's limited to Docbook v4.1.2 &emdash; -but &html2db; works with other versions of Docbook too, which would -presumably have other namespaces. I think it's more useful to -underspecify the Docbook version in the spec for this tool. -Docbook itself underspecifies the version completely, by avoiding a -namespace at all, but when mixing Docbook and XHTML elements I find it -useful to be more specific than that.

- -

History

-

The original version of &html2db; was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation -effort. We had a set of custom stylesheets that formatted and added -linking information to programming-language elements such as -classname and tagname, and added -Table-of-Contents to chapter documentation and numbers examples.

- -

As the documentation set grew, the doc team (John Sundman) -requested features such as inter-chapter navigation, callouts, and -index and glossary elements. I was able to beat all of these back -except for navigation, which seemed critical. After a few days trying -to implement this, I decided it would be simpler to convert the subset -of XHTML that we used into a subset of Docbook, and use the latter to -add navigation. (Once this was done, the other features came for -free.)

- -

During my August 2004 "sabbatical", I factored the general html2db -code out from the Laszlo-specific code, refactored and otherwise -cleaned it up, and wrote this documentation.

- -

Credits

-

&html2db; was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation effort.

- - - \ No newline at end of file diff --git a/doc/wiki2docbook/html2db/index.xml b/doc/wiki2docbook/html2db/index.xml deleted file mode 100644 index 63383ca0..00000000 --- a/doc/wiki2docbook/html2db/index.xml +++ /dev/null @@ -1,410 +0,0 @@ - -
- -html2db.xsl - - - - - Oliver - Steele - - - - 1 - 2004-07-30 - - - 1.0.1 - 2004-08-01 - Editorial changes to the - readme. - - - 2004-07-30 - - -
Overview - -html2db.xsl converts an XHTML source document into a Docbook output -document. It provides features for customizing the generation of the -output, so that the output can be tuned by annotating -the source, rather than hand-editing the output. This makes it useful -in a processing pipeline where the source documents are maintained in -HTML, although it can be used as a one-time conversion tool -too. - -This document is an example of html2db.xsl used in conjunction with -the Docbook XSL stylesheets. The source -file is an XHTML file with some embedded Docbook elements and -processing instructions. html2db.xsl compiles it into a Docbook document, which can be used to generate -this output file (which includes a Table of Contents), a chunked HTML file, a PDF, or other formats. - -
Features -XSLT implementationThis tool is designed to be embedded within an XSLT processing -pipeline. html2html.xslt can be used in a custom -stylesheet or integrated into a larger system. See Overriding.CustomizableThe output can be customized by the means of additonal markup in -the XHMTL source. See the section on customization.Creates outline structureh1, h2, etc. are turned into nested -section and title elements (as opposed to -bridge heads).Accepts a wide variety of XHTMLIn particular, html2db.xsl automatically wraps naked item -textnaked item -text (text that is not enclosed in a <p>) -inside a table cell or list item. Naked text is a common property of -XHTML documents, but needs to be clothed to create valid -Docbook.This feature is limited. See Implicit Blocks.) - -
Requirements -Java: JRE or JDK 1.3 or greater.Xalan 2.5.0.Familiarity with installing and running JAR files. - -html2db.xsl might work with earlier versions of Java and Xalan, and -it might work with other XSLT processors such as Saxon and -xsltproc. - -
License -This software is released under the Open Source Artistic License. - -
Installation -Install JRE 1.3 or higher.Install Xalan, if necessary.Download html2db-1.zip from http://osteele.com/sources/html2db-1.zip.Unzip html2db-1.zip. - -
Usage -Use Xalan to process an XHTML source file into a Docbook file: - - -java org.apache.xalan.xslt.Process -XSL html2dbk.xsl -IN doc.html > doc.xml - - -See index.src.html for an -example of an input file. - -If your source files are in HTML, not XHTML, you may find the Tidy tool useful. This is a -tool that converts from HTML to XHTML, and can be added to the front -of your processing pipeline. - -(If you need to process HTML and you don't know or can't figure out -from context what a processing pipeline is, html2db.xsl is probably not -the right tool for you, and you should look for a local XML or Java -guru or for a commercially supported product.) - -
Specification - -
XHTML Elements -code/i stands for "an i element -immediately within a code element". This notation is -from XPath. - -XHTML elements must be in the XHTML Transitional namespace, -http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd. - -XHTMLDocbookNotes -b, i, em, strongemphasisThe role attribute is the original tag name -dfnglossitem, and also primary indexterm -code/i, tt/i, pre/ireplaceableIn practice, i within a monospace content is usually used to mean replaceable text. If you're using it for emphasis, use em instead. -pre, body/codeprogramlisting -imginlinemediaobject/imageobject/imagedataIn an inline context. -img[informal]figure/mediaobject/imageobject/imagedataIf it has a title attribute or db:title it's wrapped in a figure. Otherwise it's wrapped in an informalfigure. -table[informal]tableXHTML table becomes Docbook table if it has a summary attribute; informaltable otherwise. -ulitemizedlistBut see the processing instruction below. - - - - -
Links -Link TranslationXHTMLDocbookNotes -<a name="name"><anchor id="{$anchor-id-prefix}name">An anchor within a hn element is attached to the enclosing section as an id attribute instead. -<a href="#name"><link linkend="{$anchor-id-prefix}name"> -<a href="url"><ulink url="name"> -<a name="mailto:address"><email>address</email> -
- -
Tables - -XHTML table support is minimal. html2db.xsl changes the -element names and counts the columns (this is necessary to get table -footnotes to span all the columns), but it does not attempt to deal -with tables in their full generality. - -An XHTML table with a summary attribute -generates a table, whose title is the value -of that summary. An XHTML table without a -summary generates an informaltable. - -Any trs that contain ths are pulled to -the top of the table, and placed inside a thead. Other -trs are placed inside a tbody. This matches -the common XHTML table pattern, where the first row is -a header row. - -
Implicit Blocks -XHTML allows li, dd, and td -elements to contain either inline text (for instance, -<li>a list item</li>) or block structure -(<li><p>a block</p></li>). The -corresponding Docbook elements require block structure, such as -para. - -html2db.xsl provides limited support for wrapping naked text in -these positions in para elements. If a list item or -table cell item directly contains text, all text up to the position of -the first element (or all text, if there is no element) is wrapped in -para. This handles the simple case of an item that -directly contains text, and also the case of an item that contains -text followed by blocks such as paragraphs. - -Note that this algorithm is easily confused. It doesn't -distinguish between block and inline XHTML elements, so it will only -wrap the first word in <li>some <b>bold</b> -text</li>, leading to badly formatted output. The -workaround is to wrap troublesome content in explicit -<p> tags. - -
Docbook Elements - -Elements from the Docbook namespace are passed through as is. -There are two ways to include a Docbook element in your XHTML -source: - -Global prefixA fake Docbook namespacefake Docbook namespaceThe fake -Docbook namespace is urn:docbook. Docbook doesn't really -have a namespace, and if it did, it wouldn't be this one. See Docbook namespace for a discussion of -this issue. - -declaration may be added to the document root element. Anywhere in -the document, the prefix from this namespace declaration may be used -to include a Docbook element. This is useful if a document contains -many Docbook elements, such as footnote or -glossterm, interspersed with XHTML. (In this case it may -be more convenient to allow these elements in the XHMTL namespace and -add a customization layer that translates them to docbook elements, -however. See Customization.) - - -<html xmlns="http://www.w3.org/1999/xhtml" - xmlns:db="urn:docbook"> - ... - <p>Some text<db:footnote>and a footnote</db:footnote>.</p> -Local namespaceA Docbook element may be introduced along with a prefix-less -namespace declaration. This is useful for embedding a Docbook -document fragment (a hierarchy of elements that all use Docbook tags) -within of a XHTML document. - - - ... - <articleinfo xmlns="urn:docbook"> - <author> - <firstname>...</firstname> - ... - - -The source to this document -illustrates both of these techniques. - -Both these techniques will cause your document to be -invalid as XHTML. In order to validate an XHTML document that -contains Docbook elements, you will need to create a custom schema. -Technically, you then ought to place your document in a different -namespace, but this will cause html2db.xsl not to recognize it! - - -
Output Processing Instructions - -html2db.xsl adds a few of processing instructions to the output file. -The Docbook XSL stylesheets ignore these, but if you write a -customization layer for Docbook XSL, you can use the information in -these processing instructions to customize the HTML output. This can -be used, for example, to set the a onclick -and target attributes in the HTML files that Docbook XSL -creates to the same values they had in the input document. - -<?html2db attribute="name" value="value"?>Placed inside a link element to capture the value of the a target and onclick attributes. name is the name of the attribute (target or onclick), and value is its value, with " and \ replaced by \" and \\, respectively.<?html2db element="br"?>Represents the location of an XHTML br element in the -source document. - -You can also include <?db2html?> processing -instructions in the HTML source document, and they will be copied -through to the Docbook output file unchanged (as will all other -processing instructions). - - -
Customization -
XSLT Parameters -<xsl:param name="anchor-id-prefix" select="''"/>Prefixed to every id generated from <a name=> - and <a href="#">. This is useful to avoid - collisions between multiple documents that are compiled into the - same book. For instance, if a number of XHTML sources are assembled - into chapters of a book, you can style each source file with a prefix of - docid. where docid is a unique id - for each source file.<xsl:param name="document-root" select="'article'"/>The default document root. This can be overridden by - <?html2db class="name"?> within the - document itself, and defaults to article. - -
Processing instructions -Use the <?html2db?> processing instruction to -customize the transformation of the XHTML source to Docbook: - -Processing instructionContentEffect -<?html2db class="xxx"?>bodySets the output document root to xxx. Useful for -translating to preface, appendix, or chapter; the default is -$document-root. -<?html2db class="simplelist"?>ulCreates a vertical simplelist. Note that the -current implementation simply checks for the presence of any -html2db processing instruction. -<?html2db rowsep="1"?>[informal]tableSets the rowsep attribute on the generated table. Note that the current implementation simply checks for the presence of any html2db processing instruction that begins with rowsep, and assumes the value is 1. - - -
Overriding the built-in templates -For cases where the previous techniques don't allow for enough -customization, you can override the builtin templates. You will need -to know XSLT in order to do this, and you will need to write a new -stylesheet that uses the xsl:import element to import -html2db.xsl. - -The example.xsl stylesheet -is an example customization layer. It recognizes the <div -class="abstract"> and <p class="note"> -classes in the source for this document, -and generates the corresponding Docbook elements. - - -
FAQ -
Why generate Docbook? -The primary reason to use Docbook as an output format is -to take advantage of the Docbook XSL stylesheets. These are a -well-designed, well-documented set of XSL stylesheets that provide a -variety of publishing features that would be difficult to recreate -from scratch for HTML: - -Automatic Table-of-Contents generationAutomatic part, chapter, and section numbering.Creation of single-page, multi-page, PDF, and WinHelp files from the same source document.Navigation headers, footers, and metadata for multi-page HTML -documents.Link resolution and link target text insertion across multiple pages and numbered targets.Figure, example, and table numbering, and tables of these.Index and glossary tools. - -
Why write in XHTML? - -Given that Docbook is so great, why not write in it? - -Where there are no legacy concerns, Docbook is probably a better -choice for structured or technical documentation. - -Where the only legacy concern is the documents themselves, and not -the tools and skill sets of documentation contributors, you should -consider using an (X)HTML convertor to perform a one-time conversion -of your documentation source into Docbook, and then switching -development to the result files. You can use this stylesheet to -perform this conversion, or evaluate other tools, many of which are -probably appropriate for this purpose. - -Often there are other legacy concerns: the availability of cheap -(including free) and usable HTML editors and editing modes; and the -fact that it's easier to teach people XHTML than Docbook. If either -of these is an issue in your organization, you may want to maintain -documentation sources in XHTML instead of Docbook. - -For example, at Laszlo, -most developers contribute directly to the documentation. Requiring -that developers learn Docbook, or that they wait on the doc team to -get content into the docs, would discourage this. - -
Why not use an existing convertor? - -This isn't the first (X)HTML to Docbook convertor. Why not use one -of the existing ones? - -Each of the HTML to Docbook convertors that I could find had at least some -of the following limitations, some of which stemmed from their -intended use as one-time-only convertors for legacy documents: - -Many only operated on a subset of HTML, and relied upon hand -editing of the output to clean up mistakes. This made them impossible -to use as part of a processing pipeline, where the source is -maintained in XHTML. There was no way to customize the output, except by (1) hand -editing, or (2) writing a post-processing stylesheet, which didn't -have access to the information in the XHTML source document. Many of them were difficult or impossible to customize and -extend. They were closed-source, or written in Java or Perl (which I -find to be difficult languages to use for customizing this kind of -thing) and embedded in a larger system. They didn't take full advantage of the Docbook tag set and content -model to represent document structure. For instance, they didn't -generate nested section elements to represent -h1 h2 sequences, or table to -represent tables with summary attributes. - -
I got this error. What does it mean? -Q. Fatal Error! The element type "br" must be terminated by the matching end-tag "</br>". -A. Your document is HTML, not XHTML. You need to fix it, or run it through Tidy first.Q. My output document is empty except for the <?xml version="1.0" encoding="UTF-8"?> line.A. The document is missing a namespace declaration. See the example for an example.Q. Some of the headers and document sections are repeated multiple times.A. The document has out-of-sequence headers, such as h1 followed by h3 (instead of h2). This won't work.Q. Fatal Error! The prefix "db" for element "db:footnote" is not bound.A. You haven't declared the db namespace prefix. See the example for an example. - - -
Implementation Notes - -
Bugs -Improperly sequenced hn (for example -h1 followed by h3, instead of -h2) will result in duplicate text. - - -
Limitations -The id attribute is only preserved for certain -elements (at least hn, images, paragraphs, and -tables). It ought to be preserved for all of them. Only the very simplest table format is -implemented. Always uses compact lists. The string matching for <?html2db -class="classname"?> requires an exact match -(spaces and all). The implicit blocks code is easily -confused, as documented in that section. This is -easy to fix now that I understand the difference between block and -inline elements (I didn't when I was implementing this), but I -probably won't do so until I run into the problem again. - - - - -
Wishlist -Allow <?html2db attribute-name="name" -value="value"?> at any position, to set arbitrary -Docbook attributes on the generated element. Use a different technique from the fake -namespace prefix to name Docbook elements in the source, that -preserves the XHTML validity of the source file. For example, an -option to transform <div class="db:footnote"> into -<footnote>, or to use a processing attribute -(<div><?html2db classname="footnote"?>). Parse DC metadata from XHTML html/head/meta. Add an option to use html/head/title instead of -html/body/h1[1] for top title. Allow an id on every element. Add an option to translate the XHTML class into a -Docbook role. Preserve more of the whitespace from the source document, especially within lists and tables, in order to make it easier to debug the output document. - - -
Design Notes -
The Docbook Namespace -html2db.xsl accepts elements in the "Docbook namespace" in XHTML -source. This namespace is urn:docbook. - -This isn't technically correct. Docbook doesn't really have a -namespace, and if it did, it wouldn't be this one. RFC 3151 suggests -urn:publicid:-:OASIS:DTD+DocBook+XML+V4.1.2:EN as the -Docbook namespace. - -There are two problems with the RFC 3151 namespace. First, it's long -and hard to remember. Second, it's limited to Docbook v4.1.2, -but html2db.xsl works with other versions of Docbook too, which would -presumably have other namespaces. I think it's more useful to -underspecify the Docbook version in the spec for this tool. -Docbook itself underspecifies the version completely, by avoiding a -namespace at all, but when mixing Docbook and XHTML elements I find it -useful to be more specific than that. - -
History -The original version of html2db.xsl was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation -effort. We had a set of custom stylesheets that formatted and added -linking information to programming-language elements such as -classname and tagname, and added -Table-of-Contents to chapter documentation and numbers examples. - -As the documentation set grew, the doc team (John Sundman) -requested features such as inter-chapter navigation, callouts, and -index and glossary elements. I was able to beat all of these back -except for navigation, which seemed critical. After a few days trying -to implement this, I decided it would be simpler to convert the subset -of XHTML that we used into a subset of Docbook, and use the latter to -add navigation. (Once this was done, the other features came for -free.) - -During my August 2004 "sabbatical", I factored the general html2db -code out from the Laszlo-specific code, refactored and otherwise -cleaned it up, and wrote this documentation. - -
Credits -html2db.xsl was written by Oliver Steele, as part of the Laszlo Systems, Inc. documentation effort. - -
\ No newline at end of file diff --git a/doc/wiki2docbook/html2db/tidy.properties b/doc/wiki2docbook/html2db/tidy.properties deleted file mode 100644 index 956fa0e0..00000000 --- a/doc/wiki2docbook/html2db/tidy.properties +++ /dev/null @@ -1,3 +0,0 @@ -output-xhtml: true -show-warnings: no -quiet: yes