3 # This script converts trac wiki to docbook
4 # wiki pages must be in wiki/ directory and their names must start with "Guide"
5 # the first page is named GuideIndex
6 # output is written to docbook/ directory
8 # based on the following scripts:
10 # http://trac-hacks.org/wiki/Page2DocbookPlugin
11 # http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py
13 # see the links above for a list of requirements
18 from trac.test import EnvironmentStub, Mock, MockPerm
19 from trac.mimeview import Context
20 from trac.wiki.formatter import HtmlFormatter
21 from trac.wiki.model import WikiPage
22 from trac.web.href import Href
25 from tidy import parseString
# Directory holding the helper stylesheets (html2db/ and headingsNormalizer/).
# It is resolved against the current working directory, so the script has to be
# started from the directory that contains wiki2docbook/.
30 datadir = os.getcwd() + "/wiki2docbook"
# Source of a small wrapper stylesheet that imports html2db.xsl from datadir
# (referenced through a file:// URL) and emits UTF-8 XML without indentation.
# The 'document-root' parameter is set to the placeholder name
# '__top_element__'; the conversion loop later substitutes the real DocBook
# element for that placeholder.
# NOTE(review): the end of this string literal is not visible in this excerpt.
33 xhtml2dbXsl = u"""<?xml version="1.0"?>
34 <xsl:stylesheet version="1.0"
35 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
37 <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/html2db/html2db.xsl') + """\" />
38 <xsl:output method="xml" indent="no" encoding="utf-8"/>
39 <xsl:param name="document-root" select="'__top_element__'"/>
# Source of a wrapper stylesheet that imports headingsNormalizer.xsl from
# datadir and emits UTF-8 XML without indentation.
# NOTE(review): 'defaultTopHeading' is given select="FIXME", which XPath reads
# as a node test rather than a string literal; this looks like an unfinished
# placeholder -- confirm the intended value.
# NOTE(review): the end of this string literal is not visible in this excerpt.
43 normalizedHeadingsXsl = u"""<?xml version="1.0"?>
44 <xsl:stylesheet version="1.0"
45 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
47 <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/headingsNormalizer/headingsNormalizer.xsl') + """\" />
48 <xsl:output method="xml" indent="no" encoding="utf-8"/>
49 <xsl:param name="defaultTopHeading" select="FIXME"/>
# Parse both wrapper stylesheets once, at module level, so that html2docbook()
# can apply the same compiled stylesheet objects to every page: first the
# headings normalizer, then the XHTML-to-DocBook converter.
53 normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
54 normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)
56 xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
57 xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)
def html2docbook(html):
    """Convert an HTML fragment to DocBook XML.

    The input is cleaned up into XHTML by tidy, passed through the
    headings-normalizer stylesheet and finally through the XHTML-to-DocBook
    stylesheet (both compiled at module level).

    html -- unicode string with the HTML to convert (it is encoded to UTF-8
            before being handed to tidy).

    Returns the serialized DocBook document as a unicode string. The root
    element is still the '__top_element__' placeholder configured in the
    wrapper stylesheet; the caller is expected to substitute it.

    Any exception raised by tidy, libxml2 or libxslt propagates to the
    caller; the intermediate libxml2 documents are released in all cases.
    """
    options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0,
                   input_encoding='utf8', output_encoding='utf8',
                   doctype='auto', wrap=0, char_encoding='utf8')
    xhtml = parseString(html.encode("utf-8"), **options)

    # libxml2 documents live in native memory and are not reclaimed by the
    # Python garbage collector, so they must be freed explicitly even when a
    # later step fails.
    xhtml_xmldoc = None
    xhtml2_xmldoc = None
    docbook_xmldoc = None
    try:
        xhtml_xmldoc = libxml2.parseDoc(str(xhtml))

        xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)

        # NOTE(review): this serialized intermediate is not used by any code
        # visible here; it is kept to preserve the original behavior and is a
        # candidate for removal.
        nhstring = normalizedHeadingsXsl_xsldoc.saveResultToString(xhtml2_xmldoc)

        docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)

        dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)
    finally:
        # Same release order as on the original success path.
        for doc in (xhtml_xmldoc, xhtml2_xmldoc, docbook_xmldoc):
            if doc is not None:
                doc.freeDoc()
    return dbstring.decode('utf-8')
# Bookkeeping for the document tree, keyed by wiki page name:
#   depth  -- nesting level of the page (the index page gets 0)
#   parent -- name of the page one level up, taken from the index nesting
#   inner  -- set to 1 for pages that have child pages
# top_element maps a depth to the DocBook root element used for that page:
# 'book' for depth 0, 'chapter' for depth 1, 'section' for depths 2 through 9.
81 depth = {} #document depth, 0 for index, leaf documents have depth 1 or 2
82 parent = {}#parent document (if depth > 0)
83 inner = {} #defined for documents that are parents
85 #top element indexed by depth
86 top_element = [ 'book', 'chapter', 'section', 'section', 'section', 'section', 'section', 'section', 'section', 'section' ]
# Set up the Trac objects the wiki formatter needs, using the stub and mock
# helpers from trac.test: a stub environment, a mock request for the
# 'anonymous' user with links rooted at '/', and a 'wiki' rendering context
# derived from that request.
88 env = EnvironmentStub()
89 req = Mock(href=Href('/'), abs_href=Href('http://www.example.com/'),
90 authname='anonymous', perm=MockPerm(), args={})
91 context = Context.from_request(req, 'wiki')
# NOTE(review): these two statements appear to be the visible part of
# read_file(name), which is called further down; its def line and the
# statements that create 'page' are not visible in this excerpt.
# Load the raw wiki source for 'name' from the wiki/ directory, decoded as
# UTF-8, into the module-level 'text' mapping.
95 text[name] = file("wiki/" + name).read().decode('utf-8')
# Save the page object -- presumably a WikiPage registered in the stub
# environment so that wiki links to it resolve; confirm against the full file.
99 page.save('', '', '::1', 0)
# Start from the index page: load it, give it depth 0 and mark it as a page
# that has children.
103 index_name = "GuideIndex"
104 read_file(index_name)
105 index_text = text[index_name]
106 depth[index_name] = 0
107 inner[index_name] = 1
# stack[k] holds the most recently seen page at nesting level k; slot 0 is the
# index page itself, and the loop below reads stack[d - 1] as the parent.
# NOTE(review): only four slots are allocated, and the code that fills slots
# 1..3 is not visible in this excerpt.
109 stack = [ index_name , '', '', '' ]
# Walk the index page line by line and pick out list items of the form
# "<spaces>* [wiki:GuideXxx". The nesting level d is the number of leading
# spaces divided by 2 (integer division under Python 2). The page at level
# d - 1 on the stack becomes the parent and is marked as an inner page.
# NOTE(review): the guard for non-matching lines and some statements of the
# loop body are not visible in this excerpt.
# NOTE(review): if d can be 0 at this point, stack[d - 1] reads the last stack
# slot rather than a parent; hidden lines may adjust d first -- confirm.
111 for line in index_text.splitlines() :
112 match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
114 name = match.group(2)
115 d = len(match.group(1)) / 2
118 parent[name] = stack[d - 1]
119 inner[stack[d - 1]] = 1
123 # exclude links with depth > 1 from wiki text, they will be included indirectly
# Scans 'text' line by line with the same list-item pattern used for the
# index, computes the nesting level from the leading spaces, and appends lines
# to the accumulated output, each followed by a newline.
# NOTE(review): the initialisation of 'out', the condition that skips deeper
# links and the return statement are not visible in this excerpt.
124 def filter_out_indirect(text):
126 for line in text.splitlines() :
127 match = re.match('^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
130 d = len(match.group(1)) / 2
132 out = out + line + "\n"
# Convert every loaded wiki page to a DocBook file under docbook/.
# NOTE(review): several statements of this loop are not visible in this
# excerpt, including how 'txt' is obtained and how the two itemizedlist
# patterns are applied.
136 for name in text.keys():
140 txt = filter_out_indirect(txt)
# Render the wiki markup to HTML with Trac's formatter.
142 html = HtmlFormatter(env, context, txt).generate()
# Turn links to other Guide pages into in-document anchors.
144 html = html.replace("/wiki/Guide", "#Guide")
# Root element for this page, chosen by its depth in the index.
146 top = top_element[depth[name]]
147 db = html2docbook(html)
150 # replace list items with XIncludes, FIXME: this is ugly
151 r = re.compile('<itemizedlist[^>]*>')
154 r = re.compile('</itemizedlist>')
# A list item that contains nothing but a link to a Guide page becomes an
# XInclude of that page's generated file, "<page name>.xml".
157 r = re.compile('<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')
158 db = r.sub(r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n', db);
# Replace the placeholder root element emitted by the stylesheet with the real
# element, using the page name as its id.
161 db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
162 db = db.replace("</__top_element__>", "</" + top + ">")
# NOTE(review): the output file object is never closed explicitly; it is only
# released when the temporary object is reclaimed.
164 open("docbook/" + name + ".xml", "w").write(db.encode('utf-8'))