#!/usr/bin/python
#
# This script converts a Trac wiki to DocBook.
# The wiki pages must be in the wiki/ directory and their names must start with "Guide";
# the first page is named GuideIndex.
# The output is written to the docbook/ directory.
#
# Based on the following scripts:
#
# http://trac-hacks.org/wiki/Page2DocbookPlugin
# http://trac.edgewall.org/attachment/wiki/TracWiki/trac_wiki2html.py
#
# See the links above for a list of requirements.
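#
# The script is presumably run from the directory that contains wiki/, docbook/
# and the wiki2docbook/ data directory referenced below, e.g.:
#
#   ./wiki2docbook.py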


import sys
import os
from trac.test import EnvironmentStub, Mock, MockPerm
from trac.mimeview import Context
from trac.wiki.formatter import HtmlFormatter
from trac.wiki.model import WikiPage
from trac.web.href import Href

import urllib
from tidy import parseString
import libxml2
import libxslt
import re

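# Directory containing the XSLT stylesheets used below; it is expected to sit
# next to the wiki/ and docbook/ directories in the current working directory.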
datadir = os.getcwd() + "/wiki2docbook"


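# Wrapper stylesheet: imports html2db.xsl from the data directory and sets the
# name of the placeholder root element produced by the conversion.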
xhtml2dbXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/html2db/html2db.xsl') + """\" />
  <xsl:output method="xml" indent="no" encoding="utf-8"/>
  <xsl:param name="document-root" select="'__top_element__'"/>
</xsl:stylesheet>
"""

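# Wrapper stylesheet: imports headingsNormalizer.xsl, which presumably rewrites
# the XHTML heading levels before the DocBook conversion.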
normalizedHeadingsXsl = u"""<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:import href=\"file:///""" + urllib.pathname2url(datadir + '/headingsNormalizer/headingsNormalizer.xsl') + """\" />
  <xsl:output method="xml" indent="no" encoding="utf-8"/>
  <xsl:param name="defaultTopHeading" select="FIXME"/>
</xsl:stylesheet>
"""

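# Parse both stylesheets once so they can be reused for every page.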
normalizedHeadingsXsl_xmldoc = libxml2.parseDoc(normalizedHeadingsXsl)
normalizedHeadingsXsl_xsldoc = libxslt.parseStylesheetDoc(normalizedHeadingsXsl_xmldoc)

xhtml2dbXsl_xmldoc = libxml2.parseDoc(xhtml2dbXsl)
xhtml2dbXsl_xsldoc = libxslt.parseStylesheetDoc(xhtml2dbXsl_xmldoc)

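# Convert an HTML fragment (as produced by Trac's HtmlFormatter) to a DocBook
# fragment: tidy it into well-formed XHTML, normalize the headings, then apply
# the html2db stylesheet.  The result is rooted at a placeholder
# <__top_element__> element that process_pages() renames later.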
def html2docbook(html):

        options = dict(output_xhtml=1, add_xml_decl=1, indent=1, tidy_mark=0, input_encoding='utf8', output_encoding='utf8', doctype='auto', wrap=0, char_encoding='utf8')
        xhtml = parseString(html.encode("utf-8"), **options)

        xhtml_xmldoc = libxml2.parseDoc(str(xhtml))

        xhtml2_xmldoc = normalizedHeadingsXsl_xsldoc.applyStylesheet(xhtml_xmldoc, None)

        nhstring = normalizedHeadingsXsl_xsldoc.saveResultToString(xhtml2_xmldoc)

        docbook_xmldoc = xhtml2dbXsl_xsldoc.applyStylesheet(xhtml2_xmldoc, None)

        dbstring = xhtml2dbXsl_xsldoc.saveResultToString(docbook_xmldoc)

        xhtml_xmldoc.freeDoc()
        xhtml2_xmldoc.freeDoc()
        docbook_xmldoc.freeDoc()
        return dbstring.decode('utf-8')


text = {}   # wiki text
depth = {}  # document depth, 0 for index, leaf documents have depth 1 or 2
parent = {} # parent document (if depth > 0)
inner = {}  # defined for documents that are parents

# top element indexed by depth
top_element = [ 'book', 'chapter', 'section', 'section', 'section', 'section', 'section', 'section', 'section', 'section' ]

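# Stub Trac environment and request so the wiki formatter can be used outside
# of a running Trac instance.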
env = EnvironmentStub()
req = Mock(href=Href('/'), abs_href=Href('http://www.example.com/'),
           authname='anonymous', perm=MockPerm(), args={})
context = Context.from_request(req, 'wiki')


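# Read the wiki source of a page from wiki/<name> and register the page name in
# the stub environment with dummy content, presumably so that links between the
# pages are rendered as links to existing pages.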
def read_file(name):
        text[name] = file("wiki/" + name).read().decode('utf-8')
        page = WikiPage(env)
        page.name = name
        page.text = '--'
        page.save('', '', '::1', 0)


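# Read GuideIndex and walk its bullet list of [wiki:Guide...] links to build the
# document tree; each referenced page is read as well.  The list is expected to
# look something like this (two spaces of indentation per level; the page names
# shown here are only illustrative):
#
#   * [wiki:GuideIntroduction]
#     * [wiki:GuideMainWindow]
#       * [wiki:GuideMainWindowFilePane]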
def read_index():
        index_name = "GuideIndex"
        read_file(index_name)
        index_text = text[index_name]
        depth[index_name] = 0
        inner[index_name] = 1

        stack = [ index_name , '', '', '' ]

        for line in index_text.splitlines() :
                match = re.match(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
                if match:
                        name = match.group(2)
                        d = len(match.group(1)) / 2
                        if (d > 0):
                                depth[name] = d
                                parent[name] = stack[d - 1]
                                inner[stack[d - 1]] = 1
                                stack[d] = name
                                read_file(name)

# Exclude links with depth > 1 from the wiki text; they will be included indirectly.
def filter_out_indirect(text):
        out = ""
        for line in text.splitlines() :
                match = re.match(r'^( *)\* \[wiki:(Guide[a-zA-Z0-9]*)', line)
                d = 1
                if match:
                        d = len(match.group(1)) / 2
                if (d == 1):
                        out = out + line + "\n"
        return out

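# Convert every page: wiki text -> HTML (via Trac) -> DocBook.  Links of the
# form /wiki/Guide... are turned into internal #Guide... anchors, the child
# list of a parent page is replaced by XInclude references, the placeholder
# root element is renamed to book/chapter/section according to the page depth,
# and the result is written to docbook/<name>.xml.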
def process_pages():
        for name in text.keys():
                txt = text[name]

                if name in inner:
                        txt = filter_out_indirect(txt)

                html = HtmlFormatter(env, context, txt).generate()

                html = html.replace("/wiki/Guide", "#Guide")

                top = top_element[depth[name]]
                db = html2docbook(html)

                if name in inner:
                        # replace list items with XIncludes, FIXME: this is ugly
                        r = re.compile(r'<itemizedlist[^>]*>')
                        db = r.sub(r'', db)

                        r = re.compile(r'</itemizedlist>')
                        db = r.sub(r'', db)

                        r = re.compile(r'<listitem>\s*<para>\s*<link\s*linkend="(Guide[a-zA-Z0-9]*)">[^<]*</link>\s*</para>\s*</listitem>')
                        db = r.sub(r'<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="\1.xml"/>\n', db)


                db = db.replace("<__top_element__>", "<" + top + " id=\"" + name + "\">")
                db = db.replace("</__top_element__>", "</" + top + ">")

                open("docbook/" + name + ".xml", "w").write(db.encode('utf-8'))


read_index()
process_pages()