--- /dev/null
+# -*- coding: utf-8 -*-
+#
+# Code scavenged from Glasnost
+# By: Odile Bénassy <obenassy@entrouvert.com>
+# Romain Chantereau <rchantereau@entrouvert.com>
+# Nicolas Clapiès <nclapies@easter-eggs.org>
+# Pierre-Antoine Dejace <padejace@entrouvert.be>
+# Thierry Dulieu <tdulieu@easter-eggs.com>
+# Florent Monnier <monnier@codelutin.com>
+# Cédric Musso <cmusso@easter-eggs.org>
+# Frédéric Péters <fpeters@entrouvert.be>
+# Benjamin Poussin <poussin@codelutin.com>
+# Emmanuel Raviart <eraviart@entrouvert.com>
+# Sébastien Régnier <regnier@codelutin.com>
+# Emmanuel Saracco <esaracco@easter-eggs.com>
+#
+# Copyright (C) 2000, 2001 Easter-eggs & Emmanuel Raviart
+# Copyright (C) 2002 Odile Bénassy, Code Lutin, Thierry Dulieu, Easter-eggs,
+# Entr'ouvert, Frédéric Péters, Benjamin Poussin, Emmanuel Raviart,
+# Emmanuel Saracco & Théridion
+# Copyright (C) 2003 Odile Bénassy, Romain Chantereau, Nicolas Clapiès,
+# Code Lutin, Pierre-Antoine Dejace, Thierry Dulieu, Easter-eggs,
+# Entr'ouvert, Florent Monnier, Cédric Musso, Ouvaton, Frédéric Péters,
+# Benjamin Poussin, Rodolphe Quiédeville, Emmanuel Raviart, Sébastien
+# Régnier, Emmanuel Saracco, Théridion & Vecam
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+import re
+import sys
+
+_non_id_chars = re.compile('[^a-z0-9]+')
+_non_id_at_ends = re.compile('^[-0-9]+|-+$')
+
def enhanceTypo(s):
    """Return *s* with a few ASCII sequences upgraded to typographic HTML.

    Idea from http://daringfireball.net/projects/smartypants/

    The replacements are emitted as ASCII numeric character references so
    the function works on both byte and unicode strings: a non-ASCII byte
    literal would be implicitly ASCII-decoded (and fail) when *s* is unicode
    under Python 2.
    """
    s = s.replace('...', '&#8230;')        # horizontal ellipsis
    s = s.replace(' -- ', ' &#8212; ')     # em-dash
    s = s.replace('(c)', '&#169; ')        # copyright sign (trailing space kept)
    return s
+
+
# Internal ("role id option") link syntax, e.g. "article 12".
_SPIP_INTERNAL_LINK = re.compile(
    r'(?P<role>alias|art(icle)?|atom|book|card|election|file|'
    r'grade|group|heading|im(g|age)?|person(ne)?|rubri(c|que)'
    r') *(?P<localId>\S+) *(?P<option>\S+)?')

_SPIP_URL_PREFIXES = ('http://', 'https://', 'ftp://', 'mailto:')


def parseSpipLink(link):
    """Split the target of a SPIP link into a ``(name, url)`` pair.

    Newlines in *link* are first turned into spaces.

    * Internal links (``article 12``, ``img 3`` ...) are not resolved by this
      module; the placeholder pair ``('XXX', '#')`` is returned for them.
    * For a target starting with a known URL prefix, *name* is the target
      without its prefix when the remainder contains no ``/``, and the whole
      target otherwise.
    * For anything else *name* is the empty string.

    In the last two cases *url* is the (newline-free) target itself.
    """
    link = link.replace('\n', ' ')
    if _SPIP_INTERNAL_LINK.match(link) is not None:
        return ('XXX', '#')

    # NOTE(review): the original nesting of the prefix loop was lost with the
    # file's indentation; this is the for/else reading where the search stops
    # at the first matching prefix -- confirm against the upstream source.
    for prefix in _SPIP_URL_PREFIXES:
        if link.startswith(prefix):
            remainder = link[len(prefix):]
            if '/' in remainder:
                return link, link
            return remainder, link
    return '', link
+
+
class Formatter:
    """Base class of the output formatters driven by the SPIP parser.

    It provides the generic walking of lists and tables; subclasses supply
    the primitives it calls (``listBegin``, ``listEnd``, ``listItem``,
    ``tableBegin``, ``tableLineBegin``, ``tableCell``, ``tableLineEnd`` and
    ``tableEnd``).
    """

    def close(self, **keywords):
        """Return the text to emit after the document (nothing here)."""
        return ''

    def list(self, list):
        """Render *list*, a sequence of ``(indent, type, text)`` items.

        A new nesting level is opened whenever an item is indented more than
        the current level and levels are closed while it is indented less.
        """
        stack = []      # (indent, type) of every list currently open
        pieces = []
        for indent, kind, text in list:
            while stack and indent < stack[-1][0]:
                pieces.append(self.listEnd(len(stack), stack[-1][1]))
                del stack[-1]
            if not stack or indent > stack[-1][0]:
                stack.append((indent, kind))
                pieces.append(self.listBegin(len(stack) - 1, kind))
            pieces.append(self.listItem(len(stack), text))
        while stack:
            pieces.append(self.listEnd(len(stack) - 1, stack[-1][1]))
            del stack[-1]
        return ''.join(pieces)

    def open(self, **keywords):
        """Return the text to emit before the document (nothing here)."""
        return ''

    def prescaleImage(self):
        return 1

    def table(self, table, hasHeader):
        """Render *table*, a list of rows, each row being a list of cells.

        When *hasHeader* is true the first row is the header and data rows
        are numbered from 0 after it (the header row itself gets -1).  A
        string cell containing only ``###`` is replaced by the 1-based
        number of its row.
        """
        rows = len(table)
        cols = max(map(len, table)) if table else 0
        stringTypes = (str, type(u''))
        pieces = [self.tableBegin(rows, cols)]
        for i, row in enumerate(table):
            isHeader = i == 0 and hasHeader
            rowNumber = i
            if hasHeader:
                rowNumber -= 1
            pieces.append(self.tableLineBegin(rowNumber, isHeader = isHeader))
            for j, cell in enumerate(row):
                if isinstance(cell, stringTypes) and cell.strip() == '###':
                    cell = rowNumber + 1
                pieces.append(
                    self.tableCell(cell, rowNumber, j, isHeader = isHeader))
            pieces.append(self.tableLineEnd(i, isHeader = isHeader))
        pieces.append(self.tableEnd(isHeader = rows == 1 and hasHeader))
        return ''.join(pieces)

    def text(self, text):
        """Return *text* unchanged; subclasses escape it as needed."""
        return text
+
+
class FormatterHtml(Formatter):
    """Formatter producing the block-level HTML common to all HTML outputs.

    ``self.states`` is a small stack used to tell ``paragraph`` that the text
    it receives is already a block-level element (list, table, preformatted
    text) and must not be wrapped in ``<p>``.
    """

    def __init__(self):
        self.states = ['']
        # Default heading level, so intertitle() works before open().
        self.sectionLevel = 2

    def intertitle(self, text, sublevel = 0):
        """Return *text* as a heading carrying an id derived from it."""
        nameText = re.sub('<.*?>', '', text).strip()
        # from docutils/nodes.py, def make_id(string)
        nameText = _non_id_chars.sub('-', ' '.join(nameText.lower().split()))
        nameText = _non_id_at_ends.sub('', nameText)
        return ('<h%(sectionLevel)d id="%(nameText)s">%(text)s'
                '</h%(sectionLevel)d>\n\n') % {
            'text': text.strip(),
            'sectionLevel': self.sectionLevel + sublevel,
            'nameText': nameText,
            }

    def lineBreak(self):
        return '<br />\n'

    def list(self, list):
        """Render *list* (``(indent, type, text)`` items) as nested HTML.

        Unlike ``Formatter.list`` a sub-list is emitted inside the still
        open ``<li>`` of its parent item.
        """
        self.states.append('blockLevel')
        stack = []      # (indent, type) of every list currently open
        pieces = []
        for indent, kind, text in list:
            while stack and indent < stack[-1][0]:
                pieces.append(self.listItemEnd())
                pieces.append(self.listEnd(len(stack), stack[-1][1]))
                del stack[-1]
            if not stack or indent > stack[-1][0]:
                stack.append((indent, kind))
                pieces.append('\n')
                pieces.append(self.listBegin(len(stack) - 1, kind))
            else:
                pieces.append(self.listItemEnd())
            pieces.append(self.listItemBegin(len(stack), text))
        while stack:
            pieces.append(self.listItemEnd())
            pieces.append(self.listEnd(len(stack) - 1, stack[-1][1]))
            del stack[-1]
        return ''.join(pieces)

    def _listTag(self, type):
        """Return the HTML tag for a list: ordered when *type* is set."""
        if type:
            return 'ol'
        return 'ul'

    def listBegin(self, indent, type):
        return '%(indent)s<%(type)s>\n' % {
            'indent': ' ' * indent,
            'type': self._listTag(type),
            }

    def listEnd(self, indent, type):
        return '%(indent)s</%(type)s>\n' % {
            'indent': ' ' * indent,
            'type': self._listTag(type),
            }

    def listItemBegin(self, indent, item):
        return '%(indent)s<li>%(item)s' % {
            'indent': ' ' * indent,
            'item': item.strip('*'),
            }

    def listItemEnd(self):
        return '</li>\n'

    def nonBreakingSpace(self):
        # Emitted as an entity: a plain space would not be non-breaking.
        return '&nbsp;'

    def open(self, **keywords):
        """Start a document; ``sectionLevel`` sets the top heading level."""
        self.sectionLevel = 2
        if 'sectionLevel' in keywords:
            self.sectionLevel = int(keywords['sectionLevel'])
        return ''

    def paragraph(self, text):
        """Wrap *text* in ``<p>``, re-flowing it on lines of about 70 chars.

        Text produced by a block-level element is returned untouched, and an
        empty paragraph becomes a line break.
        """
        if self.states[-1] == 'blockLevel':
            self.states.pop()
            return text

        def line80(text):
            # Cut after the first space found 70 or more characters into
            # each line; the remainder forms the last line.
            lines = []
            start = 0
            while 1:
                offset = text[start + 70:].find(' ')
                if offset == -1:
                    lines.append(text[start:])
                    break
                part = text[start:start + 70 + offset + 1]
                lines.append(part)
                start += len(part)
            return '\n'.join(lines)

        text = enhanceTypo(text.strip())
        # A source line starting with "_ " is a forced line break.  It has to
        # be converted before the newlines are flattened, otherwise the
        # marker can no longer be recognised.
        text = text.replace('\n_ ', '\n<br/>\n')
        text = line80(text.replace('\n', ' '))
        if not text:
            return self.lineBreak()
        return '<p>%(text)s</p>\n\n' % {
            'text': text,
            }

    def preformatted(self, text):
        if not text:
            return ''
        self.states.append('blockLevel')
        return '<pre>%(text)s</pre>' % {
            'text': text,
            }

    def preformattedInline(self, text):
        if not text:
            return ''
        return '<code>%(text)s</code>' % {
            'text': self.text(text),
            }

    def punctuationAndSpace(self, punctuation):
        """Return an opening punctuation mark glued to what follows it."""
        return punctuation.replace(' ', '&nbsp;')

    def spaceAndPunctuation(self, punctuation):
        """Return a punctuation mark glued to the word that precedes it."""
        return punctuation.replace(' ', '&nbsp;')

    def tableBegin(self, rows, cols):
        self.states.append('blockLevel')
        return '<table>\n'

    def tableCell(self, cell, row, col, isHeader):
        """Return one table cell; *cell* may be text or a row number."""
        if not cell:
            cell = '&nbsp;'
        elif not isinstance(cell, (str, type(u''))):
            # Formatter.table() passes an int for "###" cells.
            cell = str(cell)
        if isHeader:
            tag = 'th'
            attrtag = ' scope="col"'
        else:
            tag = 'td'
            attrtag = ''
        return ' <%(tag)s%(attrtag)s>%(cell)s</%(tag)s>\n' % {
            'tag': tag,
            'cell': cell.strip(),
            'attrtag': attrtag,
            }

    def tableEnd(self, isHeader = 0):
        return '</table>\n'

    def tableLineBegin(self, tableLineCount, isHeader = 0):
        if isHeader:
            return ' <tr>\n'
        if (tableLineCount + 1) % 2 == 0:
            return ' <tr class="even">\n'
        return ' <tr class="odd">\n'

    def tableLineEnd(self, tableLineCount, isHeader = 0):
        return ' </tr>\n'

    def text(self, text):
        """Return *text* with ``&`` and ``<`` escaped for HTML."""
        if not text:
            return ''
        # '&' first, so the entities produced below are not escaped again.
        text = text.replace('&', '&amp;')
        text = text.replace('<', '&lt;')
        return text
+
+
class FormatterHtmlComplete(FormatterHtml):
    """HTML formatter rendering all inline markup, images and footnotes."""

    # Footnote texts by 1-based index; created on the first footnote.
    _footNotes = None

    def close(self, **keywords):
        """Return the block listing the footnotes collected so far."""
        footNotes = self._footNotes
        if not footNotes:
            return ''
        pieces = ['<div class="spip-notes">\n']
        for index in sorted(footNotes):
            pieces.append('<p>')
            pieces.append(
                '[<a href="#nh%(index)s" name="nb%(index)s" '
                'class="spip-note">%(index)s</a>] %(text)s' % {
                    'index': index,
                    'text': footNotes[index],
                    })
            pieces.append('</p>\n')
        pieces.append('</div>\n')
        return ''.join(pieces)

    def emphasis(self, text):
        if not text:
            return ''
        return '<em>%(text)s</em>' % {
            'text': text,
            }

    def footNoteCall(self, footNote):
        """Record *footNote* and return the call linking to it."""
        if self._footNotes is None:
            self._footNotes = {}
        index = len(self._footNotes) + 1
        self._footNotes[index] = footNote
        return (' [<a href="#nb%(index)s" name="nh%(index)s" '
                'class="spip-note">%(index)s</a>]') % { 'index': index }

    def image(self, name, url, width = None, height = None, fullUrl = None):
        """Return an ``<img>``, wrapped in a link to *fullUrl* when given."""
        if width:
            width = ' width="%s"' % width
        else:
            width = ''
        if height:
            height = ' height="%s"' % height
        else:
            height = ''
        image = ('<img src="%(url)s" alt="%(name)s" title="%(name)s"'
                 '%(height)s%(width)s />') % {
            'height': height,
            'name': name,
            'url': url,
            'width': width,
            }
        if fullUrl:
            image = '<a href="%(fullUrl)s">%(image)s</a>' % {
                'fullUrl': fullUrl,
                'image': image,
                }
        return image

    def lineSeparator(self):
        return '<hr />\n'

    def link(self, name, url, title=None):
        """Return an ``<a>`` element; absolute web links are ``external``."""
        # Only '"' is converted, so it cannot close the attribute; apart from
        # that an url must be left as is.
        url = url.replace('"', '&quot;')
        attrs = ['href="%s"' % url]
        if title:
            attrs.append('title="%s"' % title.replace('"', '&quot;'))
        if url.startswith(('http://', 'https://')):
            attrs.append('class="external"')
        return '<a %s>%s</a>' % (' '.join(attrs), name)

    def strong(self, text):
        if not text:
            return ''
        return '<strong>%(text)s</strong>' % {
            'text': text,
            }

    def teletype(self, text):
        if not text:
            return ''
        return '<tt>%(text)s</tt>' % {
            'text': text,
            }
+
+
class FormatterHtmlSimple(FormatterHtml):
    """HTML formatter dropping inline markup, images, links and footnotes."""

    def emphasis(self, text):
        return text

    def footNoteCall(self, footNote):
        return ''

    def image(self, name, url, width = None, height = None, fullUrl = None):
        return ''

    def lineSeparator(self):
        return '<br />\n'

    def link(self, name, url, title=None):
        """Return only the link text."""
        # A string is returned as is: str() on a unicode name containing
        # non-ASCII characters raises UnicodeEncodeError under Python 2.
        if isinstance(name, (str, type(u''))):
            return name
        return str(name)

    def strong(self, text):
        return text

    def teletype(self, text):
        return text
+
+
+
# Inline markup alternatives shared, in this order, by the grammars below.
_SPIP_INLINE_MARKUP = '|'.join((
    r'(?P<image>\<((im(g|age)?)|(IM(G|AGE)?)) *(\d+|[^ \>]+)( +\d+\*\d+)?\>)',
    r'(?P<teletypeStart>\(\()',
    r'(?P<teletypeStop>\)\))',
    r'(?P<strongStart>\{\{)',
    r'(?P<strongStop>\}\})',
    r'(?P<emphasisStart>\{)',
    r'(?P<emphasisStop>\})',
    ))

# Last alternatives of those grammars.  The guillemets are written as regex
# escapes (\xab, \xbb) so the patterns stay pure ASCII.
_SPIP_INLINE_TAIL = '|'.join((
    r'(?P<antislash>\\.?)',
    r'(?P<punctuationAndSpace>[\xab] )',
    r'(?P<spaceAndPunctuation> ([:;!\?\xbb%]|\.\.\.))',
    r'(?P<nonBreakingSpace>~)',
    r'(?P<word>\w+)',
    ))

_SPIP_FOOTNOTE_START = r'(?P<footNoteStart>\[\[)'
_SPIP_LINK_START = r'(?P<linkStart>\[)'

# Details of an already recognised image tag / list item marker.
_SPIP_IMAGE_DETAILS = re.compile(
    r'\<((im(g|age)?)|(IM(G|AGE)?)) '
    r'*((?P<localId>\d+)|(?P<url>[^ \>]+))'
    r'( +(?P<width>\d+)\*(?P<height>\d+))?\>')
_SPIP_LIST_MARKER = re.compile(r'^(?P<indent> *)-((?P<type>[0-9aAiI])\.)?')


class SpipParser:
    """Translate SPIP markup by driving a formatter object.

    The parser keeps a stack of states, the current one being
    ``self.states[0]``.  Each state is a dictionary; a variable that the
    current state does not define is looked up in the enclosing ones, which
    is how the regular expression used to scan the text (``regexpObject``)
    is inherited by inline states such as ``emphasis``.

    At each position the current regular expression is tried; the name of
    the group that matched selects the ``handle_<group>`` method to call and
    the text that method returns is appended to the current state.
    """

    codeRegexp = '|'.join((
        r'(?P<codeStop>\</(code|CODE)\>)',
        r'(?P<wordAsIs>\w+)',
        ))
    codeRegexpObject = None
    footNoteRegexp = '|'.join((
        r'(?P<footNoteStop>\]\])',
        _SPIP_INLINE_MARKUP,
        _SPIP_LINK_START,
        _SPIP_INLINE_TAIL,
        ))
    footNoteRegexpObject = None
    linkNameRegexp = '|'.join((
        _SPIP_INLINE_MARKUP,
        r'(?P<linkMiddle>->)',
        r'(?P<linkWithoutMiddleStop>\])',
        _SPIP_INLINE_TAIL,
        ))
    linkNameRegexpObject = None
    linkRegexp = '|'.join((
        r'(?P<linkStop>\])',
        r'(?P<wordInUrl>\w+)',
        ))
    linkRegexpObject = None
    listRegexp = '|'.join((
        r'(?P<listLineStart>^ *-([0-9aAiI]\.)?(?=[^-]))',
        r'(?P<listLineStop>\r?\n(?= *-([0-9aAiI]\.)?(?=[^-])))',
        r'(?P<listLineContinuation>\r?\n(?= +))',
        r'(?P<listStop>(?=\r?\n|$))',
        _SPIP_INLINE_MARKUP,
        _SPIP_FOOTNOTE_START,
        _SPIP_LINK_START,
        _SPIP_INLINE_TAIL,
        ))
    listRegexpObject = None
    paragraphRegexp = '|'.join((
        r'(?P<paragraphStop>\r?\n( *\r?\n)* *(?=\r?\n|$))',
        r'(?P<lineSeparator>^ *[-_]{4,} *(\r?\n|$))',
        r'(?P<intertitleStart>^ *\{\{\{)',
        r'(?P<intertitleStop>\}\}\}( *$|))',
        r'(?P<listStart>^ *-([0-9aAiI]\.)?)',
        r'(?P<tableWithHeaderStart>^ *\|(?=( *\{\{.*?\}\} *\|)+ *(\r?\n|$)))',
        r'(?P<tableStart>^ *\|)',
        r'(?P<codeStart>\<(code|CODE)\>)',
        _SPIP_INLINE_MARKUP,
        _SPIP_FOOTNOTE_START,
        _SPIP_LINK_START,
        _SPIP_INLINE_TAIL,
        ))
    paragraphRegexpObject = None
    posDelta = 0
    spipRegexp = r'(?P<paragraphStart>\r?\n)'
    spipRegexpObject = None
    states = None
    tableRegexp = '|'.join((
        r'(?P<rowStart>^ *\|)',
        r'(?P<rowStop>\| *\r?\n(?= *\|))',
        r'(?P<tableStop>\| *(?=\r?\n|$))',
        r'(?P<cellRestart>\|)',
        _SPIP_INLINE_MARKUP,
        _SPIP_FOOTNOTE_START,
        _SPIP_LINK_START,
        _SPIP_INLINE_TAIL,
        ))
    tableRegexpObject = None

    def __init__(self):
        flags = re.MULTILINE | re.DOTALL
        for grammar in ('code', 'footNote', 'linkName', 'link', 'list',
                        'paragraph', 'spip', 'table'):
            setattr(self, grammar + 'RegexpObject',
                    re.compile(getattr(self, grammar + 'Regexp'), flags))
        self._resetStates()

    def _resetStates(self):
        """Install the initial stack: sentinel, document, first paragraph."""
        self.states = []
        self.pushState({
            'state': None,
            })
        self.pushState({
            'formattedText': '',
            'paragraphsCount': 0,
            'regexpObject': self.spipRegexpObject,
            'state': 'spip',
            })
        self.pushState({
            'formattedText': '',
            'ignoreEmptyParagraph': 0,
            'regexpObject': self.paragraphRegexpObject,
            'state': 'paragraph',
            })

    def _append(self, result):
        """Append *result*, when not empty, to the text of the current state."""
        if result:
            self.setVar('formattedText',
                        self.getVar('formattedText') + result)

    def format(self, formatter, rawText, **keywords):
        """Return *rawText* translated with *formatter*.

        *keywords* are handed to the ``open`` and ``close`` methods of the
        formatter.  The parser may be used for several texts in turn.
        """
        self.formatter = formatter
        # Start from a clean stack, so that a parser used twice does not
        # emit the text of the previous document again.
        self._resetStates()
        self.posDelta = 0
        formattedText = self.formatter.open(**keywords)
        rawText = (rawText or '').expandtabs()
        pos = 0
        while pos < len(rawText):
            matchObject = self.getVar('regexpObject').match(rawText, pos)
            if matchObject is None or matchObject.lastgroup is None:
                # No markup here: copy one character.
                self.setVar(
                    'formattedText',
                    self.getVar('formattedText')
                    + self.formatter.text(rawText[pos]))
                pos += 1
                continue
            groupName = matchObject.lastgroup
            handler = getattr(self, 'handle_' + groupName)
            self._append(handler(matchObject.group(groupName)))
            # A handler that used only the beginning of the match asks for
            # the rest to be scanned again through a negative posDelta.
            pos = matchObject.end(groupName) + self.posDelta
            self.posDelta = 0
        # Close whatever is still open at the end of the text.
        while len(self.states) > 2:
            state = self.getVar('state')
            if state == 'list':
                self._append(self.handle_listStop(None))
            elif state == 'paragraph':
                self._append(
                    self.handle_paragraphStop(None, isLastParagraph = 1))
            else:
                self.pullState()
        formattedText += self.getVar('formattedText')
        formattedText = formattedText.strip() + '\n'
        formattedText += self.formatter.close(**keywords)
        return formattedText

    def getVar(self, name):
        """Return variable *name* from the innermost state defining it."""
        for state in self.states:
            if name in state:
                return state[name]
        raise KeyError(name)

    def getPreviousVar(self, name):
        """Same as ``getVar``, but skipping the current state."""
        for state in self.states[1:]:
            if name in state:
                return state[name]
        raise KeyError(name)

    def handle_antislash(self, group):
        if len(group) < 2:
            # Backslash ending the text: there is nothing to escape.
            return self.formatter.text(group)
        character = group[1]
        if character == 'n':
            return self.formatter.lineBreak()
        return self.formatter.text(character)

    def handle_cellRestart(self, group):
        self.handle_cellStop(group)
        self.handle_cellStart(group)
        return None

    def handle_cellStart(self, group):
        self.getVar('table')[-1].append([])
        return None

    def handle_cellStop(self, group):
        self.getVar('table')[-1][-1] = self.getVar('formattedText')
        self.setVar('formattedText', '')
        return None

    def handle_codeStart(self, group):
        self.pushState({
            'formattedText': '',
            'regexpObject': self.codeRegexpObject,
            'state': 'code',
            })
        return None

    def handle_codeStop(self, group):
        if self.getVar('state') != 'code':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        return self.formatter.preformatted(formattedText)

    def handle_emphasisStart(self, group):
        self.pushState({
            'formattedText': '',
            'state': 'emphasis',
            })
        return None

    def handle_emphasisStop(self, group):
        if self.getVar('state') != 'emphasis':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        return self.formatter.emphasis(formattedText)

    def handle_footNoteStart(self, group):
        self.pushState({
            'formattedText': '',
            'regexpObject': self.footNoteRegexpObject,
            'state': 'footNote',
            })
        return None

    def handle_footNoteStop(self, group):
        if self.getVar('state') != 'footNote':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        return self.formatter.footNoteCall(formattedText)

    def handle_image(self, group):
        matchObject = _SPIP_IMAGE_DETAILS.match(group)
        localId = matchObject.group('localId')
        url = matchObject.group('url')
        width = matchObject.group('width')
        height = matchObject.group('height')
        # Exactly one of localId and url is set; use it as the image name
        # rather than passing None for images given by url.
        name = localId or url
        if url:
            url = '#' + url
        else:
            url = '#'
        fullUrl = '#'
        return self.formatter.image(name, url, width, height, fullUrl)

    def handle_intertitleStart(self, group):
        inParagraph = self.getVar('state') == 'paragraph'
        if inParagraph:
            self._append(
                self.handle_paragraphStop(None, ignoreEmptyParagraph = 1))
        self.pushState({
            'formattedText': '',
            'inParagraph': inParagraph,
            'regexpObject': self.paragraphRegexpObject,
            'state': 'intertitle',
            })
        return None

    def handle_intertitleStop(self, group):
        state = self.getVar('state')
        if state == 'intertitle':
            formattedText = self.getVar('formattedText')
            inParagraph = self.getVar('inParagraph')
            self.pullState()
            self._appendAlways(self.formatter.intertitle(formattedText))
            if inParagraph:
                return self.handle_paragraphStart(None,
                                                  ignoreEmptyParagraph = 1)
            return None
        if state == 'strong':
            # "}}}" closing a strong: only "}}" is used, whatever else was
            # matched (third brace, trailing spaces) is scanned again.
            self.posDelta = 2 - len(group)
            return self.handle_strongStop(group[:2])
        if state == 'emphasis':
            # Likewise, only the first "}" closes the emphasis.
            self.posDelta = 1 - len(group)
            return self.handle_emphasisStop(group[:1])
        return None

    def _appendAlways(self, text):
        """Append *text* to the text of the current state."""
        self.setVar('formattedText', self.getVar('formattedText') + text)

    def handle_lineSeparator(self, group):
        inParagraph = self.getVar('state') == 'paragraph'
        if inParagraph:
            self._append(
                self.handle_paragraphStop(None, ignoreEmptyParagraph = 1))
        self._appendAlways(self.formatter.lineSeparator())
        if inParagraph:
            return self.handle_paragraphStart(None, ignoreEmptyParagraph = 1)
        return None

    def handle_linkMiddle(self, group):
        if self.getVar('state') != 'linkName':
            return None
        name = self.getVar('formattedText')
        self.pullState()
        self.pushState({
            'formattedText': '',
            'name': name,
            'regexpObject': self.linkRegexpObject,
            'state': 'link',
            })
        return None

    def handle_linkStart(self, group):
        self.pushState({
            'formattedText': '',
            'regexpObject': self.linkNameRegexpObject,
            'state': 'linkName',
            })
        return None

    def handle_linkStop(self, group):
        if self.getVar('state') != 'link':
            return None
        name = self.getVar('name')
        link = self.getVar('formattedText')
        self.pullState()
        title, url = parseSpipLink(link.strip())
        if not title:
            title = url
        if name and '|' in name:
            # "text|title": split on the first bar only, so that a title
            # containing a bar is kept instead of raising ValueError.
            defaultName = title
            name, title = name.split('|', 1)
            if not name:
                name = defaultName
        elif not name:
            name = title
            title = None
        if name == title or url == title:
            title = None
        return self.formatter.link(name.strip(), url, title)

    def handle_linkWithoutMiddleStop(self, group):
        if self.getVar('state') != 'linkName':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        # Not a link after all: give the brackets back.
        return self.formatter.text('[') \
               + formattedText \
               + self.formatter.text(']')

    def handle_listLineStart(self, group):
        matchObject = _SPIP_LIST_MARKER.match(group)
        indent = len(matchObject.group('indent'))
        marker = matchObject.group('type')
        # The text of the item is filled in by handle_listLineStop.
        self.getVar('list').append([indent, marker, None])
        return None

    def handle_listLineContinuation(self, group):
        return None

    def handle_listLineStop(self, group):
        self.getVar('list')[-1][-1] = self.getVar('formattedText').lstrip()
        self.setVar('formattedText', '')
        return None

    def handle_listStart(self, group):
        self.pushState({
            'formattedText': '',
            'list': [],
            'regexpObject': self.listRegexpObject,
            'state': 'list',
            })
        return self.handle_listLineStart(group)

    def handle_listStop(self, group):
        if self.getVar('state') != 'list':
            # The following pullState is mandatory, because group is empty, so
            # if we don't pull a state, handle_listStop will be called forever.
            if len(self.states) > 2:
                self.pullState()
            return None
        self.handle_listLineStop(group)
        items = self.getVar('list')
        self.pullState()
        return self.formatter.list(items)

    def handle_nonBreakingSpace(self, group):
        return self.formatter.nonBreakingSpace()

    def handle_paragraphStart(self, group, ignoreEmptyParagraph = 1):
        self.pushState({
            'formattedText': '',
            'ignoreEmptyParagraph': ignoreEmptyParagraph,
            'regexpObject': self.paragraphRegexpObject,
            'state': 'paragraph',
            })
        return None

    def handle_paragraphStop(self, group, ignoreEmptyParagraph = 0,
                             isLastParagraph = 0):
        """Close the current paragraph and return its formatted text.

        Nothing is returned for an ignored empty paragraph; the only
        paragraph of a text is returned bare, and a paragraph made of a
        single strong element becomes a sub-heading.
        """
        if self.getVar('state') != 'paragraph':
            return None
        formattedText = self.getVar('formattedText').strip()
        ignoreEmptyParagraph = ignoreEmptyParagraph \
                               or self.getVar('ignoreEmptyParagraph')
        self.pullState()
        self.setVar('paragraphsCount', self.getVar('paragraphsCount') + 1)
        if ignoreEmptyParagraph and not formattedText:
            return None
        if isLastParagraph and self.getVar('paragraphsCount') == 1:
            return formattedText
        if re.match('^<strong>.+</strong>$', formattedText) and \
               len(re.findall('<strong>.+?</strong>', formattedText)) == 1:
            # Drop the enclosing <strong> and </strong>.
            formattedText = formattedText[8:-9]
            return self.formatter.intertitle(formattedText, sublevel = 1)
        return self.formatter.paragraph(formattedText)

    def handle_punctuationAndSpace(self, punctuation):
        return self.formatter.punctuationAndSpace(punctuation)

    def handle_rowStart(self, group):
        self.getVar('table').append([])
        return self.handle_cellStart(group)

    def handle_rowStop(self, group):
        return self.handle_cellStop(group)

    def handle_spaceAndPunctuation(self, punctuation):
        return self.formatter.spaceAndPunctuation(punctuation)

    def handle_strongStart(self, group):
        # In a table with a header, "{{" only marks the header cells.
        if self.getVar('state') == 'table' and self.getVar('hasHeader'):
            return None
        self.pushState({
            'formattedText': '',
            'state': 'strong',
            })
        return None

    def handle_strongStop(self, group):
        if self.getVar('state') != 'strong':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        return self.formatter.strong(formattedText)

    def handle_tableStart(self, group, hasHeader = 0):
        self.pushState({
            'formattedText': '',
            'hasHeader': hasHeader,
            'table': [],
            'regexpObject': self.tableRegexpObject,
            'state': 'table',
            })
        return self.handle_rowStart(group)

    def handle_tableStop(self, group):
        self.handle_rowStop(group)
        table = self.getVar('table')
        hasHeader = self.getVar('hasHeader')
        self.pullState()
        return self.formatter.table(table, hasHeader)

    def handle_tableWithHeaderStart(self, group):
        return self.handle_tableStart(group, hasHeader = 1)

    def handle_teletypeStart(self, group):
        self.pushState({
            'formattedText': '',
            'state': 'teletype',
            })
        return None

    def handle_teletypeStop(self, group):
        if self.getVar('state') != 'teletype':
            return None
        formattedText = self.getVar('formattedText')
        self.pullState()
        return self.formatter.teletype(formattedText)

    def handle_word(self, word):
        return self.formatter.text(word)

    def handle_wordAsIs(self, word):
        return word

    def handle_wordInUrl(self, word):
        return word

    def hasVar(self, name):
        """Return 1 when some state defines variable *name*, else 0."""
        for state in self.states:
            if name in state:
                return 1
        return 0

    def pullState(self):
        """Leave the current state."""
        del self.states[0]

    def pushState(self, state):
        """Enter *state*, which becomes the current one."""
        self.states.insert(0, state)

    def setVar(self, name, value):
        """Set variable *name* in the current state."""
        self.states[0][name] = value
+
+
def makeHtmlFromSpip(text, simple = 0, inline = 0, **keywords):
    """Return the HTML translation of the SPIP markup *text*.

    simple   -- when true, drop inline markup, images, links and footnotes
    inline   -- when true, do not force the result into paragraphs
    keywords -- handed to the formatter (for instance ``sectionLevel``)

    An empty *text* gives an empty string.
    """
    if not text:
        return ''

    source = text.replace('\r\n', '\n')
    source = source.replace('<quote></quote>', '')
    if not inline:
        # Force the spip parser to produce paragraphs.
        source = source + '\n\n'

    if simple:
        formatter = FormatterHtmlSimple()
    else:
        formatter = FormatterHtmlComplete()
    html = SpipParser().format(formatter, source, **keywords)
    return html.strip()
+
+
if __name__ == '__main__':
    # Command line use: translate the UTF-8 encoded SPIP file named by the
    # first argument and write the HTML on the standard output.
    import io

    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s SPIP_FILE\n' % sys.argv[0])
        sys.exit(2)
    # io.open decodes while reading; "with" closes the file in all cases.
    with io.open(sys.argv[1], encoding = 'utf-8') as spipFile:
        spipText = spipFile.read()
    sys.stdout.write(makeHtmlFromSpip(spipText) + '\n')