#!/usr/bin/env python
"""atx: convert a small plain-text markup into HTML.

Input is split into paragraphs on blank lines.  Each paragraph becomes one
of: a preformatted block, a heading, a list, a blockquote, or a plain
paragraph (see ``atx``).  Inline phrase markup is handled by ``handle``.

NOTE(review): this file was recovered from a copy in which the HTML tags
and entities inside string literals had been stripped and all line
structure collapsed.  The tag names below were reconstructed from the
surviving regex patterns, comments and control flow; the exact whitespace
around them could not be recovered.  Confirm against a pristine copy.

TODO: smarter pants, generalized phrasals, prime characters (4'3", 80's)
"""

import re


### from ruby's mombo:
def escape(x):
    """Return ``x`` with ``&``, ``<`` and ``>`` replaced by HTML entities.

    ``&`` is replaced first so the ampersands introduced by the other two
    replacements are not escaped a second time.
    """
    return x.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


def sanitize(body):
    """Escape ``body``, then re-enable simple ``<a href=...>`` links.

    Only links whose text contains no ``&`` are restored (the ``[^&]*``
    group).  Both double- and single-quoted ``href`` values are accepted
    and both are emitted with double quotes.

    NOTE(review): the original comment listed further pass-through tags
    (the list was lost), but only the two link substitutions were present
    in the code.  The ``href`` value is not checked for its URL scheme, so
    this is not sufficient for untrusted input.  ``atx`` does not call it.
    """
    body = escape(body)
    body = re.sub('&lt;a href="([^"]*)"&gt;([^&]*)&lt;/a&gt;',
                  '<a href="\\1">\\2</a>', body)
    body = re.sub('&lt;a href=\'([^\']*)\'&gt;([^&]*)&lt;/a&gt;',
                  '<a href="\\1">\\2</a>', body)
    return body


# Literal replacements applied by handle(), in this order.  Longer tokens
# come before their prefixes ("``" before "`", "---" before "--").
# Numeric character references are used throughout so the output does not
# depend on the source/output encoding or on the Python version
# (the recovered code mixed literal characters with raw UTF-8 byte escapes).
_REPLACEMENTS = (
    ("``", '&#8220;'),   # left double quote
    ("''", '&#8221;'),   # right double quote
    ("`", '&#8216;'),    # left single quote
    ("'", '&#8217;'),    # right single quote / apostrophe
    # @@ these two are sorta backwards, but huffman coded...
    ('---', '&#8211;'),  # en dash
    ('--', '&#8212;'),   # em dash
    # NOTE(review): whitespace was collapsed in the recovered source; the
    # number of spaces in the next key may originally have been larger.
    (' \n', ' '),        # trailing space: join with the next line
    ('\n', '<br />\n'),  # any other newline: hard line break
)


def handle(x):
    """Apply inline markup to the text ``x`` and return the HTML.

    First the literal replacements in ``_REPLACEMENTS`` are applied, then
    ``_text_`` becomes ``<em>``, ``*text*`` becomes ``<strong>`` and
    ``|text|`` becomes ``<code>``.

    NOTE(review): the three tag names were stripped from the recovered
    source and are reconstructed here; confirm.
    """
    # @@ need some way to escape and ignore in <code>/<pre>
    for k, v in _REPLACEMENTS:
        x = x.replace(k, v)

    # "_" is a word character, so the delimiter itself sits on a word
    # boundary (\b) when preceded/followed by space or punctuation.
    x = re.sub(r'\b_(.*?)_\b', r'<em>\1</em>', x)
    # "*" and "|" are NOT word characters, so between a space and the
    # delimiter there is no word boundary; hence \B here and \b above.
    x = re.sub(r'\B\*(.*?)\*\B', r'<strong>\1</strong>', x)
    x = re.sub(r'\B\|(.*?)\|\B', r'<code>\1</code>', x)
    return x


def _render_list(p):
    """Render one list paragraph ``p`` as a ``<ul>`` or ``<ol>`` element.

    The list kind is taken from the first non-blank character.  A line
    starts a new item when it begins with ``*`` or `` *`` (bulleted) or
    matches ``^ *\\d+\\.`` (numbered); any other line is appended, without
    a separator, to the previous item.
    """
    # <ul>/<ol>  #@@ should really do <li><p> for paragraphed lists
    mode = 'ul' if p.strip()[0] == '*' else 'ol'

    items = []
    for line in p.split('\n'):
        if mode == 'ul':
            # Slices instead of line[0]: an empty line must not raise.
            starts_item = line[:1] == '*' or line[:2] == ' *'
        else:
            starts_item = re.match(r'^ *\d+\.', line) is not None
        # The first line always opens an item; there is nothing before it
        # to append to.
        if starts_item or not items:
            items.append(line)
        else:
            items[-1] += line

    rendered = []
    for item in items:
        item = item.strip()
        if mode == 'ul' and item[:1] == '*':
            item = item[1:]
        else:
            item = re.sub(r'^ *\d+\.', '', item)
        # NOTE(review): leading space kept from the recovered literal.
        rendered.append(' <li>' + handle(item.strip()) + '</li>')

    return '<' + mode + '>\n' + '\n'.join(rendered) + '\n</' + mode + '>'


def atx(x, full=1):
    """Convert the atx-formatted text ``x`` to HTML and return it.

    Paragraphs are separated by blank lines and classified in this order:

    * empty                         -> left empty
    * starts with ``$``, or follows a paragraph ending in ``::``
                                    -> ``<pre>`` (text is not processed)
    * starts with ``#``s and a space -> ``<hN>``, N = number of ``#``s;
      the text of a level-1 heading also becomes the document title
    * starts with ``*`` or ``N.``   -> ``<ul>`` / ``<ol>``
    * starts with three spaces      -> ``<blockquote>``
    * anything else                 -> ``<p>``; a trailing ``::`` is
      shortened to ``:``

    If ``full`` is true the result is wrapped in a minimal HTML document
    with a ``<title>``; otherwise only the converted paragraphs, joined by
    blank lines, are returned.
    """
    # sanitize() is deliberately not applied here; the call was already
    # commented out in the recovered source.
    if x and x[-1] == "\n":
        x = x[:-1]  # trim closing \n if exists

    paras = x.split('\n\n')  #@@ requires all in memory
    nextp, title = None, ''

    for i, p in enumerate(paras):
        if p == '':
            continue  # blank line
        elif p[0] == '$' or nextp == 'pre':
            # <pre>
            p = "<pre>" + p + "</pre>"
            nextp = None
        elif re.match(r'^\#+ ', p):
            # <hN>
            n = len(p) - len(p.lstrip('#'))  # number of leading '#'
            text = p[n:].strip()
            if n == 1:
                title = text
            p = "<h%d>%s</h%d>" % (n, handle(text), n)
        elif re.match(r'^ *(\*|\d+\.) ', p):
            p = _render_list(p)
        # NOTE(review): the recovered source compared p[:3] with a
        # whitespace-collapsed literal; three spaces is assumed.
        elif p[:3] == '   ':
            # <blockquote>
            p = "<blockquote>" + handle(p) + "</blockquote>"
        else:
            # <p>
            if p[-2:] == '::':
                nextp = "pre"  # the next paragraph is preformatted
                p = p[:-1]
            # NOTE(review): leading space kept from the recovered literal.
            p = " <p>" + handle(p) + "</p>"
        paras[i] = p

    doc = '\n\n'.join(paras)
    if full:
        # NOTE(review): only the <title> element is certain; the rest of
        # the wrapper was stripped from the recovered source.
        doc = ("<html>\n<head><title>" + title + "</title></head>\n<body>\n"
               + doc + "\n</body>\n</html>\n")
    return doc


def main(argv=None):
    """Convert the file named by the first argument (or stdin) to stdout.

    With no argument, or with ``-``, the text is read from standard input.
    The output is written without an extra trailing newline.
    """
    import sys
    if argv is None:
        argv = sys.argv
    if len(argv) <= 1 or argv[1] == "-":
        text = sys.stdin.read()
    else:
        with open(argv[1]) as source:
            text = source.read()
    sys.stdout.write(atx(text))


if __name__ == "__main__":
    main()