New Python Markdown extension toc_fixer
The toc_fixer extension is implemented as a post-processor.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
#!/usr/bin/python
"""
Table of Contents fixer extension for Python-Markdown
=====================================================

Fixes up the Table of Contents by removing entries up to and
including the "Table of Contents" line (or as defined in the
toc extension.)
"""

# (some standard markdown.extension stuff snipped)


def run(self, text):
    """Rewrite the rendered Table of Contents inside ``text``.

    NOTE(review): this is presumably the ``run`` method of the
    post-processor class whose definition was snipped above; ``self`` is
    not used by the body.

    The first ``<div class="toc">...</div>`` found in ``text`` is split
    into one tag per line and re-indented by nesting depth.  Every
    ``<a href="#...">`` entry up to and including the one whose link text
    is exactly ``Table of Contents`` is dropped, ``<li>`` / ``<a>`` /
    ``</li>`` triples are joined back onto a single line, and elements
    left empty by the removal are deleted.  If no ``Table of Contents``
    entry exists, every entry is dropped.

    Args:
        text: The HTML document as a string.

    Returns:
        ``text`` with the TOC ``<div>`` replaced by the rewritten TOC.
        Everything outside the ``<div>`` is preserved unchanged.  If no
        TOC ``<div>`` is present, ``text`` is returned as is.
    """
    # Locate the <div> holding the Table of Contents
    match = re.search(r'(?P<toc><div class="toc">.*?</div>)', text, re.DOTALL)
    if not match:
        return text
    start, end = match.span('toc')

    # One nesting level of indentation.  The same unit must be used when
    # indenting and when dedenting, otherwise an opening tag and its
    # closing tag end up at different depths and the empty-element
    # pattern below (which back-references the indent) can never match.
    indent = '  '

    re_open_tag = re.compile(r'^<([a-z]{1,2})[a-z]*')
    re_close_tag = re.compile(r'/[a-z]*>$')
    re_open_li = re.compile(r' *<li>$')
    re_a = re.compile(
        r'(?P<P1> *<a )(?:id="toc-[0-9]{4}" )?'
        r'(?P<P2>href="#(?P<h1h2_id>.*?)">.*</a>.*)')
    re_close_li = re.compile(r'</li>$')
    href_pattern = r'<a (id="[^"]+" )?href="#[^"]+">'
    re_toc = re.compile(href_pattern + r'Table of Contents</a>')
    re_href = re.compile(href_pattern)
    re_empty_element = re.compile(
        r'^((\s*)<([a-z]+)>\n\2</\3>\n)', re.MULTILINE)

    toc = []
    spaces = ''
    found_toc_line = False   # True = found line reading 'Table of Contents'

    # Add newline between elements, then process the TOC
    for line in re.sub(r'><', '>\n<', match.group('toc')).split('\n'):
        is_href = bool(re_href.match(line))

        # An opening tag is indented one level deeper than its parent
        if re_open_tag.match(line):
            spaces += indent

        # Add line to toc[], unless it's an <a href> entry and we haven't
        # found the 'Table of Contents' line yet
        if not is_href or found_toc_line:
            toc.append(spaces + line)

        # Checked after the append so that the 'Table of Contents' entry
        # itself is dropped as well
        if re_toc.match(line):
            found_toc_line = True

        # A closing tag (including the </a> that ends an entry line)
        # returns to the parent's indentation
        if re_close_tag.search(line):
            spaces = spaces[:-len(indent)]

        # Join three lines (as follows) into one:
        #   <li>
        #     <a id="toc-nnnn" href="#somewhere-in-the-body">Somewhere in the Body</a>
        #   </li>
        # The length guard keeps a stray </li> in a malformed TOC from
        # raising IndexError.
        if (re_close_li.match(line)
                and len(toc) >= 3
                and re_open_li.match(toc[-3])
                and re_a.match(toc[-2])):
            toc.pop()                       # the '</li>' line
            a_line = toc.pop()
            li_line = toc.pop()
            toc.append('{}{}</li>'.format(li_line, a_line.lstrip()))

    # Remove empty elements from the TOC.  Repeat until nothing changes:
    # deleting an empty <li> pair can leave its parent empty too.
    toc_str = '\n'.join(toc)
    removed = 1
    while removed:
        toc_str, removed = re_empty_element.subn('', toc_str)

    # Splice at the exact span of the <div>; text on either side of it
    # must be kept intact.
    return text[:start] + toc_str + text[end:]