class link: def __init__(self, content): content = u(content) self.joined = False self.setBody(content) self.setType(u'unknown') self.setLabel(None) self.setLink(u'') self.anchor = None self.joined = False if len(content) > 2: if content[:2] == u'[[' and content[-2:] == u']]': split = content[2:-2].split(u'|') if len(split) in (1, 2): self.setType(u'internal') lnk = split[0] if lnk.find(u':') == -1: lnk = lnk.replace(u'_', u' ') anchor = None if lnk.find(u'#') != -1: lnk, anchor = lnk.split(u'#', 1) self.setAnchor(anchor) self.setLink(lnk) if len(split) == 2: self.setLabel(split[1]) else: self.setLabel(split[0]) self.joined = anchor is None elif content[0] == u'[' and content[-1] == u']': split = content[1:-1].split(u' ', 1) self.setType(u'external') self.setLink(split[0]) if len(split) == 2: self.setLabel(split[1]) else: self.setLabel(None) def getType(self): return u(self.kind) def getBody(self): return u(self.body) def getLink(self, withAnchor=False): if withAnchor and self.getAnchor() is not None: return u(self.link) + u'#' + self.getAnchor() return u(self.link) def getAnchor(self): return self.anchor def getLabel(self): if self.label is None: return None if self.joined: return self.getLink() return u(self.label) def setType(self, kind): self.kind = u(kind) def setBody(self, body): self.body = u(body) def setLink(self, link): link = u(link) if self.getType() == u'internal' and link.find(u'#') != -1: link, anchor = link.split(u'#', 1) self.setAnchor(anchor) self.link = link if self.joined: self.label = u(link) replaceDots = compileRegex(r'(?:\.[a-f\d][a-f\d])+') def _replaceDots(self, g): s = '' g = g.group(0) for i in xrange(0, len(g), 3): s += chr(int(g[i + 1:i + 3], 16)) return s.decode('utf8') def setAnchor(self, anchor): if self.getType() == u'internal': u(anchor).replace(u'_', u' ') try: anchor = link.replaceDots.sub(self._replaceDots, anchor) except: pass self.anchor = anchor def setLabel(self, label): if label is None: self.label = None else: self.label = u(label) if self.joined: self.link = u(label) def __str__(self): return self.__unicode__() def __repr__(self): return u'<Link-' + self.getType() + u': ' + self.__unicode__() + u'>' def __unicode__(self): label = self.getLabel() tmpLink = self.getLink(withAnchor=True) if self.getType() == u'internal': tmpLink2 = tmpLink.replace(u'_', u' ') if label in (tmpLink2, tmpLink) or (label and tmpLink and (label[0].lower() == tmpLink[0].lower() and tmpLink[1:] == label[1:]) or (label[0].lower() == tmpLink2[0].lower() and tmpLink2[1:] == label[1:])): return u'[[' + label + u']]' elif tmpLink and label and len(label) > len(tmpLink) and (label.lower().find(tmpLink2.lower()) == 0 or label.lower().find(tmpLink.lower()) == 0): index = max(label.lower().find(tmpLink2.lower()), label.lower().find(tmpLink.lower())) badchars = (u' ', u'_') nobadchars = True for c in badchars: if label[:index].find(c) != -1 or label[index+len(tmpLink):].find(c) != -1: nobadchars = False if nobadchars: return label[:index] + u(link(u'[[' + tmpLink + u'|' + label[index:index+len(tmpLink)] + u']]')) + label[index+len(tmpLink):] return u'[[' + tmpLink + u'|' + label + u']]' if self.getType() == u'external': if label is None: return u'[' + tmpLink + u']' return u'[' + tmpLink + u' ' + label + u']' return self.getBody()