User:WindBOT/Filters: Difference between revisions

From the Portal Wiki
Jump to navigation Jump to search
mNo edit summary
Line 4: Line 4:
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving.
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving.
To make the bot ignore a certain line, add a "#" in front of it:
To make the bot ignore a certain line, add a "#" in front of it:
  # This line will be ignored
# This line will be ignored
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''):
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''):
  """This line will be ignored
"""This line will be ignored
  and this one as well
and this one as well
  and this one is cake
and this one is cake
  and the previous one was a lie but it was still ignored"""
and the previous one was a lie but it was still ignored"""
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything.
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything.
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot].
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot].
Line 16: Line 16:


== Page filters ==
== Page filters ==
  # Regex patterns handed to addPageFilter (defined outside this section); presumably they select page titles the bot must leave alone - TODO confirm against the bot core.
  addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')


== Semantic filters ==
== Semantic filters ==
Line 26: Line 26:
== Link filters ==
== Link filters ==
=== Wikipedia links filter ===
=== Wikipedia links filter ===
  def wikipediaLinks(link, **kwargs):
def wikipediaLinks(link, **kwargs):
      wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
      if link.getType() == u'external':
    if link.getType() == u'external':
          linkInfo = wikipediaRegex.search(link.getLink())
        linkInfo = wikipediaRegex.search(link.getLink())
          if linkInfo:
        if linkInfo:
              link.setType(u'internal')
            link.setType(u'internal')
              try:
            try:
                  wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
                wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
              except:
            except:
                  wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
                wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
              if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
            if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
                  link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
                link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
              else:
            else:
                  link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
                link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
              if link.getLabel() is None:
            if link.getLabel() is None:
                  link.setLabel(u'(Wikipedia)')
                link.setLabel(u'(Wikipedia)')
      return link
    return link
  # Hand wikipediaLinks to addLinkFilter (defined outside this section; presumably registers it as a link filter - TODO confirm).
  addLinkFilter(wikipediaLinks)
addLinkFilter(wikipediaLinks)


=== Remove trailing slashes from internal links ===
=== Remove trailing slashes from internal links ===
  def removeTrailingSlash(l, **kwargs):
def removeTrailingSlash(l, **kwargs):
      if l.getType() != u'internal':
    if l.getType() != u'internal':
          return l
        return l
      if l.getLink()[-1] == '/':
    if l.getLink()[-1] == '/':
          l.setLink(l.getLink()[:-1])
        l.setLink(l.getLink()[:-1])
      return l
    return l
  # Hand removeTrailingSlash to addLinkFilter (defined outside this section; presumably registers it as a link filter - TODO confirm).
  addLinkFilter(removeTrailingSlash)
addLinkFilter(removeTrailingSlash)


== Template filters ==
== Template filters ==
=== Template renaming ===
=== Template renaming ===
  def templateRenameMapping(t, **kwargs):
def templateRenameMapping(t, **kwargs):
      templateMap = {
    templateMap = {
          # Format goes like this (without the "#" in front obviously):
        # Format goes like this (without the "#" in front obviously):
          #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
          # Last line has no comma at the end
        # Last line has no comma at the end
          'Crush': ['pngcrush']
        'Crush': ['pngcrush']
      }
    }
      for n in templateMap:
    for n in templateMap:
          if t.getName().lower() in templateMap[n]:
        if t.getName().lower() in templateMap[n]:
              t.setName(n)
            t.setName(n)
      return t
    return t
  # Hand templateRenameMapping to addTemplateFilter (defined outside this section; presumably registers it as a template filter - TODO confirm).
  addTemplateFilter(templateRenameMapping)
addTemplateFilter(templateRenameMapping)


=== Remove useless templates ===
=== Remove useless templates ===
  def removeUselessTemplate(t, **kwargs):
def removeUselessTemplate(t, **kwargs):
      if t.getName().lower() in (u'targeted', u'languages'):
    if t.getName().lower() in (u'targeted', u'languages'):
          return None # Delete template
        return None # Delete template
      return t
    return t
  # Hand removeUselessTemplate to addTemplateFilter (defined outside this section; presumably registers it as a template filter - TODO confirm).
  addTemplateFilter(removeUselessTemplate)
addTemplateFilter(removeUselessTemplate)


=== Filter parameters of certain templates ===
=== Filter parameters of certain templates ===
  def templateParamFilter(t, **kwargs):
def templateParamFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
          'patch layout': ['before', 'after', 'current'],
        'patch layout': ['before', 'after', 'current'],
          'item infobox': ['released']
        'item infobox': ['released']
      }
    }
      if t.getName().lower() not in params:
    if t.getName().lower() not in params:
          return t
        return t
      for p in params[t.getName().lower()]:
    for p in params[t.getName().lower()]:
          if t.getParam(p):
        if t.getParam(p):
              t.setParam(p, fixContent(t.getParam(p), **kwargs))
            t.setParam(p, fixContent(t.getParam(p), **kwargs))
      return t
    return t
  # Hand templateParamFilter to addTemplateFilter (defined outside this section; presumably registers it as a template filter - TODO confirm).
  addTemplateFilter(templateParamFilter)
addTemplateFilter(templateParamFilter)


=== Remove obsolete parameters ===
=== Remove obsolete parameters ===
  def obsoleteParameterFilter(t, **kwargs):
def obsoleteParameterFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
      }
    }
      if t.getName().lower() not in params:
    if t.getName().lower() not in params:
          return t
        return t
      for p in params[t.getName().lower()]:
    for p in params[t.getName().lower()]:
          p = u(p)
        p = u(p)
          if p.find(u'#n') != -1:
        if p.find(u'#n') != -1:
              for i in range(10):
            for i in range(10):
                  t.delParam(p.replace(u'#n', str(i)))
                t.delParam(p.replace(u'#n', str(i)))
          else:
        else:
              t.delParam(p)
            t.delParam(p)
      return t
    return t
  # Hand obsoleteParameterFilter to addTemplateFilter (defined outside this section; presumably registers it as a template filter - TODO confirm).
  addTemplateFilter(obsoleteParameterFilter)
addTemplateFilter(obsoleteParameterFilter)


=== Implement {{tl|Dictionary}} ===
=== Implement {{tl|Dictionary}} ===
  class DictionaryUpdater:
class DictionaryUpdater:
      def __init__(self):
    def __init__(self):
          self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Error''': Invalid parameter passed.</div>"""</nowiki>
        self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Error''': Invalid parameter passed.</div>"""</nowiki>
          self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.dictionaries = {
        self.dictionaries = {
              u'Template:Dictionary/items': { # Dictionary page
            u'Template:Dictionary/items': { # Dictionary page
                  'name': 'items', # Dictionary name (used for categorizing)
                'name': 'items', # Dictionary name (used for categorizing)
                  'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
              },
            },
              u'Template:Dictionary/common strings': { # Warning: no underscore
            u'Template:Dictionary/common strings': { # Warning: no underscore
                  'name': 'common strings',
                'name': 'common strings',
                  'sync': 'Template:Dictionary/common strings/Special:SyncData'
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
              },
            },
              u'Template:Dictionary/price': {
            u'Template:Dictionary/price': {
                  'name': 'price',
                'name': 'price',
                  'sync': 'Template:Dictionary/price/Special:SyncData',
                'sync': 'Template:Dictionary/price/Special:SyncData',
                  'allTemplate': {
                'allTemplate': {
                      'template': 'item price/fmt',
                    'template': 'item price/fmt',
                      'params': {
                    'params': {
                          'tt': <nowiki>'{{{tt|yes}}}'</nowiki>
                        'tt': <nowiki>'{{{tt|yes}}}'</nowiki>
                      }
                    }
                  }
                }
              },
            },
              u'Template:Dictionary/mechanics': {
            u'Template:Dictionary/mechanics': {
                  'name': 'mechanics',
                'name': 'mechanics',
                  'sync': 'Template:Dictionary/mechanics/Special:SyncData'
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
              },
            },
              u'Template:Dictionary/characters': {
            u'Template:Dictionary/characters': {
                  'name': 'characters',
                'name': 'characters',
                  'sync': 'Template:Dictionary/characters/Special:SyncData'
                'sync': 'Template:Dictionary/characters/Special:SyncData'
              },
            },
              u'Template:Dictionary/demonstration': {
            u'Template:Dictionary/demonstration': {
                  'name': 'demonstration',
                'name': 'demonstration',
                  'sync': 'Template:Dictionary/demonstration/Special:SyncData'
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
              },
            },
              u'Template:Dictionary/transcripts': {
            u'Template:Dictionary/transcripts': {
                  'name': 'transcripts',
                'name': 'transcripts',
                  'sync': 'Template:Dictionary/transcripts/Special:SyncData'
                'sync': 'Template:Dictionary/transcripts/Special:SyncData'
              },
            },
              u'Template:Dictionary/portal achievements': {
            u'Template:Dictionary/portal achievements': {
                  'name': 'portal achievements',
                'name': 'portal achievements',
                  'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
                'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
              },
            },
              u'Template:Dictionary/portal: still alive achievements': {
            u'Template:Dictionary/portal: still alive achievements': {
                  'name': 'portal: still alive achievements',
                'name': 'portal: still alive achievements',
                  'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
                'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
              },
            },
              u'Template:Dictionary/portal 2 achievements': {
            u'Template:Dictionary/portal 2 achievements': {
                  'name': 'portal 2 achievements',
                'name': 'portal 2 achievements',
                  'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
                'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
              },
            },
              u'Template:Dictionary/audio': {
            u'Template:Dictionary/audio': {
                  'name': 'audio',
                'name': 'audio',
                  'sync': 'Template:Dictionary/audio/Special:SyncData'
                'sync': 'Template:Dictionary/audio/Special:SyncData'
              }
            }
          }
        }
          self.subpageSeparator = u'/'
        self.subpageSeparator = u'/'
          # List of supported languages, in prefered order
        # List of supported languages, in prefered order
          self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
        self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
          self.defaultLang = u'en'
        self.defaultLang = u'en'
          self.allKeyName = u'_all_'
        self.allKeyName = u'_all_'
          self.filterName = u'Your friendly neighborhood dictionary updater'
        self.filterName = u'Your friendly neighborhood dictionary updater'
          self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
          self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
          self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
          self.scheduler = BatchScheduler(16)
        self.scheduler = BatchScheduler(16)
          addWhitelistPage(self.dictionaries.keys())
        addWhitelistPage(self.dictionaries.keys())
      def generateSubpage(self, keyName, data, currentDict, syncData):
    def generateSubpage(self, keyName, data, currentDict, syncData):
          h = hashlib.md5()
        h = hashlib.md5()
          if type(data) is type({}): # Subkeys (translations or not)
        if type(data) is type({}): # Subkeys (translations or not)
              isTranslation = True
            isTranslation = True
              subpage = u(self.subpageTemplateLang)
            subpage = u(self.subpageTemplateLang)
              for k in data:
            for k in data:
                  if k not in self.languages:
                if k not in self.languages:
                      isTranslation = False
                    isTranslation = False
                      subpage = u(self.subpageTemplateParam)
                    subpage = u(self.subpageTemplateParam)
                      break
                    break
              ordered = []
            ordered = []
              unordered = {}
            unordered = {}
              if isTranslation:
            if isTranslation:
                  missing = []
                missing = []
                  for lang in self.languages:
                for lang in self.languages:
                      if lang in data:
                    if lang in data:
                          ordered.append(lang + u'=' + data[lang])
                        ordered.append(lang + u'=' + data[lang])
                          unordered[lang] = data[lang]
                        unordered[lang] = data[lang]
                          h.update((lang + u'=' + data[lang]).encode('utf8'))
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                      else:
                    else:
                          missing.append(lang)
                        missing.append(lang)
                          h.update((u'null-' + lang).encode('utf8'))
                        h.update((u'null-' + lang).encode('utf8'))
                  if self.defaultLang in data:
                if self.defaultLang in data:
                      ordered.insert(0, u'#default=' + data[self.defaultLang])
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
                  if len(missing):
                if len(missing):
                      subpage = subpage.replace(u'%missing%', <nowiki>u"Languages missing: "</nowiki> + u', '.join(missing))
                    subpage = subpage.replace(u'%missing%', <nowiki>u"Languages missing: "</nowiki> + u', '.join(missing))
                  else:
                else:
                      subpage = subpage.replace(u'%missing%', <nowiki>u"Supported languages: all"</nowiki>)
                    subpage = subpage.replace(u'%missing%', <nowiki>u"Supported languages: all"</nowiki>)
              else: # Not a translation
            else: # Not a translation
                  h.update('Any-')
                h.update('Any-')
                  subkeys = data.keys()
                subkeys = data.keys()
                  subkeys.sort()
                subkeys.sort()
                  for k in subkeys:
                for k in subkeys:
                      ordered.append(k + u'=' + data[k])
                    ordered.append(k + u'=' + data[k])
                      unordered[k] = data[k]
                    unordered[k] = data[k]
                      h.update((k + u'=' + data[k]).encode('utf8'))
                    h.update((k + u'=' + data[k]).encode('utf8'))
              if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])):
            if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])):
                  allKey = []
                allKey = []
                  keys = unordered.keys()
                keys = unordered.keys()
                  keys.sort()
                keys.sort()
                  for k in keys:
                for k in keys:
                      allKey.append(k + u'=' + unordered[k])
                    allKey.append(k + u'=' + unordered[k])
                  for p in self.dictionaries[currentDict]['allTemplate']['params']:
                for p in self.dictionaries[currentDict]['allTemplate']['params']:
                      allKey.append(u(p) + u'=' + u(self.dictionaries[currentDict]['allTemplate']['params'][p]))
                    allKey.append(u(p) + u'=' + u(self.dictionaries[currentDict]['allTemplate']['params'][p]))
                  insertIndex = 0
                insertIndex = 0
                  if isTranslation and self.defaultLang in data:
                if isTranslation and self.defaultLang in data:
                      insertIndex = 1
                    insertIndex = 1
                  ordered.insert(insertIndex, u(self.allKeyName) + u'={{' + u(self.dictionaries[currentDict]['allTemplate']['template']) + u'|' + u'|'.join(allKey) + u'}}')
                ordered.insert(insertIndex, u(self.allKeyName) + u'={{' + u(self.dictionaries[currentDict]['allTemplate']['template']) + u'|' + u'|'.join(allKey) + u'}}')
              subpage = subpage.replace(u'%options%', u'|'.join(ordered))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
          else: # No subkeys
        else: # No subkeys
              data = u(data)
            data = u(data)
              subpage = self.subpageTemplateID
            subpage = self.subpageTemplateID
              h.update(u(u'ID-' + data).encode('utf8'))
            h.update(u(u'ID-' + data).encode('utf8'))
              subpage = subpage.replace(u'%string%', data)
            subpage = subpage.replace(u'%string%', data)
          h = u(h.hexdigest())
        h = u(h.hexdigest())
          if keyName in syncData and syncData[keyName] == h:
        if keyName in syncData and syncData[keyName] == h:
              return # Same hash
            return # Same hash
          syncData[keyName] = h # Update sync data
        syncData[keyName] = h # Update sync data
          subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionary%', currentDict)
          subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
          subpage = subpage.replace(u'%keyname%', keyName)
        subpage = subpage.replace(u'%keyname%', keyName)
          self.scheduler.schedule(editPage, currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False)
        self.scheduler.schedule(editPage, currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False)
      def processComment(self, commentString, currentDict, definedStrings, syncData):
    def processComment(self, commentString, currentDict, definedStrings, syncData):
          commentContents = []
        commentContents = []
          for extractedStr in self.stringsExtract.finditer(commentString):
        for extractedStr in self.stringsExtract.finditer(commentString):
              comment = u''
            comment = u''
              if extractedStr.group(1):
            if extractedStr.group(1):
                  comment = u'# ' + u(extractedStr.group(1)) + u'\n'
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
              dataString = u(extractedStr.group(3))
            dataString = u(extractedStr.group(3))
              if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                  data = dataString.strip()
                data = dataString.strip()
                  dataWriteback = u' ' + data
                dataWriteback = u' ' + data
              else: # There's subkeys; detect whether this is a translation or not
            else: # There's subkeys; detect whether this is a translation or not
                  data = {}
                data = {}
                  isTranslation = True
                isTranslation = True
                  for translation in self.translationExtract.finditer(dataString.rstrip()):
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                      data[u(translation.group(1))] = u(translation.group(2))
                    data[u(translation.group(1))] = u(translation.group(2))
                      if u(translation.group(1)) not in self.languages:
                    if u(translation.group(1)) not in self.languages:
                          isTranslation = False
                        isTranslation = False
                  ordered = []
                ordered = []
                  if isTranslation:
                if isTranslation:
                      for lang in self.languages:
                    for lang in self.languages:
                          if lang in data:
                        if lang in data:
                              ordered.append(u'  ' + lang + u': ' + data[lang])
                            ordered.append(u'  ' + lang + u': ' + data[lang])
                  else: # Not a translation, so order in alphabetical order
                else: # Not a translation, so order in alphabetical order
                      subkeys = data.keys()
                    subkeys = data.keys()
                      subkeys.sort()
                    subkeys.sort()
                      for subk in subkeys:
                    for subk in subkeys:
                          ordered.append(u'  ' + subk + u': ' + data[subk])
                        ordered.append(u'  ' + subk + u': ' + data[subk])
                  dataWriteback = u'\n' + u'\n'.join(ordered)
                dataWriteback = u'\n' + u'\n'.join(ordered)
              keyNames = u(extractedStr.group(2)).lower().split(u'|')
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
              validKeyNames = []
            validKeyNames = []
              for keyName in keyNames:
            for keyName in keyNames:
                  keyName = keyName.replace(u'_', u' ').strip()
                keyName = keyName.replace(u'_', u' ').strip()
                  if keyName in definedStrings:
                if keyName in definedStrings:
                      continue # Duplicate key
                    continue # Duplicate key
                  definedStrings.append(keyName)
                definedStrings.append(keyName)
                  validKeyNames.append(keyName)
                validKeyNames.append(keyName)
                  self.generateSubpage(keyName, data, currentDict, syncData)
                self.generateSubpage(keyName, data, currentDict, syncData)
              if len(validKeyNames):
            if len(validKeyNames):
                  commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
          self.scheduler.execute()
        self.scheduler.execute()
          return u'\n\n'.join(commentContents)
        return u'\n\n'.join(commentContents)
      def __call__(self, content, **kwargs):
    def __call__(self, content, **kwargs):
          if 'article' not in kwargs:
        if 'article' not in kwargs:
              return content
            return content
          if u(kwargs['article'].title) not in self.dictionaries:
        if u(kwargs['article'].title) not in self.dictionaries:
              return content
            return content
          currentDict = u(kwargs['article'].title)
        currentDict = u(kwargs['article'].title)
          syncPage = page(self.dictionaries[currentDict]['sync'])
        syncPage = page(self.dictionaries[currentDict]['sync'])
          try:
        try:
              syncDataText = u(syncPage.getWikiText()).split(u'\n')
            syncDataText = u(syncPage.getWikiText()).split(u'\n')
          except: # Page probably doesn't exist
        except: # Page probably doesn't exist
              syncDataText = u''
            syncDataText = u''
          syncData = {}
        syncData = {}
          for sync in syncDataText:
        for sync in syncDataText:
              sync = u(sync.strip())
            sync = u(sync.strip())
              if not sync:
            if not sync:
                  continue
                continue
              sync = sync.split(u':', 2)
            sync = sync.split(u':', 2)
              if len(sync) == 2:
            if len(sync) == 2:
                  syncData[sync[0]] = sync[1]
                syncData[sync[0]] = sync[1]
          oldSyncData = syncData.copy()
        oldSyncData = syncData.copy()
          newContent = u''
        newContent = u''
          previousIndex = 0
        previousIndex = 0
          definedStrings = []
        definedStrings = []
          for comment in self.commentsExtract.finditer(content):
        for comment in self.commentsExtract.finditer(content):
              newContent += content[previousIndex:comment.start()]
            newContent += content[previousIndex:comment.start()]
              previousIndex = comment.end()
            previousIndex = comment.end()
              # Process current comment
            # Process current comment
              newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
          newContent += content[previousIndex:]
        newContent += content[previousIndex:]
          # Check if we need to update sync data
        # Check if we need to update sync data
          needUpdate = False
        needUpdate = False
          for k in syncData:
        for k in syncData:
              if k not in oldSyncData or oldSyncData[k] != syncData[k]:
            if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                  needUpdate = True
                needUpdate = True
                  break
                break
          # Check for deleted strings
        # Check for deleted strings
          for k in oldSyncData:
        for k in oldSyncData:
              if k not in definedStrings:
            if k not in definedStrings:
                  try:
                try:
                      deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                  except:
                except:
                      pass
                    pass
                  if k in syncData:
                if k in syncData:
                      del syncData[k]
                    del syncData[k]
                  needUpdate = True
                needUpdate = True
          if needUpdate:
        if needUpdate:
              # Build syncdata string representation
            # Build syncdata string representation
              syncKeys = syncData.keys()
            syncKeys = syncData.keys()
              syncKeys.sort()
            syncKeys.sort()
              syncLines = []
            syncLines = []
              for k in syncKeys:
            for k in syncKeys:
                  syncLines.append(k + u':' + syncData[k])
                syncLines.append(k + u':' + syncData[k])
              editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False)
            editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False)
          return newContent
        return newContent
      def scheduledRun(self):
    def scheduledRun(self):
          for d in self.dictionaries:
        for d in self.dictionaries:
              fixPage(d)
            fixPage(d)
  dictUpdater = DictionaryUpdater()
dictUpdater = DictionaryUpdater()
  addFilter(dictUpdater)
addFilter(dictUpdater)
  scheduleTask(dictUpdater.scheduledRun, 3)
scheduleTask(dictUpdater.scheduledRun, 3)

Revision as of 16:27, 8 January 2012

How to disable a filter

If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. To make the bot ignore a certain line, add a "#" in front of it:

# This line will be ignored

If there are multiple lines, wrap them inside triple-quotes (you still need to put the two spaces at the beginning of the line):

"""This line will be ignored
and this one as well
and this one is cake
and the previous one was a lie but it was still ignored"""

If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. Or, if the problem really is elsewhere, block the bot.

Page filters

# Page-level filter: both arguments are regular-expression strings passed to addPageFilter.
# NOTE(review): presumably these patterns select page titles the bot must leave alone
# (user pages and talk/help/wiki/template namespaces) -- confirm against addPageFilter.
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')

Semantic filters

None yet~

Language-specific filters

None yet~

Link filters

Wikipedia links filter

def wikipediaLinks(link, **kwargs):
    """Turn an external Wikipedia URL into an internal ``Wikipedia:`` link.

    Links whose type is not ``u'external'``, or whose target does not match
    ``http(s)://[<lang>.]wikipedia.org/wiki/<page>``, are returned untouched.
    For matching links the type is switched to ``u'internal'``, the target
    becomes ``Wikipedia:<page>`` (English or no language prefix) or
    ``Wikipedia:<lang>:<page>`` (any other language), and a missing label is
    set to ``(Wikipedia)``.

    :param link: link object exposing getType/setType, getLink/setLink and
        getLabel/setLabel.
    :param kwargs: extra filter context; unused here.
    :returns: the same ``link`` object, possibly modified in place.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        # Percent-decode the page name and interpret the result as UTF-8.
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        # str() can fail on non-ASCII input; fall back to the raw page name.
        # (Was a bare ``except:``, which also swallowed KeyboardInterrupt/SystemExit.)
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    language = linkInfo.group(1)
    if not language or language.lower() == u'en':
        link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + language.lower() + u':' + wikiPage) # Non-english Wikipedia
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link
# Hand wikipediaLinks to addLinkFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every link -- confirm.
addLinkFilter(wikipediaLinks)

Remove trailing slashes from internal links

def removeTrailingSlash(l, **kwargs):
    """Strip a single trailing ``/`` from the target of an internal link.

    :param l: link object exposing getType, getLink and setLink.
    :param kwargs: extra filter context; unused here.
    :returns: the same link object, modified in place when a slash was removed.
    """
    if l.getType() != u'internal':
        return l
    target = l.getLink()
    # endswith() is safe on an empty target, where indexing [-1] raised IndexError.
    if target.endswith('/'):
        l.setLink(target[:-1])
    return l
# Hand removeTrailingSlash to addLinkFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every link -- confirm.
addLinkFilter(removeTrailingSlash)

Template filters

Template renaming

def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known bad name.

    The template's lowercased name is looked up in each list of bad names;
    on a hit the template is renamed to the corresponding good name.
    Returns the same template object.
    """
    preferredNames = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush']
    }
    for goodName, badNames in preferredNames.items():
        currentName = t.getName().lower()
        if currentName in badNames:
            t.setName(goodName)
    return t
# Hand templateRenameMapping to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every template -- confirm.
addTemplateFilter(templateRenameMapping)

Remove useless templates

def removeUselessTemplate(t, **kwargs):
    """Return None for templates named 'targeted' or 'languages' (any case).

    Every other template is returned unchanged.
    """
    uselessNames = (u'targeted', u'languages')
    loweredName = t.getName().lower()
    # Returning None signals that the template should be deleted.
    return None if loweredName in uselessNames else t
# Hand removeUselessTemplate to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every template -- confirm.
addTemplateFilter(removeUselessTemplate)

Filter parameters of certain templates

def templateParamFilter(t, **kwargs):
    """Run fixContent over selected parameters of selected templates.

    Templates whose lowercased name is not listed are returned untouched.
    For listed templates, each listed parameter with a truthy value is
    replaced by fixContent(value, **kwargs). Returns the same template.
    """
    filteredParams = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    templateName = t.getName().lower()
    if templateName in filteredParams:
        for paramName in filteredParams[templateName]:
            value = t.getParam(paramName)
            if value:
                t.setParam(paramName, fixContent(value, **kwargs))
    return t
# Hand templateParamFilter to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every template -- confirm.
addTemplateFilter(templateParamFilter)

Remove obsolete parameters

def obsoleteParameterFilter(t, **kwargs):
    """Delete listed obsolete parameters from listed templates.

    A parameter name containing '#n' stands for ten parameters: '#n' is
    replaced by each digit 0-9 in turn and every resulting name is deleted.
    The map is currently empty, so every template is returned unchanged.
    """
    obsoleteParams = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    templateName = t.getName().lower()
    if templateName in obsoleteParams:
        for rawName in obsoleteParams[templateName]:
            paramName = u(rawName)
            if u'#n' in paramName:
                for digit in range(10):
                    t.delParam(paramName.replace(u'#n', str(digit)))
            else:
                t.delParam(paramName)
    return t
# Hand obsoleteParameterFilter to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this makes the bot run it on every template -- confirm.
addTemplateFilter(obsoleteParameterFilter)

Implement {{Dictionary}}

class DictionaryUpdater:
    """Content filter that expands dictionary master pages into subpages.

    Each configured dictionary page holds its strings inside HTML comments.
    When the filter is called on such a page it parses every comment,
    rewrites it in a normalized form, schedules an edit of one subpage per
    string key, and keeps a per-dictionary "sync" page of MD5 hashes so that
    unchanged strings are not re-pushed and removed strings get deleted.
    """
    def __init__(self):
        """Set up templates, the dictionary table, regexes and the scheduler."""
        self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        self.invalidParamError = """<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Error''': Invalid parameter passed.</div>"""
        self.subpageTemplateID = """%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        self.dictionaries = {
            u'Template:Dictionary/items': { # Dictionary page
                'name': 'items', # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
            },
            u'Template:Dictionary/common strings': { # Warning: no underscore
                'name': 'common strings',
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
            },
            u'Template:Dictionary/price': {
                'name': 'price',
                'sync': 'Template:Dictionary/price/Special:SyncData',
                'allTemplate': {
                    'template': 'item price/fmt',
                    'params': {
                        'tt': '{{{tt|yes}}}'
                    }
                }
            },
            u'Template:Dictionary/mechanics': {
                'name': 'mechanics',
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
            },
            u'Template:Dictionary/characters': {
                'name': 'characters',
                'sync': 'Template:Dictionary/characters/Special:SyncData'
            },
            u'Template:Dictionary/demonstration': {
                'name': 'demonstration',
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
            },
            u'Template:Dictionary/transcripts': {
                'name': 'transcripts',
                'sync': 'Template:Dictionary/transcripts/Special:SyncData'
            },
            u'Template:Dictionary/portal achievements': {
                'name': 'portal achievements',
                'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal: still alive achievements': {
                'name': 'portal: still alive achievements',
                'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal 2 achievements': {
                'name': 'portal 2 achievements',
                'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
            },
            u'Template:Dictionary/audio': {
                'name': 'audio',
                'sync': 'Template:Dictionary/audio/Special:SyncData'
            }
        }
        self.subpageSeparator = u'/'
        # List of supported languages, in preferred order
        self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.allKeyName = u'_all_'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        # NOTE(review): the original pattern was lost when the wiki page was
        # rendered (only a bare `r` was left, which is a NameError). It is
        # reconstructed here as an HTML-comment matcher because __call__ reads
        # group(1) of each match and re-wraps the result in an HTML comment.
        # Confirm against the live page source.
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        self.scheduler = BatchScheduler(16)
        addWhitelistPage(self.dictionaries.keys())
    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Build the subpage for one string key and schedule its edit if changed.

        :param keyName: normalized key of the string.
        :param data: either a plain string, or a dict of subkey -> value. A
            dict whose keys are all in self.languages is treated as a
            translation table; any other dict as a generic parameter switch.
        :param currentDict: title of the dictionary master page.
        :param syncData: dict of keyName -> MD5 hex digest; updated in place
            when the content hash differs from the stored one.
        :returns: None. Nothing is scheduled when the hash is unchanged.
        """
        h = hashlib.md5()
        if isinstance(data, dict): # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
                    break
            ordered = []
            unordered = {}
            if isTranslation:
                missing = []
                # Walk languages in preferred order so output and hash are stable.
                for lang in self.languages:
                    if lang in data:
                        ordered.append(lang + u'=' + data[lang])
                        unordered[lang] = data[lang]
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                    else:
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
                if missing:
                    subpage = subpage.replace(u'%missing%', u"Languages missing: " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"Supported languages: all")
            else: # Not a translation
                h.update(b'Any-')
                # sorted() replaces keys().sort(), which needs a list-returning keys().
                for k in sorted(data.keys()):
                    ordered.append(k + u'=' + data[k])
                    unordered[k] = data[k]
                    h.update((k + u'=' + data[k]).encode('utf8'))
            if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])):
                # Extra switch option that passes every value to the dictionary's template.
                allKey = []
                for k in sorted(unordered.keys()):
                    allKey.append(k + u'=' + unordered[k])
                for p in self.dictionaries[currentDict]['allTemplate']['params']:
                    allKey.append(u(p) + u'=' + u(self.dictionaries[currentDict]['allTemplate']['params'][p]))
                insertIndex = 0
                if isTranslation and self.defaultLang in data:
                    insertIndex = 1 # Keep '#default=' as the first option
                ordered.insert(insertIndex, u(self.allKeyName) + u'={{' + u(self.dictionaries[currentDict]['allTemplate']['template']) + u'|' + u'|'.join(allKey) + u'}}')
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else: # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        h = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == h:
            return # Same hash
        syncData[keyName] = h # Update sync data
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        self.scheduler.schedule(editPage, currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False)
    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Parse one comment body, push its strings and return it normalized.

        :param commentString: text found inside one HTML comment.
        :param currentDict: title of the dictionary master page.
        :param definedStrings: list of keys seen so far on this page; new keys
            are appended, and keys already present are skipped as duplicates.
        :param syncData: hash table handed through to generateSubpage.
        :returns: the rewritten comment body, entries joined by blank lines.
        """
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            # Restored empty string: the rendered page left a bare `u` here.
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else: # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                    data[u(translation.group(1))] = u(translation.group(2))
                    if u(translation.group(1)) not in self.languages:
                        isTranslation = False
                ordered = []
                if isTranslation:
                    for lang in self.languages:
                        if lang in data:
                            ordered.append(u'  ' + lang + u': ' + data[lang])
                else: # Not a translation, so order in alphabetical order
                    for subk in sorted(data.keys()):
                        ordered.append(u'  ' + subk + u': ' + data[subk])
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if validKeyNames:
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        self.scheduler.execute()
        return u'\n\n'.join(commentContents)
    def __call__(self, content, **kwargs):
        """Filter entry point: process a dictionary page's content.

        :param content: wikitext of the page being filtered.
        :param kwargs: must contain 'article' (an object with a ``title``)
            for anything to happen; other keys are ignored.
        :returns: ``content`` unchanged when there is no article or the
            article is not a configured dictionary; otherwise the content
            with every HTML comment replaced by its normalized form.
        """
        if 'article' not in kwargs:
            return content
        if u(kwargs['article'].title) not in self.dictionaries:
            return content
        currentDict = u(kwargs['article'].title)
        syncPage = page(self.dictionaries[currentDict]['sync'])
        try:
            syncDataText = u(syncPage.getWikiText()).split(u'\n')
        except Exception: # Page probably doesn't exist
            # Empty list instead of the bare `u` the rendered page left here.
            syncDataText = []
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            sync = sync.split(u':', 2)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        oldSyncData = syncData.copy()
        newContent = u''
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
        newContent += content[previousIndex:]
        # Check if we need to update sync data
        needUpdate = False
        for k in syncData:
            if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                needUpdate = True
                break
        # Check for deleted strings
        for k in oldSyncData:
            if k not in definedStrings:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                except Exception:
                    # Best effort: a failed deletion must not abort the sync update.
                    pass
                if k in syncData:
                    del syncData[k]
                needUpdate = True
        if needUpdate:
            # Build syncdata string representation
            syncLines = []
            for k in sorted(syncData.keys()):
                syncLines.append(k + u':' + syncData[k])
            editPage(syncPage, u'\n'.join(syncLines), summary=u'Updated synchronization information for [[:' + currentDict + u']].', minor=True, nocreate=False)
        return newContent
    def scheduledRun(self):
        """Call fixPage on every configured dictionary page."""
        for d in self.dictionaries:
            fixPage(d)
# Create the single updater instance and pass it to addFilter; the instance
# is callable through DictionaryUpdater.__call__.
dictUpdater = DictionaryUpdater()
addFilter(dictUpdater)
# Pass the updater's scheduledRun method to scheduleTask with the value 3.
# NOTE(review): the meaning and unit of the 3 is not visible here -- confirm against scheduleTask.
scheduleTask(dictUpdater.scheduledRun, 3)