User:WindBOT/Filters: Difference between revisions

From the Portal Wiki
Jump to navigation Jump to search
 
(31 intermediate revisions by 3 users not shown)
Line 4: Line 4:
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving.
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving.
To make the bot ignore a certain line, add a "#" in front of it:
To make the bot ignore a certain line, add a "#" in front of it:
  # This line will be ignored
# This line will be ignored
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''):
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''):
  """This line will be ignored
"""This line will be ignored
  and this one as well
and this one as well
  and this one is cake
and this one is cake
  and the previous one was a lie but it was still ignored"""
and the previous one was a lie but it was still ignored"""
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything.
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything.
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot].
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot].
Line 16: Line 16:


== Page filters ==
== Page filters ==
  addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')


== Semantic filters ==
== Semantic filters ==
None yet~
=== Capitalized words ===
# Terms registered with enforceCapitalization, spelled with the capitalization
# shown here. Presumably the bot rewrites other casings of these terms to
# match - confirm against the enforceCapitalization helper.
enforceCapitalization('Aperture Science', 'P-body', 'GLaDOS')
enforceCapitalization('Portal Gun', 'Weighted Storage Cube')
enforceCapitalization('Steam', 'Nintendo Switch', 'PlayStation', 'Xbox')
 
=== Common misspellings ===
# Spelling fixes. In each wordFilter call the first argument appears to be the
# spelling to keep and the remaining arguments the variants listed for it -
# confirm against the wordFilter helper.
# NOTE(review): the second entry lists u'Rattman' both as the target and as a
# variant. This may be deliberate (e.g. to normalise letter case) or simply
# redundant - confirm before removing it.
addSafeFilter(
    wordFilter(u'Doug Rattman', u'Doug Rattmann', u'Doug Ratmann', u'Doug Ratman'),
    wordFilter(u'Rattman', u'Rattmann', u'Rattman', u'Ratmann', u'Ratman'),
    wordFilter(u'Rat Man', u'Ratt Mann', u'Ratt Man', u'Rat Mann'),
    wordFilter('screenshot', 'screen shot'),
    wordFilter('screenshots', 'screen shots'),
    wordFilter('in-game', 'ingame')
)
=== Section headers ===
# Section header normalisation. The first argument of each wordFilter call is
# the canonical header text; the second is written as a regular expression
# that accepts two or more equals signs on each side, with an optional single
# space between the equals signs and the title.
# NOTE(review): because the patterns accept any heading level, this looks like
# it also rewrites deeper (or shallower) headings to the level used in the
# first argument - confirm that this is intended.
addSafeFilter(
    wordFilter(u'== Update history ==', u'==+ ?(?:Update history|Previous changes) ?==+'),
    wordFilter(u'== See also ==', u'==+ ?See also ?==+'),
    wordFilter(u'== External links ==', u'==+ ?External links ?==+'),
    wordFilter(u'== Unused content ==', u'==+ ?Unused content ?==+'),
    wordFilter(u'== Related achievements ==', u'==+ ?Related achievements ?==+'),
    wordFilter(u'=== Undocumented changes ===', u'==+ ?Undocumented changes ?==+')
)


== Language-specific filters ==
== Language-specific filters ==
Line 26: Line 49:
== Link filters ==
== Link filters ==
=== Wikipedia links filter ===
=== Wikipedia links filter ===
  def wikipediaLinks(link, **kwargs):
def wikipediaLinks(link, **kwargs):
      wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
      if link.getType() == u'external':
    if link.getType() == u'external':
          linkInfo = wikipediaRegex.search(link.getLink())
        linkInfo = wikipediaRegex.search(link.getLink())
          if linkInfo:
        if linkInfo:
              link.setType(u'internal')
            link.setType(u'internal')
              try:
            try:
                  wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
                wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
              except:
            except:
                  wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
                wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
              if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
            if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
                  link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
                link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
              else:
            else:
                  link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
                link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
              if link.getLabel() is None:
            if link.getLabel() is None:
                  link.setLabel(u'(Wikipedia)')
                link.setLabel(u'(Wikipedia)')
      return link
    return link
  addLinkFilter(wikipediaLinks)
addLinkFilter(wikipediaLinks)
 
=== HL Wiki to Combine Overwiki links filter ===
def hlwikiLinks(link, **kwargs):
    """Rewrite external half-life.wikia.com links as internal 'hl2:' links.

    Only links whose type is 'external' are examined. Two URL shapes are
    recognised:
      * .../wiki/<page>         -> treated as an article link
      * .../w.../<last segment> -> treated as a media link; the target is
        prefixed with 'Media:' when it ends in '.wav' (case-insensitive)
        and with ':File:' otherwise

    For a recognised link the type becomes 'internal', the target becomes
    'hl2:' plus the decoded page name (underscores turned into spaces), and
    the page name is used as the label when the link has none.

    link   -- link object passed in by the bot's link filter machinery.
    kwargs -- extra filter arguments; unused here.
    Returns the (possibly modified) link object.
    """
    hlwikiRegex1 = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/wiki/(\S+)$')
    hlwikiRegex2 = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/w[-_/\w]+?/([^/\s]+)$')
    if link.getType() != 'external':
        return link
    url = link.getLink()
    linkInfo = hlwikiRegex1.search(url)
    # Only fall back to the media pattern when the article pattern failed, and
    # only flag the link as media when that fallback actually matched.
    isMedia = False
    if not linkInfo:
        linkInfo = hlwikiRegex2.search(url)
        isMedia = bool(linkInfo)
    if not linkInfo:
        return link
    link.setType('internal')
    try:
        wikiPage = u(urllib2.unquote(str(linkInfo.group(1))).decode('utf8', 'ignore').replace(u'_', ' '))
    except Exception:
        # Percent-decoding failed (e.g. non-ASCII characters in the URL); use
        # the raw captured text instead. Catching Exception rather than using
        # a bare except keeps KeyboardInterrupt/SystemExit propagating.
        wikiPage = u(linkInfo.group(1)).replace(u'_', ' ')
    label = wikiPage  # label keeps the bare name, without any namespace prefix
    if isMedia:
        if wikiPage.lower().endswith('.wav'):
            wikiPage = 'Media:' + wikiPage
        else:
            wikiPage = ':File:' + wikiPage
    link.setLink('hl2:' + wikiPage)
    if link.getLabel() is None:
        link.setLabel(label)
    return link
addLinkFilter(hlwikiLinks)


=== Remove trailing slashes from internal links ===
=== Convert [[:Category:Patches|patch]] links to {{tl|Patch name}} ===
  def removeTrailingSlash(l, **kwargs):
def patchNameLinkFilter(l, **kwargs):
      if l.getType() != u'internal':
    if l.getType() != u'internal':
          return l
        return l
      if l.getLink()[-1] == '/':
    regPatchName = compileRegex(u'(January|February|March|April|May|June|July|August|September|October|November|December)\\s+(\\d+),\\s+(\\d{4,})\\s+Patch(?:/\\w+)?')
          l.setLink(l.getLink()[:-1])
    result = regPatchName.match(l.getLink())
      return l
    if result is None or l.getLabel().find(result.group(2)) == -1 or l.getLabel().find(result.group(3)) == -1:
  addLinkFilter(removeTrailingSlash)
        return l
    monthNames = ('january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december')
    patchType = u''
    if l.getLink().lower().find(u'portal') != -1:
        patchType = u'|portal'
    elif l.getLink().lower().find(u'tools') != -1:
        patchType = u'|tools'
    return template(u'<nowiki>{{Patch name|' + u(monthNames.index(result.group(1).lower()) + 1) + u'|' + u(result.group(2)) +  u'|' + u(result.group(3)) + patchType + u'}}</nowiki>')
addLinkFilter(patchNameLinkFilter)


== Template filters ==
== Template filters ==
=== Template renaming ===
=== Template renaming ===
  def templateRenameMapping(t, **kwargs):
def templateRenameMapping(t, **kwargs):
      templateMap = {
    templateMap = {
          # Format goes like this (without the "#" in front obviously):
        # Format goes like this (without the "#" in front obviously):
          #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
          # Last line has no comma at the end
        # Last line has no comma at the end
          'Crush': ['pngcrush']
        'Crush': ['pngcrush'],
      }
        'Chamber infobox': ['test chamber infobox']
      for n in templateMap:
    }
          if t.getName().lower() in templateMap[n]:
    for n in templateMap:
              t.setName(n)
        if t.getName().lower() in templateMap[n]:
      return t
            t.setName(n)
  addTemplateFilter(templateRenameMapping)
    return t
addTemplateFilter(templateRenameMapping)
 
=== Reindent all infoboxes ===
 
def infoboxIndentFilter(t, **kwargs):
    """Turn on fixed two-space indentation for infobox templates.

    A template is affected when its lowercased name contains 'infobox' and
    is not one of the names in the exemption tuple below; any other template
    is returned untouched.

    t      -- template object passed in by the bot's template filter machinery.
    kwargs -- extra filter arguments; unused here.
    Returns the template object.
    """
    exemptNames = ('game infobox', 'website infobox', 'item infobox', 'company infobox', 'chamber infobox', 'old aperture infobox', 'twtm chamber infobox', 'mel old aperture infobox')
    lowered = t.getName().lower()
    if 'infobox' not in lowered or lowered in exemptNames:
        return t
    t.indentationMatters(True)
    t.setDefaultIndentation(2)
    return t
addTemplateFilter(infoboxIndentFilter, lowPriority=True)


=== Remove useless templates ===
=== Remove useless templates ===
  def removeUselessTemplate(t, **kwargs):
def removeUselessTemplate(t, **kwargs):
      if t.getName().lower() in (u'targeted', u'languages'):
    if t.getName().lower() in (u'targeted', u'languages'):
          return None # Delete template
        return None # Delete template
      return t
    return t
  addTemplateFilter(removeUselessTemplate)
addTemplateFilter(removeUselessTemplate)


=== Filter parameters of certain templates ===
=== Filter parameters of certain templates ===
  def templateParamFilter(t, **kwargs):
def templateParamFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
          'patch layout': ['before', 'after', 'current'],
        'patch layout': ['before', 'after', 'current'],
          'item infobox': ['released']
        'item infobox': ['released']
      }
    }
      if t.getName().lower() not in params:
    if t.getName().lower() not in params:
          return t
        return t
      for p in params[t.getName().lower()]:
    for p in params[t.getName().lower()]:
          if t.getParam(p):
        if t.getParam(p):
              t.setParam(p, fixContent(t.getParam(p), **kwargs))
            t.setParam(p, fixContent(t.getParam(p), **kwargs))
      return t
    return t
  addTemplateFilter(templateParamFilter)
addTemplateFilter(templateParamFilter)


=== Remove obsolete parameters ===
=== Remove obsolete parameters ===
  def obsoleteParameterFilter(t, **kwargs):
def obsoleteParameterFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
      }
    }
      if t.getName().lower() not in params:
    if t.getName().lower() not in params:
          return t
        return t
      for p in params[t.getName().lower()]:
    for p in params[t.getName().lower()]:
          p = u(p)
        p = u(p)
          if p.find(u'#n') != -1:
        if p.find(u'#n') != -1:
              for i in range(10):
            for i in range(10):
                  t.delParam(p.replace(u'#n', str(i)))
                t.delParam(p.replace(u'#n', str(i)))
          else:
        else:
              t.delParam(p)
            t.delParam(p)
      return t
    return t
  addTemplateFilter(obsoleteParameterFilter)
addTemplateFilter(obsoleteParameterFilter)
 
=== Add <code>day</code>/<code>month</code>/<code>year</code> to {{tl|Patch layout}} ===
def patchLayoutFilter(t, **kwargs):
    """Reorder 'patch layout' parameters and fill in the patch date.

    Applies only to templates named 'patch layout' (case-insensitive) and
    only when an 'article' keyword argument is supplied. The template gets
    a preferred parameter order, loses its 'current' parameter, and - when
    the article title starts with '<Month> <day>, <year> Patch' - receives
    'day', 'month' (lowercased month name) and 'year' parameters taken from
    that title.

    t      -- template object passed in by the bot's template filter machinery.
    kwargs -- extra filter arguments; 'article' must expose a 'title'.
    Returns the template object.
    """
    isPatchLayout = t.getName().lower() == 'patch layout'
    if not (isPatchLayout and 'article' in kwargs):
        return t
    # Numbered parameter families (source-0..9, diff-0..9) are passed as
    # nested lists, one inner list per number.
    digits = [str(n) for n in xrange(10)]
    sourceGroups = [['source-' + d + '-title', 'source-' + d, 'source-' + d + '-lang'] for d in digits]
    diffGroups = [['diff-' + d] for d in digits]
    preferredOrder = ['game', 'before', 'day', 'month', 'year', 'after', 'source-title', 'source', 'source-lang']
    preferredOrder += sourceGroups
    preferredOrder += ['updatelink', 'update', 'update-link', 'update-lang', 'hide-diff']
    preferredOrder += diffGroups
    preferredOrder.append('notes')
    t.setPreferedOrder(preferredOrder)
    t.delParam('current')
    regPatchName = compileRegex(u'^(January|February|March|April|May|June|July|August|September|October|November|December)\\s+(\\d+),\\s+(\\d{4,})\\s+Patch(?:/\\w+)?')
    titleMatch = regPatchName.match(u(kwargs['article'].title))
    if titleMatch is None:
        return t
    monthName, dayNumber, yearNumber = titleMatch.groups()
    t.setParam('day', dayNumber)
    t.setParam('month', monthName.lower())
    t.setParam('year', yearNumber)
    return t
addTemplateFilter(patchLayoutFilter)


=== Implement {{tl|Dictionary}} ===
=== Implement {{tl|Dictionary}} ===
  class DictionaryUpdater:
class DictionaryUpdater:
      def __init__(self):
    def __init__(self):
          self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}"""</nowiki>
          self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}"""</nowiki>
          self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Error''': Invalid parameter passed.</div>"""</nowiki>
        self.invalidParamError = <nowiki>"""<span class="error">Error: invalid param.</span>[[Category:ERROR]]"""</nowiki>
          self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
        self.subpageTemplateID = <nowiki>"""%string%"""</nowiki>
          self.dictionaries = {
        self.partialUpdateThreshold = 750 # Update SyncData every n edits
              u'Template:Dictionary/items': { # Dictionary page
        self.dictionaries = {
                  'name': 'items', # Dictionary name (used for categorizing)
            u'Template:Dictionary/items': { # Dictionary page
                  'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
                'name': 'items', # Dictionary name (used for categorizing)
              },
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
              u'Template:Dictionary/common strings': { # Warning: no underscore
            },
                  'name': 'common strings',
            u'Template:Dictionary/common strings': { # Warning: no underscore
                  'sync': 'Template:Dictionary/common strings/Special:SyncData'
                'name': 'common strings',
              },
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
              u'Template:Dictionary/price': {
            },
                  'name': 'price',
            u'Template:Dictionary/price': {
                  'sync': 'Template:Dictionary/price/Special:SyncData'
                'name': 'price',
              },
                'sync': 'Template:Dictionary/price/Special:SyncData',
              u'Template:Dictionary/mechanics': {
                'allTemplate': <nowiki>'{{{{{template|item price/fmt}}}|%options%|tt={{{tt|yes}}}}}'</nowiki>
                  'name': 'mechanics',
            },
                  'sync': 'Template:Dictionary/mechanics/Special:SyncData'
            u'Template:Dictionary/mechanics': {
              },
                'name': 'mechanics',
              u'Template:Dictionary/characters': {
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
                  'name': 'characters',
            },
                  'sync': 'Template:Dictionary/characters/Special:SyncData'
            u'Template:Dictionary/merchandise': {
              },
                'name': 'merchandise',
              u'Template:Dictionary/demonstration': {
                'sync': 'Template:Dictionary/merchandise/Special:SyncData'
                  'name': 'demonstration',
            },
                  'sync': 'Template:Dictionary/demonstration/Special:SyncData'
            u'Template:Dictionary/characters': {
              },
                'name': 'characters',
              u'Template:Dictionary/portal achievements': {
                'sync': 'Template:Dictionary/characters/Special:SyncData'
                  'name': 'portal achievements',
            },
                  'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
            u'Template:Dictionary/demonstration': {
              },
                'name': 'demonstration',
              u'Template:Dictionary/portal: still alive achievements': {
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
                  'name': 'portal: still alive achievements',
            },
                  'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
            u'Template:Dictionary/transcripts': {
              },
                'name': 'transcripts',
              u'Template:Dictionary/portal 2 achievements': {
                'sync': 'Template:Dictionary/transcripts/Special:SyncData'
                  'name': 'portal 2 achievements',
            },
                  'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
            u'Template:Dictionary/portal achievements': {
              },
                'name': 'portal achievements',
              u'Template:Dictionary/audio': {
                'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
                  'name': 'audio',
            },
                  'sync': 'Template:Dictionary/audio/Special:SyncData'
            u'Template:Dictionary/portal: still alive achievements': {
              }
                'name': 'portal: still alive achievements',
          }
                'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
          self.subpageSeparator = u'/'
            },
          # List of supported languages, in prefered order
            u'Template:Dictionary/portal 2 achievements': {
          self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
                'name': 'portal 2 achievements',
          self.defaultLang = u'en'
                'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
          self.filterName = u'Your friendly neighborhood dictionary updater'
            },
          self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
            u'Template:Dictionary/rexaura achievements': {
          self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
                'name': 'rexaura achievements',
          self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
                'sync': 'Template:Dictionary/rexaura achievements/Special:SyncData'
          addWhitelistPage(self.dictionaries.keys())
            },
      def generateSubpage(self, keyName, data, currentDict, syncData):
            u'Template:Dictionary/portal stories: mel achievements': {
          h = hashlib.md5()
                'name': 'portal stories: mel achievements',
          if type(data) is type({}): # Subkeys (translations or not)
                'sync': 'Template:Dictionary/portal stories: mel achievements/Special:SyncData'
              isTranslation = True
            },
              subpage = u(self.subpageTemplateLang)
            u'Template:Dictionary/portal pinball achievements': {
              for k in data:
                'name': 'portal pinball achievements',
                  if k not in self.languages:
                'sync': 'Template:Dictionary/portal pinball achievements/Special:SyncData'
                      isTranslation = False
            },
                      subpage = u(self.subpageTemplateParam)
            u'Template:Dictionary/bridge constructor portal achievements': {
                      break
                'name': 'bridge constructor portal achievements',
              ordered = []
                'sync': 'Template:Dictionary/bridge constructor portal achievements/Special:SyncData'
              if isTranslation:
            },
                  missing = []
            u'Template:Dictionary/audio': {
                  for lang in self.languages:
                'name': 'audio',
                      if lang in data:
                'sync': 'Template:Dictionary/audio/Special:SyncData'
                          ordered.append(lang + u'=' + data[lang])
            },
                          h.update((lang + u'=' + data[lang]).encode('utf8'))
            u'Template:Dictionary/voice lines/Adventure core': {
                      else:
                'name': 'voice lines/Adventure core',
                          missing.append(lang)
                'sync': 'Template:Dictionary/voice lines/Adventure core/Special:SyncData'
                          h.update((u'null-' + lang).encode('utf8'))
            },
                  if self.defaultLang in data:
            u'Template:Dictionary/voice lines/Announcer': {
                      ordered.append(u'#default=' + data[self.defaultLang])
                'name': 'voice lines/Announcer',
                  if len(missing):
                'sync': 'Template:Dictionary/voice lines/Announcer/Special:SyncData'
                      subpage = subpage.replace(u'%missing%', <nowiki>u"'''Languages missing''': "</nowiki> + u', '.join(missing))
            },
                  else:
            u'Template:Dictionary/voice lines/Ap-Sap': {
                      subpage = subpage.replace(u'%missing%', <nowiki>u"'''Supported languages''': All"</nowiki>)
                'name': 'voice lines/Ap-Sap',
              else: # Not a translation
                'sync': 'Template:Dictionary/voice lines/Ap-Sap/Special:SyncData'
                  h.update('Any-')
            },
                  subkeys = data.keys()
            u'Template:Dictionary/voice lines/Cave Johnson': {
                  subkeys.sort()
                'name': 'voice lines/Cave Johnson',
                  for k in subkeys:
                'sync': 'Template:Dictionary/voice lines/Cave Johnson/Special:SyncData'
                      ordered.append(k + u'=' + data[k])
            },
                      h.update((k + u'=' + data[k]).encode('utf8'))
            u'Template:Dictionary/voice lines/Defective Turret': {
                  #ordered.append(u'#default=' + u(self.invalidParamError))
                'name': 'voice lines/Defective Turret',
              subpage = subpage.replace(u'%options%', u'|'.join(ordered))
                'sync': 'Template:Dictionary/voice lines/Defective Turret/Special:SyncData'
          else: # No subkeys
            },
              data = u(data)
            u'Template:Dictionary/voice lines/Fact core': {
              subpage = self.subpageTemplateID
                'name': 'voice lines/Fact core',
              h.update(u(u'ID-' + data).encode('utf8'))
                'sync': 'Template:Dictionary/voice lines/Fact core/Special:SyncData'
              subpage = subpage.replace(u'%string%', data)
            },
          h = u(h.hexdigest())
            u'Template:Dictionary/voice lines/Floor Turret': {
          if keyName in syncData and syncData[keyName] == h:
                'name': 'voice lines/Floor Turret',
              return # Same hash
                'sync': 'Template:Dictionary/voice lines/Floor Turret/Special:SyncData'
          syncData[keyName] = h # Update sync data
            },
          subpage = subpage.replace(u'%dictionary%', currentDict)
            u'Template:Dictionary/voice lines/GLaDOS': {
          subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
                'name': 'voice lines/GLaDOS',
          subpage = subpage.replace(u'%keyname%', keyName)
                'sync': 'Template:Dictionary/voice lines/GLaDOS/Special:SyncData'
          editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False)
            },
      def processComment(self, commentString, currentDict, definedStrings, syncData):
            u'Template:Dictionary/voice lines/Space core': {
          commentContents = []
                'name': 'voice lines/Space core',
          for extractedStr in self.stringsExtract.finditer(commentString):
                'sync': 'Template:Dictionary/voice lines/Space core/Special:SyncData'
              comment = u''
            },
              if extractedStr.group(1):
            u'Template:Dictionary/voice lines/Wheatley': {
                  comment = u'# ' + u(extractedStr.group(1)) + u'\n'
                'name': 'voice lines/Wheatley',
              dataString = u(extractedStr.group(3))
                'sync': 'Template:Dictionary/voice lines/Wheatley/Special:SyncData'
              if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
            },
                  data = dataString.strip()
            u'Template:Dictionary/voice lines/Grady': {
                  dataWriteback = u' ' + data
                'name': 'voice lines/Grady',
              else: # There's subkeys; detect whether this is a translation or not
                'sync': 'Template:Dictionary/voice lines/Grady/Special:SyncData'
                  data = {}
            },
                  isTranslation = True
            u'Template:Dictionary/voice lines/Desk Job': {
                  for translation in self.translationExtract.finditer(dataString.rstrip()):
                'name': 'voice lines/Desk Job',
                      data[u(translation.group(1))] = u(translation.group(2))
                'sync': 'Template:Dictionary/voice lines/Desk Job/Special:SyncData'
                      if u(translation.group(1)) not in self.languages:
            },
                          isTranslation = False
            u'Template:Dictionary/defindex': {
                  ordered = []
                'name': 'defindex',
                  if isTranslation:
                'sync': 'Template:Dictionary/defindex/Special:SyncData'
                      for lang in self.languages:
            }
                          if lang in data:
        }
                              ordered.append(u' ' + lang + u': ' + data[lang])
        self.subpageSeparator = u'/'
                  else: # Not a translation, so order in alphabetical order
        # List of supported languages, in prefered order
                      subkeys = data.keys()
        self.languages = [u'en', u'ar', u'cs', u'cz', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ka', u'ko', u'nl', u'no', u'pl', u'po', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'sw', u'th', u'tr', u'tu', u'zh-hans', u'zh-hant']
                      subkeys.sort()
        self.defaultLang = u'en'
                      for subk in subkeys:
        self.allKeyName = u'_all_'
                          ordered.append(u'  ' + subk + u': ' + data[subk])
        self.filterName = u'Your friendly neighborhood dictionary updater'
                  dataWriteback = u'\n' + u'\n'.join(ordered)
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
              keyNames = u(extractedStr.group(2)).lower().split(u'|')
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
              validKeyNames = []
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
              for keyName in keyNames:
        addWhitelistPage(self.dictionaries.keys())
                  keyName = keyName.replace(u'_', u' ').strip()
        self.editCounts = {}
                  if keyName in definedStrings:
    def updateSyncData(self, currentDict, syncData, note=''):
                      continue # Duplicate key
        # Build syncdata string representation
                  definedStrings.append(keyName)
        syncKeys = syncData.keys()
                  validKeyNames.append(keyName)
        syncKeys.sort()
                  self.generateSubpage(keyName, data, currentDict, syncData)
        syncLines = []
              if len(validKeyNames):
        for k in syncKeys:
                  commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
            syncLines.append(k + u':' + syncData[k])
          return u'\n\n'.join(commentContents)
        if note:
      def __call__(self, content, **kwargs):
            note = u' (' + u(note) + u')'
          if 'article' not in kwargs:
        editPage(self.dictionaries[currentDict]['sync'], u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']]' + note + u'.'</nowiki>, minor=True, nocreate=False)
              return content
    def generateSubpage(self, keyName, data, currentDict, syncData):
          if u(kwargs['article'].title) not in self.dictionaries:
        h = hashlib.md5()
              return content
        if type(data) is type({}): # Subkeys (translations or not)
          currentDict = u(kwargs['article'].title)
            isTranslation = True
          syncPage = page(self.dictionaries[currentDict]['sync'])
            subpage = u(self.subpageTemplateLang)
          try:
            for k in data:
              syncDataText = u(syncPage.getWikiText()).split(u'\n')
                if 'blankString' in self.dictionaries[currentDict] and data[k] == self.dictionaries[currentDict]['blankString']:
          except: # Page probably doesn't exist
                    data[k] = u''
              syncDataText = u''
                if isTranslation and k not in self.languages:
          syncData = {}
                    isTranslation = False
          for sync in syncDataText:
                    subpage = u(self.subpageTemplateParam)
              sync = u(sync.strip())
            ordered = []
              if not sync:
            unordered = {}
                  continue
            if isTranslation:
              sync = sync.split(u':', 2)
                missing = []
              if len(sync) == 2:
                for lang in self.languages:
                  syncData[sync[0]] = sync[1]
                    if lang in data:
          oldSyncData = syncData.copy()
                        ordered.append(lang + u'=' + data[lang])
          newContent = u''
                        unordered[lang] = data[lang]
          previousIndex = 0
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
          definedStrings = []
                    else:
          for comment in self.commentsExtract.finditer(content):
                        missing.append(lang)
              newContent += content[previousIndex:comment.start()]
                        h.update((u'null-' + lang).encode('utf8'))
              previousIndex = comment.end()
                if self.defaultLang in data:
              # Process current comment
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
              newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
                if len(missing):
          newContent += content[previousIndex:]
                    subpage = subpage.replace(u'%missing%', <nowiki>u"Languages missing: "</nowiki> + u', '.join(missing))
          # Check if we need to update sync data
                else:
          needUpdate = False
                    subpage = subpage.replace(u'%missing%', <nowiki>u"Supported languages: all"</nowiki>)
          for k in syncData:
            else: # Not a translation
              if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                h.update('Any-')
                  needUpdate = True
                subkeys = data.keys()
                  break
                subkeys.sort()
          # Check for deleted strings
                for k in subkeys:
          for k in oldSyncData:
                    ordered.append(k + u'=' + data[k])
              if k not in definedStrings:
                    unordered[k] = data[k]
                  try:
                    h.update((k + u'=' + data[k]).encode('utf8'))
                      deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
            if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])):
                  except:
                allKey = []
                      pass
                keys = unordered.keys()
                  if k in syncData:
                keys.sort()
                      del syncData[k]
                for k in keys:
                  needUpdate = True
                    allKey.append(k + u'=' + unordered[k])
          if needUpdate:
                insertIndex = 0
              # Build syncdata string representation
                if isTranslation and self.defaultLang in data:
              syncKeys = syncData.keys()
                    insertIndex = 1
              syncKeys.sort()
                ordered.insert(insertIndex, u(self.allKeyName) + u'=' + u(self.dictionaries[currentDict]['allTemplate'].replace(u'%options%', u'|'.join(allKey))))
              syncLines = []
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
              for k in syncKeys:
        else: # No subkeys
                  syncLines.append(k + u':' + syncData[k])
            data = u(data)
              editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False)
            subpage = self.subpageTemplateID
          return newContent
            h.update(u(u'ID-' + data).encode('utf8'))
      def scheduledRun(self):
            subpage = subpage.replace(u'%string%', data)
          for d in self.dictionaries:
        h = u(h.hexdigest())
              fixPage(d)
        if keyName in syncData and syncData[keyName] == h:
  dictUpdater = DictionaryUpdater()
            return # Same hash
  addFilter(dictUpdater)
        subpage = subpage.replace(u'%dictionary%', currentDict)
  scheduleTask(dictUpdater.scheduledRun, 3)
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        if editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False):
            syncData[keyName] = h # Update sync data
            if currentDict not in self.editCounts:
                self.editCounts[currentDict] = 0
            self.editCounts[currentDict] += 1
            if self.editCounts[currentDict] > self.partialUpdateThreshold:
                self.editCounts[currentDict] = 0
                self.updateSyncData(currentDict, syncData, 'Partial update')
    def processComment(self, commentString, currentDict, definedStrings, syncData):
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else: # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                    data[u(translation.group(1))] = u(translation.group(2))
                    if u(translation.group(1)) not in self.languages:
                        isTranslation = False
                ordered = []
                if isTranslation:
                    for lang in self.languages:
                        if lang in data:
                            ordered.append(u'  ' + lang + u': ' + data[lang])
                else: # Not a translation, so order in alphabetical order
                    subkeys = data.keys()
                    subkeys.sort()
                    for subk in subkeys:
                        ordered.append(u'  ' + subk + u': ' + data[subk])
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').replace(u'#', u'').strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if len(validKeyNames):
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)
    def __call__(self, content, **kwargs):
        """Filter entry point: regenerate the subpages of a dictionary article.

        Returns content unchanged when no 'article' keyword is supplied or when
        the article title is not a key of self.dictionaries.  Otherwise it loads
        the previous sync data ("key:hash" lines) from the dictionary's sync
        page, runs every match of self.commentsExtract through processComment,
        deletes the subpage of every key that was in the old sync data but is no
        longer defined, writes the sync data back through updateSyncData and
        resets this dictionary's edit counter.

        Returns the article text with each processed comment re-serialized.
        """
        if 'article' not in kwargs:
            return content
        if u(kwargs['article'].title) not in self.dictionaries:
            return content
        currentDict = u(kwargs['article'].title)
        if random.randint(0, 50) == 0: # With probability 2%, ignore syncdata completely. Helps with stale syncdata and people overwriting things.
            syncDataText = u''
        else:
            try:
                syncDataText = u(page(self.dictionaries[currentDict]['sync']).getWikiText()).split(u'\n')
            except: # Page probably doesn't exist
                syncDataText = u''
        # Parse the sync page into {key: hash}; an empty syncDataText yields an empty dict
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            # NOTE(review): maxsplit=2 can produce three parts, so a line containing
            # two or more colons fails the len == 2 check below and is dropped.
            sync = sync.split(u':', 2)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        # Snapshot taken before processComment/generateSubpage mutate syncData
        oldSyncData = syncData.copy()
        newContent = u''
        previousIndex = 0
        definedStrings = [] # Filled by processComment with every key name it accepts
        for comment in self.commentsExtract.finditer(content):
            # Copy the text between the previous comment and this one verbatim
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
        newContent += content[previousIndex:]
        # Check for deleted strings
        for k in oldSyncData:
            if k not in definedStrings:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                except:
                    pass # Deletion is best-effort; the key is dropped from the sync data either way
                if k in syncData:
                    del syncData[k]
        self.updateSyncData(currentDict, syncData, 'Full update')
        # A full update was just written, so restart the partial-update counter
        self.editCounts[currentDict] = 0
        return newContent
    def scheduledRun(self):
        for d in self.dictionaries:
            fixPage(d)
# One shared DictionaryUpdater instance is used both as a filter and as a scheduled task.
dictUpdater = DictionaryUpdater()
addFilter(dictUpdater)
# NOTE(review): the meaning of the second argument (3) is defined by scheduleTask, which is not shown here — confirm.
scheduleTask(dictUpdater.scheduledRun, 3)


== File filters ==
=== Update checklists on [[User:WindBOT/Item checklists|list of subscribers]] ===
=== [http://en.wikipedia.org/wiki/Pngcrush PNGCrush]/[http://jpegclub.org/ jpegtran] all PNG/JPG images ===
def itemChecklists():
  class imageCrushFilter:
    game = 620
      def __init__(self):
    cleanItemName = compileRegex(r'^the +')
          self.minRatio = 10 # Compression ratio threshold
    def updateItemChecklist(checklist, schema, support):
          self.minByteDiff = 2048 # Byte difference threshold
        if not checklist.getParam('steamid'):
          self.jpgScanMap = u'0:  0  0 0 0 ;1 2: 0  0 0 0 ;0:  1  8 0 2 ;1:  1  8 0 0 ;2:  1  8 0 0 ;0:  9 63 0 2 ;0:  1 63 2 1 ;0:  1 63 1 0 ;1:  9 63 0 0 ;2:  9 63 0 0 ;'.replace(u';', u';\n')
            checklist.setParam('error', 'Unspecified Steam ID.')
          self.filterName = 'Saved crush information'
            return
          self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}')
        supportedItems = {}
          try:
        for i in support:
              subprocess.call(['pngcrush', '-version'])
            supportedItems[i] = 0
              self.pngenabled = True
        try:
          except:
            steamUser = steam.user.profile(checklist.getParam('steamid')).id64
              print 'Warning: PNGCrush is not installed or not in $PATH'
        except steam.user.ProfileNotFoundError as e:
              self.pngenabled = False
            try:
          try:
                steamUser = steam.user.vanity_url(checklist.getParam('steamid')).id64
              subprocess.call(['jpegtran', '-h'])
            except Exception as e2:
              self.jpgenabled = True
                checklist.setParam('error', u'Cannot find profile: ' + u(e) + u' / ' + u(e2))
          except:
                return
              print 'Warning: jpegtran is not installed or not in $PATH'
        try:
              self.jpgenabled = False
            backpack = steam.items.inventory(game, steamUser, schema)
      def getRandBits(self):
        except Exception as e:
          return random.getrandbits(128)
            checklist.setParam('error', u'Cannot load inventory: ' + u(e))
      def getFileHash(self, filename):
            return
          h = hashlib.md5()
        for item in backpack:
          f = open(filename, 'rb')
            itemName = cleanItemName.sub(u'', u(item.name).lower())
          for i in f.readlines():
            if itemName in supportedItems:
              h.update(i)
                supportedItems[itemName] += 1
          f.close()
        for item in supportedItems:
          return u(h.hexdigest())
            if supportedItems[item] > 1:
      def deleteFile(self, *fs):
                checklist.setParam(item, supportedItems[item])
          for f in fs:
            elif supportedItems[item] == 1:
              try:
                checklist.setParam(item, 'yes')
                  os.remove(tempFile)
            else:
              except:
                p = checklist.getParam(item)
                  pass
                if p is not None:
      def __call__(self, content, article, **kwargs):
                    p = p.lower()
          title = u(article.title).lower()
                if p in (None, 'no', '0'):
          if title[-4:] == '.png':
                    checklist.setParam(item, 'no')
              isPNG = True
                elif p not in ('wanted', 'want', 'do not', 'anti', 'do not want'):
              if not self.pngenabled:
                    checklist.setParam(item, 'had')
                  return content
        return
          elif title[-5:] == '.jpeg' or title[-4:] == '.jpg':
    try:
              isPNG = False
        schema = steamGetGameSchema(game)
              if not self.jpgenabled:
        allItems = []
                  return content
        for item in schema:
          else:
            allItems.append(cleanItemName.sub(u'', u(item.name).lower()))
              return content
    except:
          try: # This is a high-risk filter, lots of I/O, so wrap it in a big try
        return # No schema means no fancy
              filePage = wikitools.wikifile.File(wiki(), article.title)
    support = []
              hashes = [u'', u'']
    templateParams = compileRegex(r'\{\{\{\s*(?:the +)?([^{}|]+?)\s*\|')
              hashResult = self.extractHash.search(content)
    templateCode = page('Template:Item checklist').getWikiText()
              hashTemplate = None
    res = templateParams.search(templateCode)
              if hashResult:
    while res:
                  hashes = [u(hashResult.group(1)), u(hashResult.group(2))]
        item = u(res.group(1)).lower()
                  hashTemplate = <nowiki>u'{{crush|' + hashes[0] + u'|' + hashes[1] + u'}}'</nowiki>
        if item not in support and item in allItems:
              tempFile = getTempFilename()
            support.append(item)
              filePage.download(location=tempFile, urlQuery=u('nocache=true'))
        templateCode = templateCode[res.end():]
              oldHash = self.getFileHash(tempFile)
        res = templateParams.search(templateCode)
              if oldHash in hashes:
    checkPage, checkLinks, checkKeys = linkExtract(page('User:WindBOT/Item_checklists').getWikiText())
                  return content # Already worked on that one
    linksLeft = checkLinks.values()[:]
              hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|None}}'</nowiki>
    for i in range(12):
              tempOutput = getTempFilename()
        randLink = random.choice(linksLeft)
              if isPNG:
        linksLeft.remove(randLink)
                  result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput])
        checklist = page(randLink.getLink())
              else:
        print 'Updating', checklist
                  mapFile = getTempFilename()
        oldContent = u(checklist.getWikiText())
                  mapFileHandle = open(mapFile, 'wb')
        content, templatelist, templatekeys = templateExtract(oldContent)
                  mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII
        for t in templatelist.values():
                  mapFileHandle.close()
            if t.getName().lower().find(u'checklist') != -1:
                  result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile])
                updateItemChecklist(t, schema, support)
                  self.deleteFile(mapFile)
        content = templateRestore(content, templatelist, templatekeys)
              oldSize = os.path.getsize(tempFile)
        if oldContent != content:
              newSize = os.path.getsize(tempOutput)
            editPage(checklist, content, summary=<nowiki>u'Updated Item checklist [[:' + u(checklist.title) + u']]'</nowiki>, minor=True)
              self.deleteFile(tempFile)
scheduleTask(itemChecklists, 365)
              if not result and oldSize > newSize:
                  # Ready to upload... or are we?
                  ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize))))
                  if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff:
                      newHash = self.getFileHash(tempOutput)
                      if newHash in hashes:
                          self.deleteFile(tempOutput)
                          return content # Already got that result, no need to reupload
                      hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|' + newHash + u'}}'</nowiki>
                      uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True)
                      hashes = [oldHash, newHash]
              if hashResult:
                  content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():]
              else:
                  content = content.strip() + u'\n\n' + hashTemplate
              self.deleteFile(tempOutput)
          except:
              pass # Well, that didn't work
          return content
  addFileFilter(imageCrushFilter())

Latest revision as of 15:01, 25 June 2024

How to disable a filter

If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. To make the bot ignore a certain line, add a "#" in front of it:

# This line will be ignored

If there are multiple lines, wrap them inside triple-quotes (you still need to put the two spaces at the beginning of the line):

"""This line will be ignored
and this one as well
and this one is cake
and the previous one was a lie but it was still ignored"""

If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. Or, if the problem really is elsewhere, block the bot.

Page filters

# Two regular expressions for page titles, handed to addPageFilter (defined outside this section).
# NOTE(review): presumably pages whose title matches are excluded from bot edits — confirm against addPageFilter.
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')

Semantic filters

Capitalized words

# Terms written in their canonical capitalization, handed to enforceCapitalization (defined outside this section).
# NOTE(review): presumably differently-cased occurrences are rewritten to exactly this form — confirm.
enforceCapitalization('Aperture Science', 'P-body', 'GLaDOS')
enforceCapitalization('Portal Gun', 'Weighted Storage Cube')
enforceCapitalization('Steam', 'Nintendo Switch', 'PlayStation', 'Xbox')

Common misspellings

# Spelling corrections.  Judging by the entries, each wordFilter lists the wanted
# spelling first, followed by the variants to replace — confirm against wordFilter.
addSafeFilter(
    wordFilter(u'Doug Rattman', u'Doug Rattmann', u'Doug Ratmann', u'Doug Ratman'),
    # NOTE(review): u'Rattman' appears both as the target and as a variant here; verify this is intentional.
    wordFilter(u'Rattman', u'Rattmann', u'Rattman', u'Ratmann', u'Ratman'),
    wordFilter(u'Rat Man', u'Ratt Mann', u'Ratt Man', u'Rat Mann'),
    wordFilter('screenshot', 'screen shot'),
    wordFilter('screenshots', 'screen shots'),
    wordFilter('in-game', 'ingame')
)

Section headers

# Section header normalization.  Each pattern ('==+ ?Title ?==+') accepts a header of
# any level with an optional space on either side of the title; the first argument is
# the wanted form.  All are level-2 headers except 'Undocumented changes' (level 3).
addSafeFilter(
    wordFilter(u'== Update history ==', u'==+ ?(?:Update history|Previous changes) ?==+'),
    wordFilter(u'== See also ==', u'==+ ?See also ?==+'),
    wordFilter(u'== External links ==', u'==+ ?External links ?==+'),
    wordFilter(u'== Unused content ==', u'==+ ?Unused content ?==+'),
    wordFilter(u'== Related achievements ==', u'==+ ?Related achievements ?==+'),
    wordFilter(u'=== Undocumented changes ===', u'==+ ?Undocumented changes ?==+')
)

Language-specific filters

None yet~

Link filters

Wikipedia links filter

def wikipediaLinks(link, **kwargs):
    """Turn an external Wikipedia URL into an internal 'Wikipedia:' link.

    Only links of type u'external' whose target matches
    http(s)://[lang.]wikipedia.org/wiki/<page> are touched.  The page name is
    URL-unquoted and underscores become spaces.  English (or no language
    prefix) maps to 'Wikipedia:<page>', any other language to
    'Wikipedia:<lang>:<page>'.  A link without a label gets u'(Wikipedia)'.

    Returns the (possibly modified) link object.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    match = wikipediaRegex.search(link.getLink())
    if not match:
        return link
    link.setType(u'internal')
    rawTitle = match.group(2)
    try:
        title = urllib2.unquote(str(rawTitle)).decode('utf8', 'ignore').replace(u'_', ' ')
    except:
        # Fall back to the raw (still quoted) title when unquoting/decoding fails
        title = u(rawTitle).replace(u'_', ' ')
    langCode = match.group(1)
    if langCode and langCode.lower() != u'en':
        target = u'Wikipedia:' + langCode.lower() + u':' + title # Non-english Wikipedia
    else:
        target = u'Wikipedia:' + title # English Wikipedia
    link.setLink(target)
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link
addLinkFilter(wikipediaLinks)

HL Wiki to Combine Overwiki links filter

def hlwikiLinks(link, **kwargs):
    """Turn an external half-life.wikia.com URL into an internal 'hl2:' link.

    URLs of the form .../wiki/<page> become 'hl2:<page>'.  URLs matched only
    by the second pattern (a '/w...' path ending in a final path segment) are
    treated as media: names ending in '.wav' get a 'Media:' prefix, anything
    else a ':File:' prefix.  The name is URL-unquoted with underscores turned
    into spaces, and is also used as the label when the link has none.

    Returns the (possibly modified) link object.
    """
    hlwikiRegex1 = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/wiki/(\S+)$')
    hlwikiRegex2 = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/w[-_/\w]+?/([^/\s]+)$')
    if link.getType() != 'external':
        return link
    isMedia = False
    linkInfo = hlwikiRegex1.search(link.getLink())
    if not linkInfo:
        # Not an article URL; try the media/file URL shape instead
        isMedia = True
        linkInfo = hlwikiRegex2.search(link.getLink())
    if not linkInfo:
        return link
    link.setType('internal')
    encodedName = linkInfo.group(1)
    try:
        wikiPage = u(urllib2.unquote(str(encodedName)).decode('utf8', 'ignore').replace(u'_', ' '))
    except:
        # Fall back to the raw (still quoted) name when unquoting/decoding fails
        wikiPage = u(encodedName).replace(u'_', ' ')
    label = wikiPage # The label never carries the Media:/File: prefix
    if isMedia:
        prefix = 'Media:' if wikiPage[-4:].lower() == '.wav' else ':File:'
        wikiPage = prefix + wikiPage
    link.setLink('hl2:' + wikiPage)
    if link.getLabel() is None:
        link.setLabel(label)
    return link
addLinkFilter(hlwikiLinks)

Convert patch links to {{Patch name}}

def patchNameLinkFilter(l, **kwargs):
    """Replace an internal link to a dated patch page with a {{Patch name}} template.

    The link target must start with '<Month> <day>, <year> Patch' (optionally
    followed by '/<subpage>'), and the link label must contain both the day
    and the year; otherwise the link is returned untouched.  Links that are
    not internal, or that have no label at all, are also returned untouched.

    When the target contains 'portal' or 'tools' (case-insensitive), the
    matching '|portal' / '|tools' argument is appended to the template.

    Returns either the original link object or a new template object.
    """
    if l.getType() != u'internal':
        return l
    regPatchName = compileRegex(u'(January|February|March|April|May|June|July|August|September|October|November|December)\\s+(\\d+),\\s+(\\d{4,})\\s+Patch(?:/\\w+)?')
    result = regPatchName.match(l.getLink())
    if result is None:
        return l
    label = l.getLabel()
    # A link may have no label (getLabel() returns None); calling .find on it
    # would raise AttributeError, so unlabelled links are left as they are.
    if label is None or label.find(result.group(2)) == -1 or label.find(result.group(3)) == -1:
        return l
    monthNames = ('january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december')
    # Must be an empty unicode string: binding the helper function `u` itself
    # here makes the concatenation below raise TypeError for ordinary patches.
    patchType = u''
    if l.getLink().lower().find(u'portal') != -1:
        patchType = u'|portal'
    elif l.getLink().lower().find(u'tools') != -1:
        patchType = u'|tools'
    return template(u'{{Patch name|' + u(monthNames.index(result.group(1).lower()) + 1) + u'|' + u(result.group(2)) +  u'|' + u(result.group(3)) + patchType + u'}}')
# Hand the filter to addLinkFilter (defined outside this section).
addLinkFilter(patchNameLinkFilter)

Template filters

Template renaming

def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known old name.

    The template's current name is compared case-insensitively against the
    lists of bad names below; on a match the template is renamed in place.
    Returns the template object.
    """
    templateMap = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush'],
        'Chamber infobox': ['test chamber infobox']
    }
    for goodName, badNames in templateMap.items():
        # The name is re-read on every pass because an earlier rename may have changed it
        currentName = t.getName().lower()
        if currentName in badNames:
            t.setName(goodName)
    return t
# Hand the filter to addTemplateFilter (defined outside this section).
addTemplateFilter(templateRenameMapping)

Reindent all infoboxes

def infoboxIndentFilter(t, **kwargs):
    """Enable indentation handling on infobox templates that are not exempt.

    A template qualifies when its lowercased name contains 'infobox' and is
    not one of the names listed below; for those, indentationMatters(True)
    and setDefaultIndentation(2) are called.  Returns the template object.
    """
    exemptInfoboxes = ('game infobox', 'website infobox', 'item infobox', 'company infobox', 'chamber infobox', 'old aperture infobox', 'twtm chamber infobox', 'mel old aperture infobox')
    loweredName = t.getName().lower()
    if loweredName.find('infobox') == -1 or loweredName in exemptInfoboxes:
        return t
    t.indentationMatters(True)
    t.setDefaultIndentation(2)
    return t
# Hand the filter to addTemplateFilter (defined outside this section).
# NOTE(review): lowPriority=True presumably makes it run after the other template filters — confirm.
addTemplateFilter(infoboxIndentFilter, lowPriority=True)

Remove useless templates

def removeUselessTemplate(t, **kwargs):
    """Return None for templates named 'targeted' or 'languages' (any case), else the template."""
    uselessNames = (u'targeted', u'languages')
    isUseless = t.getName().lower() in uselessNames
    # Returning None deletes the template
    return None if isUseless else t
# Hand the filter to addTemplateFilter (defined outside this section).
addTemplateFilter(removeUselessTemplate)

Filter parameters of certain templates

def templateParamFilter(t, **kwargs):
    """Run fixContent over selected parameters of selected templates.

    For templates whose lowercased name is a key of the map below, every
    listed parameter with a truthy value is replaced by the result of
    fixContent(value, **kwargs).  Returns the template object.
    """
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    templateName = t.getName().lower()
    if templateName in params:
        for paramName in params[templateName]:
            if not t.getParam(paramName):
                continue # Missing or empty parameter: nothing to filter
            t.setParam(paramName, fixContent(t.getParam(paramName), **kwargs))
    return t
# Hand the filter to addTemplateFilter (defined outside this section).
addTemplateFilter(templateParamFilter)

Remove obsolete parameters

def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from the templates listed in the map below.

    A parameter name containing '#n' stands for ten parameters: '#n' is
    replaced by each of the digits 0-9 in turn.  The map is currently empty,
    so every template is returned untouched.  Returns the template object.
    """
    params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    templateName = t.getName().lower()
    if templateName not in params:
        return t
    for rawName in params[templateName]:
        paramName = u(rawName)
        if u'#n' in paramName:
            for digit in range(10):
                t.delParam(paramName.replace(u'#n', str(digit)))
        else:
            t.delParam(paramName)
    return t
# Hand the filter to addTemplateFilter (defined outside this section).
addTemplateFilter(obsoleteParameterFilter)

Add day/month/year to {{Patch layout}}

def patchLayoutFilter(t, **kwargs):
    """Fill in day/month/year on a 'patch layout' template from the article title.

    Does nothing unless the template's lowercased name is 'patch layout' and
    an 'article' keyword is supplied.  Otherwise it sets the preferred
    parameter order, deletes the 'current' parameter and, when the article
    title starts with '<Month> <day>, <year> Patch', sets 'day', 'month'
    (lowercased) and 'year'.  Returns the template object.
    """
    isPatchLayout = t.getName().lower() == 'patch layout'
    if not isPatchLayout or 'article' not in kwargs:
        return t
    # Numbered parameters are kept together as nested groups inside the order list
    sourceGroups = [['source-' + str(n) + '-title', 'source-' + str(n), 'source-' + str(n) + '-lang'] for n in xrange(10)]
    diffGroups = [['diff-' + str(n)] for n in xrange(10)]
    preferredOrder = ['game', 'before', 'day', 'month', 'year', 'after', 'source-title', 'source', 'source-lang']
    preferredOrder += sourceGroups
    preferredOrder += ['updatelink', 'update', 'update-link', 'update-lang', 'hide-diff']
    preferredOrder += diffGroups
    preferredOrder += ['notes']
    t.setPreferedOrder(preferredOrder)
    t.delParam('current')
    regPatchName = compileRegex(u'^(January|February|March|April|May|June|July|August|September|October|November|December)\\s+(\\d+),\\s+(\\d{4,})\\s+Patch(?:/\\w+)?')
    result = regPatchName.match(u(kwargs['article'].title))
    if result is None:
        return t
    t.setParam('day', result.group(2))
    t.setParam('month', result.group(1).lower())
    t.setParam('year', result.group(3))
    return t
# Hand the filter to addTemplateFilter (defined outside this section).
addTemplateFilter(patchLayoutFilter)

Implement {{Dictionary}}

class DictionaryUpdater:
    """Page filter that pushes the strings of each dictionary page to subpages.

    Each page listed in self.dictionaries holds its string definitions inside
    HTML comments. When the filter is called on one of those pages it:
      - rewrites every comment in a normalized form (sorted subkeys,
        duplicate keys dropped),
      - writes one subpage per string key through editPage,
      - deletes, through deletePage, the subpages of keys that are recorded
        on the sync page but no longer defined,
      - stores one MD5 hash per key on the dictionary's sync page so that
        unchanged strings are not edited again on the next run.
    Calls for any other page return the content unchanged.
    """
    def __init__(self):
        # Subpage templates. generateSubpage substitutes %options%, %string%,
        # %missing%, %dictionary%, %dictionaryname% and %keyname%; a
        # placeholder that does not occur in a template is simply ignored.
        self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}"""
        self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}"""
        self.invalidParamError = """<span class="error">Error: invalid param.</span>[[Category:ERROR]]"""
        self.subpageTemplateID = """%string%"""
        self.partialUpdateThreshold = 750 # Update SyncData every n edits
        self.dictionaries = {
            u'Template:Dictionary/items': { # Dictionary page
                'name': 'items', # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
            },
            u'Template:Dictionary/common strings': { # Warning: no underscore
                'name': 'common strings',
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
            },
            u'Template:Dictionary/price': {
                'name': 'price',
                'sync': 'Template:Dictionary/price/Special:SyncData',
                'allTemplate': '{{{{{template|item price/fmt}}}|%options%|tt={{{tt|yes}}}}}'
            },
            u'Template:Dictionary/mechanics': {
                'name': 'mechanics',
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
            },
            u'Template:Dictionary/merchandise': {
                'name': 'merchandise',
                'sync': 'Template:Dictionary/merchandise/Special:SyncData'
            },
            u'Template:Dictionary/characters': {
                'name': 'characters',
                'sync': 'Template:Dictionary/characters/Special:SyncData'
            },
            u'Template:Dictionary/demonstration': {
                'name': 'demonstration',
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
            },
            u'Template:Dictionary/transcripts': {
                'name': 'transcripts',
                'sync': 'Template:Dictionary/transcripts/Special:SyncData'
            },
            u'Template:Dictionary/portal achievements': {
                'name': 'portal achievements',
                'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal: still alive achievements': {
                'name': 'portal: still alive achievements',
                'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal 2 achievements': {
                'name': 'portal 2 achievements',
                'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
            },
            u'Template:Dictionary/rexaura achievements': {
                'name': 'rexaura achievements',
                'sync': 'Template:Dictionary/rexaura achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal stories: mel achievements': {
                'name': 'portal stories: mel achievements',
                'sync': 'Template:Dictionary/portal stories: mel achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal pinball achievements': {
                'name': 'portal pinball achievements',
                'sync': 'Template:Dictionary/portal pinball achievements/Special:SyncData'
            },
            u'Template:Dictionary/bridge constructor portal achievements': {
                'name': 'bridge constructor portal achievements',
                'sync': 'Template:Dictionary/bridge constructor portal achievements/Special:SyncData'
            },
            u'Template:Dictionary/audio': {
                'name': 'audio',
                'sync': 'Template:Dictionary/audio/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Adventure core': {
                'name': 'voice lines/Adventure core',
                'sync': 'Template:Dictionary/voice lines/Adventure core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Announcer': {
                'name': 'voice lines/Announcer',
                'sync': 'Template:Dictionary/voice lines/Announcer/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Ap-Sap': {
                'name': 'voice lines/Ap-Sap',
                'sync': 'Template:Dictionary/voice lines/Ap-Sap/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Cave Johnson': {
                'name': 'voice lines/Cave Johnson',
                'sync': 'Template:Dictionary/voice lines/Cave Johnson/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Defective Turret': {
                'name': 'voice lines/Defective Turret',
                'sync': 'Template:Dictionary/voice lines/Defective Turret/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Fact core': {
                'name': 'voice lines/Fact core',
                'sync': 'Template:Dictionary/voice lines/Fact core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Floor Turret': {
                'name': 'voice lines/Floor Turret',
                'sync': 'Template:Dictionary/voice lines/Floor Turret/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/GLaDOS': {
                'name': 'voice lines/GLaDOS',
                'sync': 'Template:Dictionary/voice lines/GLaDOS/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Space core': {
                'name': 'voice lines/Space core',
                'sync': 'Template:Dictionary/voice lines/Space core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Wheatley': {
                'name': 'voice lines/Wheatley',
                'sync': 'Template:Dictionary/voice lines/Wheatley/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Grady': {
                'name': 'voice lines/Grady',
                'sync': 'Template:Dictionary/voice lines/Grady/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Desk Job': {
                'name': 'voice lines/Desk Job',
                'sync': 'Template:Dictionary/voice lines/Desk Job/Special:SyncData'
            },
            u'Template:Dictionary/defindex': {
                'name': 'defindex',
                'sync': 'Template:Dictionary/defindex/Special:SyncData'
            }
        }
        self.subpageSeparator = u'/'
        # List of supported languages, in prefered order
        self.languages = [u'en', u'ar', u'cs', u'cz', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ka', u'ko', u'nl', u'no', u'pl', u'po', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'sw', u'th', u'tr', u'tu', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.allKeyName = u'_all_'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        # Matches one HTML comment; group 1 is the text between the delimiters.
        # NOTE(review): this pattern was missing from the copy of the code
        # under review (the wiki renderer strips HTML comments) and has been
        # reconstructed - confirm it against the live bot source.
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        # Matches one definition: optional '# comment' line (group 1), key
        # names separated by '|' (group 2), then either an inline value or a
        # run of indented 'subkey: value' lines (group 3).
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        # Matches one indented 'subkey: value' line inside group 3 above.
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        addWhitelistPage(self.dictionaries.keys())
        self.editCounts = {} # Dictionary page title -> subpage edits since the last sync data write
    def updateSyncData(self, currentDict, syncData, note=u''):
        """Write syncData to the sync page of currentDict.

        The page body is one 'key:hash' line per entry, sorted by key.
        note, when non-empty, is appended in parentheses to the edit summary.
        """
        # Build syncdata string representation
        syncLines = [k + u':' + syncData[k] for k in sorted(syncData)]
        if note:
            note = u' (' + u(note) + u')'
        else:
            note = u'' # Also covers note=None, which could not be concatenated below
        editPage(self.dictionaries[currentDict]['sync'], u'\n'.join(syncLines), summary=u'Updated synchronization information for [[:' + currentDict + u']]' + note + u'.', minor=True, nocreate=False)
    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Write the subpage for one string key if its content hash changed.

        data is either a single value (plain subpage) or a dict of subkeys.
        A dict whose keys are all in self.languages is treated as a set of
        translations; any other dict is treated as named parameters. The
        subpage is only edited when the MD5 hash of the data differs from
        syncData[keyName]; after a successful edit the hash is stored in
        syncData, and the sync page is rewritten once more than
        self.partialUpdateThreshold edits have accumulated for currentDict.
        Values equal to the dictionary's optional 'blankString' are replaced
        by an empty string in data itself.
        """
        dictInfo = self.dictionaries[currentDict]
        h = hashlib.md5()
        if isinstance(data, dict): # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if 'blankString' in dictInfo and data[k] == dictInfo['blankString']:
                    data[k] = u''
                if isTranslation and k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
            ordered = []
            unordered = {}
            if isTranslation:
                missing = []
                for lang in self.languages:
                    if lang in data:
                        entry = lang + u'=' + data[lang]
                        ordered.append(entry)
                        unordered[lang] = data[lang]
                        h.update(entry.encode('utf8'))
                    else:
                        # Missing languages are hashed too, so adding a language changes the hash
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
                if missing:
                    subpage = subpage.replace(u'%missing%', u"Languages missing: " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"Supported languages: all")
            else: # Not a translation
                h.update(b'Any-')
                for k in sorted(data):
                    entry = k + u'=' + data[k]
                    ordered.append(entry)
                    unordered[k] = data[k]
                    h.update(entry.encode('utf8'))
            # 'allTemplate' is a plain string template; the previous check
            # indexed it with ['params'], which raises TypeError on a string
            # whenever there are no options.
            if 'allTemplate' in dictInfo and unordered:
                allKey = [k + u'=' + unordered[k] for k in sorted(unordered)]
                insertIndex = 0
                if isTranslation and self.defaultLang in data:
                    insertIndex = 1 # Keep '#default=' as the first option
                ordered.insert(insertIndex, u(self.allKeyName) + u'=' + u(dictInfo['allTemplate'].replace(u'%options%', u'|'.join(allKey))))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else: # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        digest = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == digest:
            return # Same hash
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', dictInfo['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        if editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False):
            syncData[keyName] = digest # Update sync data
            self.editCounts[currentDict] = self.editCounts.get(currentDict, 0) + 1
            if self.editCounts[currentDict] > self.partialUpdateThreshold:
                self.editCounts[currentDict] = 0
                self.updateSyncData(currentDict, syncData, 'Partial update')
    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Process the definitions found in one comment and return them normalized.

        Every definition matched by self.stringsExtract is parsed, its key
        names are lowercased and cleaned ('_' becomes a space, '#' is
        removed), and generateSubpage is called for each key not already in
        definedStrings. New keys are appended to definedStrings. The return
        value is the rewritten comment text: definitions separated by a
        blank line, translations in self.languages order, other subkeys in
        alphabetical order. Definitions whose keys are all duplicates are
        dropped from the returned text.
        """
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else: # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                    subkey = u(translation.group(1))
                    data[subkey] = u(translation.group(2))
                    if subkey not in self.languages:
                        isTranslation = False
                if isTranslation:
                    ordered = [u'  ' + lang + u': ' + data[lang] for lang in self.languages if lang in data]
                else: # Not a translation, so order in alphabetical order
                    ordered = [u'  ' + subk + u': ' + data[subk] for subk in sorted(data)]
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').replace(u'#', u'').strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if validKeyNames:
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)
    def __call__(self, content, **kwargs):
        """Filter entry point: synchronize one dictionary page.

        Returns content unchanged unless kwargs['article'] is one of the
        pages in self.dictionaries. Otherwise every comment is replaced by
        its normalized form, subpages are updated or deleted as needed, the
        sync page is rewritten, and the new page content is returned.
        """
        if 'article' not in kwargs:
            return content
        currentDict = u(kwargs['article'].title)
        if currentDict not in self.dictionaries:
            return content
        if random.randint(0, 50) == 0: # With probability 2%, ignore syncdata completely. Helps with stale syncdata and people overwriting things.
            syncDataText = []
        else:
            try:
                syncDataText = u(page(self.dictionaries[currentDict]['sync']).getWikiText()).split(u'\n')
            except Exception: # Page probably doesn't exist
                syncDataText = []
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            # Lines are written as key + ':' + hex digest, so split on the
            # LAST colon; splitting from the left discarded every key that
            # itself contains a colon.
            parts = sync.rsplit(u':', 1)
            if len(parts) == 2:
                syncData[parts[0]] = parts[1]
        oldSyncData = syncData.copy()
        chunks = []
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            chunks.append(content[previousIndex:comment.start()])
            previousIndex = comment.end()
            # Process current comment
            # NOTE(review): this statement was truncated in the copy of the
            # code under review; the call is required for definedStrings and
            # syncData to be filled in, but the exact delimiter/whitespace
            # strings are reconstructed - confirm against the live bot source.
            chunks.append(u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->')
        chunks.append(content[previousIndex:])
        newContent = u''.join(chunks)
        # Check for deleted strings
        stillDefined = set(definedStrings)
        for k in oldSyncData:
            if k not in stillDefined:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from ' + currentDict + u'.')
                except Exception:
                    pass # Best effort: the key is dropped from the sync data either way
                syncData.pop(k, None)
        self.updateSyncData(currentDict, syncData, 'Full update')
        self.editCounts[currentDict] = 0
        return newContent
    def scheduledRun(self):
        """Call fixPage on every dictionary page."""
        for d in self.dictionaries:
            fixPage(d)
# One shared DictionaryUpdater: the instance itself is passed to addFilter
# (it is callable through __call__), and its scheduledRun method is passed to
# scheduleTask.
dictUpdater = DictionaryUpdater()
addFilter(dictUpdater)
# NOTE(review): the meaning/unit of the second argument (3) is decided by
# scheduleTask, which is defined outside this section - confirm before changing.
scheduleTask(dictUpdater.scheduledRun, 3)

Update checklists on list of subscribers

def itemChecklists():
    """Refresh up to 12 randomly chosen item checklist pages.

    The item names supported by 'Template:Item checklist' are read from the
    template's own parameters (restricted to names present in the game
    schema). Then, for up to 12 of the pages linked from
    'User:WindBOT/Item_checklists', every template whose name contains
    'checklist' is updated from the inventory of the Steam profile named in
    its 'steamid' parameter, and the page is saved if its text changed.
    Returns None; returns early without doing anything when the schema
    cannot be loaded.
    """
    game = 620
    pagesPerRun = 12
    cleanItemName = compileRegex(r'^the +')
    def updateItemChecklist(checklist, schema, support):
        """Fill one checklist template from its owner's inventory.

        On failure the 'error' parameter is set and nothing else is changed.
        Owned items are set to 'yes' (or to the count when more than one is
        owned). Items not owned become 'no' when unset, 'no' or '0'; keep
        their value when it is one of the 'wanted'/'do not want' markers;
        and become 'had' otherwise.
        """
        if not checklist.getParam('steamid'):
            checklist.setParam('error', 'Unspecified Steam ID.')
            return
        supportedItems = {}
        for i in support:
            supportedItems[i] = 0
        try:
            steamUser = steam.user.profile(checklist.getParam('steamid')).id64
        except steam.user.ProfileNotFoundError as e:
            # Not a profile ID; retry treating the value as a vanity URL name
            try:
                steamUser = steam.user.vanity_url(checklist.getParam('steamid')).id64
            except Exception as e2:
                checklist.setParam('error', u'Cannot find profile: ' + u(e) + u' / ' + u(e2))
                return
        try:
            backpack = steam.items.inventory(game, steamUser, schema)
        except Exception as e:
            checklist.setParam('error', u'Cannot load inventory: ' + u(e))
            return
        for item in backpack:
            itemName = cleanItemName.sub(u'', u(item.name).lower())
            if itemName in supportedItems:
                supportedItems[itemName] += 1
        for item in supportedItems:
            if supportedItems[item] > 1:
                checklist.setParam(item, supportedItems[item])
            elif supportedItems[item] == 1:
                checklist.setParam(item, 'yes')
            else:
                p = checklist.getParam(item)
                if p is not None:
                    p = p.lower()
                if p in (None, 'no', '0'):
                    checklist.setParam(item, 'no')
                elif p not in ('wanted', 'want', 'do not', 'anti', 'do not want'):
                    checklist.setParam(item, 'had')
        return
    try:
        schema = steamGetGameSchema(game)
        allItems = set()
        for item in schema:
            allItems.add(cleanItemName.sub(u'', u(item.name).lower()))
    except Exception:
        return # No schema means no fancy
    # Collect the item names the template accepts, in order of first appearance
    support = []
    seen = set()
    templateParams = compileRegex(r'\{\{\{\s*(?:the +)?([^{}|]+?)\s*\|')
    templateCode = page('Template:Item checklist').getWikiText()
    for res in templateParams.finditer(templateCode):
        item = u(res.group(1)).lower()
        if item not in seen and item in allItems:
            seen.add(item)
            support.append(item)
    checkPage, checkLinks, checkKeys = linkExtract(page('User:WindBOT/Item_checklists').getWikiText())
    linksLeft = list(checkLinks.values())
    # Sample without replacement, capped at the number of links available:
    # drawing a fixed 12 raised IndexError when fewer pages were listed.
    for randLink in random.sample(linksLeft, min(pagesPerRun, len(linksLeft))):
        checklist = page(randLink.getLink())
        print('Updating %s' % (checklist,))
        oldContent = u(checklist.getWikiText())
        content, templatelist, templatekeys = templateExtract(oldContent)
        for t in templatelist.values():
            if t.getName().lower().find(u'checklist') != -1:
                updateItemChecklist(t, schema, support)
        content = templateRestore(content, templatelist, templatekeys)
        if oldContent != content:
            editPage(checklist, content, summary=u'Updated Item checklist [[:' + u(checklist.title) + u']]', minor=True)
# Pass itemChecklists to scheduleTask (defined outside this section).
# NOTE(review): the meaning/unit of 365 is decided by scheduleTask - confirm before changing.
scheduleTask(itemChecklists, 365)