User:WindBOT/Filters: Difference between revisions

From the Portal Wiki
Jump to navigation Jump to search
(Created page with "{{User:WindBOT/Header}} == How to disable a filter == If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting d...")
 
No edit summary
Line 15: Line 15:
__TOC__
__TOC__


No filters yet!
== Page filters ==
  # NOTE(review): presumably restricts the bot to skip "user:" pages unless the
  # title also matches the second pattern (talk/help/wiki/template namespaces)
  # — confirm against addPageFilter's implementation.
  addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')
 
== Semantic filters ==
None yet~
 
== Language-specific filters ==
None yet~
 
== Link filters ==
=== Wikipedia links filter ===
  def wikipediaLinks(link, **kwargs):
      wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
      if link.getType() == u'external':
          linkInfo = wikipediaRegex.search(link.getLink())
          if linkInfo:
              link.setType(u'internal')
              try:
                  wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
              except:
                  wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
              if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
                  link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
              else:
                  link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
              if link.getLabel() is None:
                  link.setLabel(u'(Wikipedia)')
      return link
  addLinkFilter(wikipediaLinks)
 
=== Remove trailing slashes from internal links ===
  def removeTrailingSlash(l, **kwargs):
      if l.getType() != u'internal':
          return l
      if l.getLink()[-1] == '/':
          l.setLink(l.getLink()[:-1])
      return l
  # Register removeTrailingSlash in the bot's link-filter chain.
  addLinkFilter(removeTrailingSlash)
 
== Template filters ==
=== Template renaming ===
  def templateRenameMapping(t, **kwargs):
      templateMap = {
          # Format goes like this (without the "#" in front obviously):
          #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
          # Last line has no comma at the end
          'Crush': ['pngcrush']
      }
      for n in templateMap:
          if t.getName().lower() in templateMap[n]:
              t.setName(n)
      return t
  # Register templateRenameMapping in the bot's template-filter chain.
  addTemplateFilter(templateRenameMapping)
 
=== Remove useless templates ===
  def removeUselessTemplate(t, **kwargs):
      if t.getName().lower() in (u'targeted', u'languages'):
          return None # Delete template
      return t
  # Register removeUselessTemplate in the bot's template-filter chain.
  addTemplateFilter(removeUselessTemplate)
 
=== Filter parameters of certain templates ===
  def templateParamFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
          'patch layout': ['before', 'after', 'current'],
          'item infobox': ['released']
      }
      if t.getName().lower() not in params:
          return t
      for p in params[t.getName().lower()]:
          if t.getParam(p):
              t.setParam(p, fixContent(t.getParam(p), **kwargs))
      return t
  # Register templateParamFilter in the bot's template-filter chain.
  addTemplateFilter(templateParamFilter)
 
=== Remove obsolete parameters ===
  def obsoleteParameterFilter(t, **kwargs):
      params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
      }
      if t.getName().lower() not in params:
          return t
      for p in params[t.getName().lower()]:
          p = u(p)
          if p.find(u'#n') != -1:
              for i in range(10):
                  t.delParam(p.replace(u'#n', str(i)))
          else:
              t.delParam(p)
      return t
  # Register obsoleteParameterFilter in the bot's template-filter chain.
  addTemplateFilter(obsoleteParameterFilter)
 
=== Implement {{tl|Dictionary}} ===
  class DictionaryUpdater:
      def __init__(self):
          self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Error''': Invalid parameter passed.</div>"""</nowiki>
          self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]]&nbsp;'''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>
          self.dictionaries = {
              u'Template:Dictionary/items': { # Dictionary page
                  'name': 'items', # Dictionary name (used for categorizing)
                  'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
              },
              u'Template:Dictionary/common strings': { # Warning: no underscore
                  'name': 'common strings',
                  'sync': 'Template:Dictionary/common strings/Special:SyncData'
              },
              u'Template:Dictionary/price': {
                  'name': 'price',
                  'sync': 'Template:Dictionary/price/Special:SyncData'
              }
          }
          self.subpageSeparator = u'/'
          # List of supported languages, in prefered order
          self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
          self.defaultLang = u'en'
          self.filterName = u'Your friendly neighborhood dictionary updater'
          self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
          self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([ \t]*[^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
          self.translationExtract = compileRegex(r'^[ \t]*([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
          addWhitelistPage(self.dictionaries.keys())
      def generateSubpage(self, keyName, data, currentDict, syncData):
          h = hashlib.md5()
          if type(data) is type({}): # Subkeys (translations or not)
              isTranslation = True
              subpage = u(self.subpageTemplateLang)
              for k in data:
                  if k not in self.languages:
                      isTranslation = False
                      subpage = u(self.subpageTemplateParam)
                      break
              ordered = []
              if isTranslation:
                  missing = []
                  for lang in self.languages:
                      if lang in data:
                          ordered.append(lang + u'=' + data[lang])
                          h.update((lang + u'=' + data[lang]).encode('utf8'))
                      else:
                          missing.append(lang)
                          h.update((u'null-' + lang).encode('utf8'))
                  if self.defaultLang in data:
                      ordered.append(u'#default=' + data[self.defaultLang])
                  if len(missing):
                      subpage = subpage.replace(u'%missing%', <nowiki>u"'''Languages missing''': "</nowiki> + u', '.join(missing))
                  else:
                      subpage = subpage.replace(u'%missing%', <nowiki>u"'''Supported languages''': All"</nowiki>)
              else: # Not a translation
                  h.update('Any-')
                  subkeys = data.keys()
                  subkeys.sort()
                  for k in subkeys:
                      ordered.append(k + u'=' + data[k])
                      h.update((k + u'=' + data[k]).encode('utf8'))
                  #ordered.append(u'#default=' + u(self.invalidParamError))
              subpage = subpage.replace(u'%options%', u'|'.join(ordered))
          else: # No subkeys
              data = u(data)
              subpage = self.subpageTemplateID
              h.update(u(u'ID-' + data).encode('utf8'))
              subpage = subpage.replace(u'%string%', data)
          h = u(h.hexdigest())
          if keyName in syncData and syncData[keyName] == h:
              return # Same hash
          syncData[keyName] = h # Update sync data
          subpage = subpage.replace(u'%dictionary%', currentDict)
          subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
          subpage = subpage.replace(u'%keyname%', keyName)
          editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False)
      def processComment(self, commentString, currentDict, definedStrings, syncData):
          commentContents = []
          for extractedStr in self.stringsExtract.finditer(commentString):
              comment = u''
              if extractedStr.group(1):
                  comment = u'# ' + u(extractedStr.group(1)) + u'\n'
              dataString = u(extractedStr.group(3))
              if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                  data = dataString.strip()
                  dataWriteback = u' ' + data
              else: # There's subkeys; detect whether this is a translation or not
                  data = {}
                  isTranslation = True
                  for translation in self.translationExtract.finditer(dataString.strip()):
                      data[u(translation.group(1))] = u(translation.group(2))
                      if u(translation.group(1)) not in self.languages:
                          isTranslation = False
                  ordered = []
                  if isTranslation:
                      for lang in self.languages:
                          if lang in data:
                              ordered.append(u'  ' + lang + u': ' + data[lang])
                  else: # Not a translation, so order in alphabetical order
                      subkeys = data.keys()
                      subkeys.sort()
                      for subk in subkeys:
                          ordered.append(u'  ' + subk + u': ' + data[subk])
                  dataWriteback = u'\n' + u'\n'.join(ordered)
              keyNames = u(extractedStr.group(2)).lower().split(u'|')
              validKeyNames = []
              for keyName in keyNames:
                  keyName = keyName.replace(u'_', u' ').strip()
                  if keyName in definedStrings:
                      continue # Duplicate key
                  definedStrings.append(keyName)
                  validKeyNames.append(keyName)
                  self.generateSubpage(keyName, data, currentDict, syncData)
              if len(validKeyNames):
                  commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
          return u'\n\n'.join(commentContents)
      def __call__(self, content, **kwargs):
          if 'article' not in kwargs:
              return content
          if u(kwargs['article'].title) not in self.dictionaries:
              return content
          currentDict = u(kwargs['article'].title)
          syncPage = page(self.dictionaries[currentDict]['sync'])
          try:
              syncDataText = u(syncPage.getWikiText()).split(u'\n')
          except: # Page probably doesn't exist
              syncDataText = u''
          syncData = {}
          for sync in syncDataText:
              sync = u(sync.strip())
              if not sync:
                  continue
              sync = sync.split(u':', 2)
              if len(sync) == 2:
                  syncData[sync[0]] = sync[1]
          oldSyncData = syncData.copy()
          newContent = u''
          previousIndex = 0
          definedStrings = []
          for comment in self.commentsExtract.finditer(content):
              newContent += content[previousIndex:comment.start()]
              previousIndex = comment.end()
              # Process current comment
              newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
          newContent += content[previousIndex:]
          # Check if we need to update sync data
          needUpdate = False
          for k in syncData:
              if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                  needUpdate = True
                  break
          # Check for deleted strings
          for k in oldSyncData:
              if k not in definedStrings:
                  try:
                      deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                  except:
                      pass
                  if k in syncData:
                      del syncData[k]
                  needUpdate = True
          if needUpdate:
              # Build syncdata string representation
              syncKeys = syncData.keys()
              syncKeys.sort()
              syncLines = []
              for k in syncKeys:
                  syncLines.append(k + u':' + syncData[k])
              editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False)
          return newContent
  addFilter(DictionaryUpdater())
 
== File filters ==
=== [http://en.wikipedia.org/wiki/Pngcrush PNGCrush]/[http://jpegclub.org/ jpegtran] all PNG/JPG images ===
  class imageCrushFilter:
      def __init__(self):
          self.minRatio = 10 # Compression ratio threshold
          self.minByteDiff = 2048 # Byte difference threshold
          self.jpgScanMap = u'0:  0  0 0 0 ;1 2: 0  0 0 0 ;0:  1  8 0 2 ;1:  1  8 0 0 ;2:  1  8 0 0 ;0:  9 63 0 2 ;0:  1 63 2 1 ;0:  1 63 1 0 ;1:  9 63 0 0 ;2:  9 63 0 0 ;'.replace(u';', u';\n')
          self.filterName = 'Saved crush information'
          self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}')
          try:
              subprocess.call(['pngcrush', '-version'])
              self.pngenabled = True
          except:
              print 'Warning: PNGCrush is not installed or not in $PATH'
              self.pngenabled = False
          try:
              subprocess.call(['jpegtran', '-h'])
              self.jpgenabled = True
          except:
              print 'Warning: jpegtran is not installed or not in $PATH'
              self.jpgenabled = False
      def getRandBits(self):
          return random.getrandbits(128)
      def getFileHash(self, filename):
          h = hashlib.md5()
          f = open(filename, 'rb')
          for i in f.readlines():
              h.update(i)
          f.close()
          return u(h.hexdigest())
      def deleteFile(self, *fs):
          for f in fs:
              try:
                  os.remove(tempFile)
              except:
                  pass
      def __call__(self, content, article, **kwargs):
          title = u(article.title).lower()
          if title[-4:] == '.png':
              isPNG = True
              if not self.pngenabled:
                  return content
          elif title[-5:] == '.jpeg' or title[-4:] == '.jpg':
              isPNG = False
              if not self.jpgenabled:
                  return content
          else:
              return content
          try: # This is a high-risk filter, lots of I/O, so wrap it in a big try
              filePage = wikitools.wikifile.File(wiki(), article.title)
              hashes = [u'', u'']
              hashResult = self.extractHash.search(content)
              hashTemplate = None
              if hashResult:
                  hashes = [u(hashResult.group(1)), u(hashResult.group(2))]
                  hashTemplate = <nowiki>u'{{crush|' + hashes[0] + u'|' + hashes[1] + u'}}'</nowiki>
              tempFile = getTempFilename()
              filePage.download(location=tempFile, urlQuery=u(self.getRandBits()))
              oldHash = self.getFileHash(tempFile)
              if oldHash in hashes:
                  return content # Already worked on that one
              hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|None}}'</nowiki>
              tempOutput = getTempFilename()
              if isPNG:
                  result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput])
              else:
                  mapFile = getTempFilename()
                  mapFileHandle = open(mapFile, 'wb')
                  mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII
                  mapFileHandle.close()
                  result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile])
                  self.deleteFile(mapFile)
              oldSize = os.path.getsize(tempFile)
              newSize = os.path.getsize(tempOutput)
              self.deleteFile(tempFile)
              if not result and oldSize > newSize:
                  # Ready to upload... or are we?
                  ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize))))
                  if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff:
                      newHash = self.getFileHash(tempOutput)
                      if newHash in hashes:
                          self.deleteFile(tempOutput)
                          return content # Already got that result, no need to reupload
                      hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|' + newHash + u'}}'</nowiki>
                      uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True)
                      hashes = [oldHash, newHash]
              if hashResult:
                  content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():]
              else:
                  content = content.strip() + u'\n\n' + hashTemplate
              self.deleteFile(tempOutput)
          except:
              pass # Well, that didn't work
          return content
  addFileFilter(imageCrushFilter())

Revision as of 22:00, 26 April 2011

How to disable a filter

If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. To make the bot ignore a certain line, add a "#" in front of it:

 # This line will be ignored

If there are multiple lines, wrap them inside triple-quotes (you still need to put the two spaces at the beginning of the line):

 """This line will be ignored
 and this one as well
 and this one is cake
 and the previous one was a lie but it was still ignored"""

If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. Or, if the problem really is elsewhere, block the bot.

Page filters

 addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')

Semantic filters

None yet~

Language-specific filters

None yet~

Link filters

Wikipedia links filter

 def wikipediaLinks(link, **kwargs):
     wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
     if link.getType() == u'external':
         linkInfo = wikipediaRegex.search(link.getLink())
         if linkInfo:
             link.setType(u'internal')
             try:
                 wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
             except:
                 wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
             if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
                 link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
             else:
                 link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
             if link.getLabel() is None:
                 link.setLabel(u'(Wikipedia)')
     return link
 addLinkFilter(wikipediaLinks)

Remove trailing slashes from internal links

 def removeTrailingSlash(l, **kwargs):
     if l.getType() != u'internal':
         return l
     if l.getLink()[-1] == '/':
         l.setLink(l.getLink()[:-1])
     return l
 addLinkFilter(removeTrailingSlash)

Template filters

Template renaming

 def templateRenameMapping(t, **kwargs):
     templateMap = {
         # Format goes like this (without the "#" in front obviously):
         #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
         # Last line has no comma at the end
         'Crush': ['pngcrush']
     }
     for n in templateMap:
         if t.getName().lower() in templateMap[n]:
             t.setName(n)
     return t
 addTemplateFilter(templateRenameMapping)

Remove useless templates

 def removeUselessTemplate(t, **kwargs):
     if t.getName().lower() in (u'targeted', u'languages'):
         return None # Delete template
     return t
 addTemplateFilter(removeUselessTemplate)

Filter parameters of certain templates

 def templateParamFilter(t, **kwargs):
     params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
         'patch layout': ['before', 'after', 'current'],
         'item infobox': ['released']
     }
     if t.getName().lower() not in params:
         return t
     for p in params[t.getName().lower()]:
         if t.getParam(p):
             t.setParam(p, fixContent(t.getParam(p), **kwargs))
     return t
 addTemplateFilter(templateParamFilter)

Remove obsolete parameters

 def obsoleteParameterFilter(t, **kwargs):
     params = { # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
     }
     if t.getName().lower() not in params:
         return t
     for p in params[t.getName().lower()]:
         p = u(p)
         if p.find(u'#n') != -1:
             for i in range(10):
                 t.delParam(p.replace(u'#n', str(i)))
         else:
             t.delParam(p)
     return t
 addTemplateFilter(obsoleteParameterFilter)

Implement {{Dictionary}}

 class DictionaryUpdater:
     def __init__(self):
         self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
         self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
         self.invalidParamError = """<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Error''': Invalid parameter passed.</div>"""
         self.subpageTemplateID = """%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
         self.dictionaries = {
             u'Template:Dictionary/items': { # Dictionary page
                 'name': 'items', # Dictionary name (used for categorizing)
                 'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
             },
             u'Template:Dictionary/common strings': { # Warning: no underscore
                 'name': 'common strings',
                 'sync': 'Template:Dictionary/common strings/Special:SyncData'
             },
             u'Template:Dictionary/price': {
                 'name': 'price',
                 'sync': 'Template:Dictionary/price/Special:SyncData'
             }
         }
         self.subpageSeparator = u'/'
         # List of supported languages, in preferred order
         self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
         self.defaultLang = u'en'
         self.filterName = u'Your friendly neighborhood dictionary updater'
         self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
         self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([ \t]*[^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
         self.translationExtract = compileRegex(r'^[ \t]*([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
         addWhitelistPage(self.dictionaries.keys())
     def generateSubpage(self, keyName, data, currentDict, syncData):
         h = hashlib.md5()
         if type(data) is type({}): # Subkeys (translations or not)
             isTranslation = True
             subpage = u(self.subpageTemplateLang)
             for k in data:
                 if k not in self.languages:
                     isTranslation = False
                     subpage = u(self.subpageTemplateParam)
                     break
             ordered = []
             if isTranslation:
                 missing = []
                 for lang in self.languages:
                     if lang in data:
                         ordered.append(lang + u'=' + data[lang])
                         h.update((lang + u'=' + data[lang]).encode('utf8'))
                     else:
                         missing.append(lang)
                         h.update((u'null-' + lang).encode('utf8'))
                 if self.defaultLang in data:
                     ordered.append(u'#default=' + data[self.defaultLang])
                 if len(missing):
                     subpage = subpage.replace(u'%missing%', u"'''Languages missing''': " + u', '.join(missing))
                 else:
                     subpage = subpage.replace(u'%missing%', u"'''Supported languages''': All")
             else: # Not a translation
                 h.update('Any-')
                 subkeys = data.keys()
                 subkeys.sort()
                 for k in subkeys:
                     ordered.append(k + u'=' + data[k])
                     h.update((k + u'=' + data[k]).encode('utf8'))
                 #ordered.append(u'#default=' + u(self.invalidParamError))
             subpage = subpage.replace(u'%options%', u'|'.join(ordered))
         else: # No subkeys
             data = u(data)
             subpage = self.subpageTemplateID
             h.update(u(u'ID-' + data).encode('utf8'))
             subpage = subpage.replace(u'%string%', data)
         h = u(h.hexdigest())
         if keyName in syncData and syncData[keyName] == h:
             return # Same hash
         syncData[keyName] = h # Update sync data
         subpage = subpage.replace(u'%dictionary%', currentDict)
         subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
         subpage = subpage.replace(u'%keyname%', keyName)
         editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False)
     def processComment(self, commentString, currentDict, definedStrings, syncData):
         commentContents = []
         for extractedStr in self.stringsExtract.finditer(commentString):
             comment = u''
             if extractedStr.group(1):
                 comment = u'# ' + u(extractedStr.group(1)) + u'\n'
             dataString = u(extractedStr.group(3))
             if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                 data = dataString.strip()
                 dataWriteback = u' ' + data
             else: # There's subkeys; detect whether this is a translation or not
                 data = {}
                 isTranslation = True
                 for translation in self.translationExtract.finditer(dataString.strip()):
                     data[u(translation.group(1))] = u(translation.group(2))
                     if u(translation.group(1)) not in self.languages:
                         isTranslation = False
                 ordered = []
                 if isTranslation:
                     for lang in self.languages:
                         if lang in data:
                             ordered.append(u'  ' + lang + u': ' + data[lang])
                 else: # Not a translation, so order in alphabetical order
                     subkeys = data.keys()
                     subkeys.sort()
                     for subk in subkeys:
                         ordered.append(u'  ' + subk + u': ' + data[subk])
                 dataWriteback = u'\n' + u'\n'.join(ordered)
             keyNames = u(extractedStr.group(2)).lower().split(u'|')
             validKeyNames = []
             for keyName in keyNames:
                 keyName = keyName.replace(u'_', u' ').strip()
                 if keyName in definedStrings:
                     continue # Duplicate key
                 definedStrings.append(keyName)
                 validKeyNames.append(keyName)
                 self.generateSubpage(keyName, data, currentDict, syncData)
             if len(validKeyNames):
                 commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
         return u'\n\n'.join(commentContents)
     def __call__(self, content, **kwargs):
         if 'article' not in kwargs:
             return content
         if u(kwargs['article'].title) not in self.dictionaries:
             return content
         currentDict = u(kwargs['article'].title)
         syncPage = page(self.dictionaries[currentDict]['sync'])
         try:
             syncDataText = u(syncPage.getWikiText()).split(u'\n')
         except: # Page probably doesn't exist
             syncDataText = u''
         syncData = {}
         for sync in syncDataText:
             sync = u(sync.strip())
             if not sync:
                 continue
             sync = sync.split(u':', 2)
             if len(sync) == 2:
                 syncData[sync[0]] = sync[1]
         oldSyncData = syncData.copy()
         newContent = u
         previousIndex = 0
         definedStrings = []
         for comment in self.commentsExtract.finditer(content):
             newContent += content[previousIndex:comment.start()]
             previousIndex = comment.end()
             # Process current comment
             newContent += u
         newContent += content[previousIndex:]
         # Check if we need to update sync data
         needUpdate = False
         for k in syncData:
             if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                 needUpdate = True
                 break
         # Check for deleted strings
         for k in oldSyncData:
             if k not in definedStrings:
                 try:
                     deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from ' + currentDict + u'.')
                 except:
                     pass
                 if k in syncData:
                     del syncData[k]
                 needUpdate = True
         if needUpdate:
             # Build syncdata string representation
             syncKeys = syncData.keys()
             syncKeys.sort()
             syncLines = []
             for k in syncKeys:
                 syncLines.append(k + u':' + syncData[k])
             editPage(syncPage, u'\n'.join(syncLines), summary=u'Updated synchronization information for [[:' + currentDict + u']].', minor=True, nocreate=False)
         return newContent
 addFilter(DictionaryUpdater())

File filters

Run PNGCrush/jpegtran on all PNG/JPG images

 class imageCrushFilter:
     """File filter: losslessly recompress uploaded PNG (pngcrush) and JPEG
     (jpegtran) images, re-upload the result when the size win is worthwhile,
     and record the before/after MD5 hashes in a {{crush}} template on the
     file page so the same image is never crushed twice."""
     def __init__(self):
         self.minRatio = 10 # Compression ratio threshold (percent saved)
         self.minByteDiff = 2048 # Byte difference threshold (absolute saving)
         # Progressive scan script handed to jpegtran's -scans option (one scan per line).
         self.jpgScanMap = u'0:   0  0 0 0 ;1 2: 0  0 0 0 ;0:   1  8 0 2 ;1:   1  8 0 0 ;2:   1  8 0 0 ;0:   9 63 0 2 ;0:   1 63 2 1 ;0:   1 63 1 0 ;1:   9 63 0 0 ;2:   9 63 0 0 ;'.replace(u';', u';\n')
         self.filterName = 'Saved crush information'
         # Matches {{crush|oldhash|newhash}} (and the legacy {{pngcrush|...}} form).
         self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}')
         # Probe for the external tools; disable handling of any format whose tool is missing.
         try:
             subprocess.call(['pngcrush', '-version'])
             self.pngenabled = True
         except:
             print('Warning: PNGCrush is not installed or not in $PATH')
             self.pngenabled = False
         try:
             subprocess.call(['jpegtran', '-h'])
             self.jpgenabled = True
         except:
             print('Warning: jpegtran is not installed or not in $PATH')
             self.jpgenabled = False
     def getRandBits(self):
         """Return 128 random bits, used as a cache-busting download query."""
         return random.getrandbits(128)
     def getFileHash(self, filename):
         """Return the MD5 hex digest of the file at *filename*."""
         h = hashlib.md5()
         f = open(filename, 'rb')
         try:
             for line in f.readlines():
                 h.update(line)
         finally: # Close the handle even if reading fails
             f.close()
         return u(h.hexdigest())
     def deleteFile(self, *fs):
         """Best-effort removal of every given path; errors are ignored."""
         for f in fs:
             try:
                 # BUG FIX: this previously called os.remove(tempFile), which
                 # referenced an unrelated (unbound) name instead of f, so the
                 # requested files were never removed and the NameError was
                 # silently swallowed by the bare except below.
                 os.remove(f)
             except:
                 pass
     def __call__(self, content, article, **kwargs):
         """Crush the image behind *article* (if it is a PNG/JPEG whose current
         hash has not been processed before) and return *content* with an
         updated {{crush}} hash marker."""
         title = u(article.title).lower()
         if title[-4:] == '.png':
             isPNG = True
             if not self.pngenabled:
                 return content
         elif title[-5:] == '.jpeg' or title[-4:] == '.jpg':
             isPNG = False
             if not self.jpgenabled:
                 return content
         else:
             return content # Not an image type we handle
         try: # This is a high-risk filter, lots of I/O, so wrap it in a big try
             filePage = wikitools.wikifile.File(wiki(), article.title)
             # [pre-crush hash, post-crush hash] from a previous run.
             # FIX: was "[u, u]", which stored the u() helper itself; empty
             # strings give the same membership-test behavior without the
             # accidental function references.
             hashes = [u'', u'']
             hashResult = self.extractHash.search(content)
             hashTemplate = None
             if hashResult:
                 hashes = [u(hashResult.group(1)), u(hashResult.group(2))]
                 hashTemplate = u'{{crush|' + hashes[0] + u'|' + hashes[1] + u'}}'
             tempFile = getTempFilename()
             # The random query string defeats any download-side caching.
             filePage.download(location=tempFile, urlQuery=u(self.getRandBits()))
             oldHash = self.getFileHash(tempFile)
             if oldHash in hashes:
                 return content # Already worked on that one
             hashTemplate = u'{{crush|' + oldHash + u'|None}}'
             tempOutput = getTempFilename()
             if isPNG:
                 result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput])
             else:
                 # jpegtran reads its progressive scan script from a file.
                 mapFile = getTempFilename()
                 mapFileHandle = open(mapFile, 'wb')
                 mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII
                 mapFileHandle.close()
                 result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile])
                 self.deleteFile(mapFile)
             oldSize = os.path.getsize(tempFile)
             newSize = os.path.getsize(tempOutput)
             self.deleteFile(tempFile)
             if not result and oldSize > newSize:
                 # Ready to upload... or are we?
                 ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize))))
                 if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff:
                     newHash = self.getFileHash(tempOutput)
                     if newHash in hashes:
                         self.deleteFile(tempOutput)
                         return content # Already got that result, no need to reupload
                     hashTemplate = u'{{crush|' + oldHash + u'|' + newHash + u'}}'
                     uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True)
                     hashes = [oldHash, newHash]
             # Write the (possibly updated) hash template back into the page text.
             if hashResult:
                 content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():]
             else:
                 content = content.strip() + u'\n\n' + hashTemplate
             self.deleteFile(tempOutput)
         except: # Deliberate best-effort: any failure leaves the page untouched
             pass # Well, that didn't work
         return content
 addFileFilter(imageCrushFilter())