User:WindBOT/Filters

How to disable a filter
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Instead of shutting down the whole bot, it is wiser to disable only the chunk of code that is misbehaving. To make the bot ignore a single line, add a "#" in front of it: # This line will be ignored. To disable several lines at once, wrap them inside triple quotes (you still need to put the two spaces at the beginning of the line): """This line will be ignored and this one as well  and this one is cake  and the previous one was a lie but it was still ignored""" If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. Or, if the problem really is elsewhere, block the bot.

Page filters
# Register two page-title regexes with the bot framework.
# NOTE(review): presumably titles matching these patterns are excluded from
# processing -- confirm against addPageFilter's definition (not on this page).
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')

Semantic filters
None yet~

Language-specific filters
None yet~

Wikipedia links filter
def wikipediaLinks(link, **kwargs):
    """Turn an external Wikipedia URL into an internal ``Wikipedia:`` link.

    ``link`` is a link object supplied by the bot framework; this filter
    uses its getType/getLink/getLabel accessors and the matching setters.
    Links that are not external, or that do not point at
    ``*.wikipedia.org/wiki/...``, are returned untouched.

    Returns the (possibly modified) ``link`` object.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        # Percent-decode the page name; str() raises on non-ASCII input,
        # in which case the raw captured name is used instead.
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    language = linkInfo.group(1)
    if not language or language.lower() == u'en':
        link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + language.lower() + u':' + wikiPage) # Non-english Wikipedia
    if link.getLabel() is None:
        # A bare URL had no label; give the converted link a readable one.
        link.setLabel(u'(Wikipedia)')
    return link
addLinkFilter(wikipediaLinks)

Remove trailing slashes from internal links
def removeTrailingSlash(l, **kwargs):
    """Strip a single trailing ``/`` from the target of an internal link.

    ``l`` is a link object supplied by the bot framework. Non-internal
    links are returned untouched. Returns the (possibly modified) link.
    """
    if l.getType() != u'internal':
        return l
    target = l.getLink()
    # endswith() is safe on an empty target, unlike indexing with [-1].
    if target.endswith('/'):
        l.setLink(target[:-1])
    return l
addLinkFilter(removeTrailingSlash)

Template renaming
def templateRenameMapping(t, **kwargs):
    """Rename templates that use a deprecated name to the preferred name.

    ``t`` is a template object supplied by the bot framework. Its name is
    compared case-insensitively against the lowercase aliases in
    ``templateMap``. Returns the (possibly renamed) template.
    """
    templateMap = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush']
    }
    for n in templateMap:
        # The name is re-read each time because setName() may have changed it.
        if t.getName().lower() in templateMap[n]:
            t.setName(n)
    return t
addTemplateFilter(templateRenameMapping)

Remove useless templates
def removeUselessTemplate(t, **kwargs):
    """Drop templates that should no longer appear on pages.

    Returns ``None`` for a template whose lowercase name is ``targeted``
    or ``languages`` (the original code marks this as "delete template"),
    and returns ``t`` unchanged otherwise.
    """
    if t.getName().lower() in (u'targeted', u'languages'):
        return None # Delete template
    return t
addTemplateFilter(removeUselessTemplate)

Filter parameters of certain templates
def templateParamFilter(t, **kwargs):
    """Run the content filters over selected parameters of some templates.

    For each template listed in ``params`` (keyed by lowercase template
    name), every listed parameter that has a truthy value is passed
    through ``fixContent`` together with the filter's keyword arguments,
    and the result is stored back. Returns the template.
    """
    params = {
        # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for p in params[name]:
        value = t.getParam(p)
        if value:
            t.setParam(p, fixContent(value, **kwargs))
    return t
addTemplateFilter(templateParamFilter)

Remove obsolete parameters
def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from selected templates.

    ``params`` maps a lowercase template name to the parameter names to
    delete. A name containing ``#n`` is expanded with the digits 0-9 and
    each expansion is deleted. The map is currently empty, so no template
    is modified. Returns the template.
    """
    params = {
        # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for p in params[name]:
        p = u(p)
        if p.find(u'#n') != -1:
            for i in range(10):
                t.delParam(p.replace(u'#n', str(i)))
        else:
            t.delParam(p)
    return t
addTemplateFilter(obsoleteParameterFilter)

Implement Dictionary
class DictionaryUpdater:
    """Content filter that pushes dictionary master pages out to sub-pages.

    When called on one of the pages listed in ``self.dictionaries`` it
    parses the string definitions held inside the page's comments, writes
    one sub-page per key via ``editPage``, deletes sub-pages of keys that
    disappeared, and keeps an MD5 hash per key on the dictionary's sync
    page so unchanged keys are not rewritten.
    """

    def __init__(self):
        # NOTE(review): the code below substitutes %options%, %dictionary%,
        # %dictionaryname% and %keyname% into these templates, but none of
        # those placeholders appear in the literals. The wiki markup that
        # carried them appears to have been lost from this copy; restore the
        # templates from a trusted revision before running this filter.
        self.subpageTemplateLang = """  \n: Note: Any changes made here will be automatically overwritten by a bot. Please do not make changes here as they will be lost. Edit the master page instead.\n:%missing% """
        self.subpageTemplateParam = """  \n: Note: Any changes made here will be automatically overwritten by a bot. Please do not make changes here as they will be lost. Edit the master page instead. """
        self.invalidParamError = """ \n: Error: Invalid parameter passed. """
        self.subpageTemplateID = """%string%  \n: Note: Any changes made here will be automatically overwritten by a bot. Please do not make changes here as they will be lost. Edit the master page instead. """
        self.dictionaries = {
            u'Template:Dictionary/items': { # Dictionary page
                'name': 'items', # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
            },
            u'Template:Dictionary/common strings': { # Warning: no underscore
                'name': 'common strings',
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
            },
            u'Template:Dictionary/price': {
                'name': 'price',
                'sync': 'Template:Dictionary/price/Special:SyncData'
            },
            u'Template:Dictionary/mechanics': {
                'name': 'mechanics',
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
            },
            u'Template:Dictionary/characters': {
                'name': 'characters',
                'sync': 'Template:Dictionary/characters/Special:SyncData'
            },
            u'Template:Dictionary/demonstration': {
                'name': 'demonstration',
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
            }
        }
        self.subpageSeparator = u'/'
        # List of supported languages, in preferred order
        self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        # NOTE(review): this pattern was empty in the copy under review (an
        # empty regex matches at every position). It is reconstructed here as
        # "HTML comment with captured body", which is what __call__ needs in
        # order to hand comment text to processComment -- confirm against a
        # trusted revision.
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        # group(1): optional "# ..." remark line, group(2): key name(s)
        # separated by "|", group(3): either a one-line value or a block of
        # "subkey: value" lines.
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([ \t]*[^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        # One "subkey: value" line.
        self.translationExtract = compileRegex(r'^[ \t]*([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        addWhitelistPage(self.dictionaries.keys())

    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Write the sub-page for one key unless its hash is unchanged.

        ``data`` is either a dict of subkey -> value (a translation table
        when every subkey is a supported language code) or a plain string.
        ``syncData`` maps key name -> hex MD5 of the last pushed data and is
        updated in place when the page is rewritten.
        """
        h = hashlib.md5()
        if isinstance(data, dict):
            # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
                    break
            ordered = []
            if isTranslation:
                missing = []
                for lang in self.languages:
                    if lang in data:
                        ordered.append(lang + u'=' + data[lang])
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                    else:
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.append(u'#default=' + data[self.defaultLang])
                if missing:
                    subpage = subpage.replace(u'%missing%', u"Languages missing: " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"Supported languages: All")
            else:
                # Not a translation
                h.update('Any-')
                for k in sorted(data.keys()):
                    ordered.append(k + u'=' + data[k])
                    h.update((k + u'=' + data[k]).encode('utf8'))
                #ordered.append(u'#default=' + u(self.invalidParamError))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else:
            # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        h = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == h:
            return # Same hash
        syncData[keyName] = h # Update sync data
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from ' + currentDict + u' for string "' + keyName + u'".', minor=True, nocreate=False)

    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Parse one comment body, push its strings, return normalized text.

        Every key found is lower-cased, has underscores turned into spaces,
        is appended to ``definedStrings`` (keys already present are skipped
        as duplicates) and is pushed through ``generateSubpage``. The return
        value is the comment body rewritten in canonical order.
        """
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1:
                # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else:
                # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.strip()):
                    subkey = u(translation.group(1))
                    data[subkey] = u(translation.group(2))
                    if subkey not in self.languages:
                        isTranslation = False
                ordered = []
                if isTranslation:
                    for lang in self.languages:
                        if lang in data:
                            ordered.append(u' ' + lang + u': ' + data[lang])
                else:
                    # Not a translation, so order in alphabetical order
                    for subk in sorted(data.keys()):
                        ordered.append(u' ' + subk + u': ' + data[subk])
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if validKeyNames:
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)

    def __call__(self, content, **kwargs):
        """Filter entry point: synchronize a dictionary page, return its text.

        Pages other than the configured dictionaries (or calls without an
        ``article`` keyword) are returned unchanged.
        """
        if 'article' not in kwargs:
            return content
        currentDict = u(kwargs['article'].title)
        if currentDict not in self.dictionaries:
            return content
        syncPage = page(self.dictionaries[currentDict]['sync'])
        try:
            syncDataText = u(syncPage.getWikiText()).split(u'\n')
        except Exception:
            # Page probably doesn't exist
            syncDataText = []
        # Each sync line is "key:hash"; lines of any other shape are ignored.
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            sync = sync.split(u':', 2)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        oldSyncData = syncData.copy()
        newContent = u''
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            # NOTE(review): this statement was reduced to `newContent += u''`
            # in the copy under review, leaving processComment uncalled and
            # definedStrings empty -- which would make the loop further down
            # delete every synced sub-page. The call and the comment
            # delimiters are reconstructed; confirm the exact delimiters.
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
        newContent += content[previousIndex:]
        # Check if we need to update sync data
        needUpdate = False
        for k in syncData:
            if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                needUpdate = True
                break
        # Check for deleted strings
        for k in oldSyncData:
            if k not in definedStrings:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from ' + currentDict + u'.')
                except Exception:
                    pass # Best effort: the sub-page may already be gone
                if k in syncData:
                    del syncData[k]
                needUpdate = True
        if needUpdate:
            # Build syncdata string representation
            syncLines = [k + u':' + syncData[k] for k in sorted(syncData.keys())]
            editPage(syncPage, u'\n'.join(syncLines), summary=u'Updated synchronization information for ' + currentDict + u'.', minor=True, nocreate=False)
        return newContent
# Register an instance: the filter logic lives in __call__(self, content, ...).
addFilter(DictionaryUpdater())

PNGCrush/jpegtran all PNG/JPG images
class imageCrushFilter:
    """File filter that losslessly recompresses PNG and JPEG uploads.

    PNG files are run through ``pngcrush`` and JPEG files through
    ``jpegtran``. A smaller result is re-uploaded when it saves at least
    ``minRatio`` percent or ``minByteDiff`` bytes. File hashes are recorded
    on the file page in a ``{{crush|...|...}}`` template so a file that was
    already processed is skipped.
    """

    def __init__(self):
        self.minRatio = 10 # Compression ratio threshold
        self.minByteDiff = 2048 # Byte difference threshold
        # jpegtran scan script; each ';' becomes the end of a line.
        self.jpgScanMap = u'0:  0  0 0 0 ;1 2: 0  0 0 0 ;0:   1  8 0 2 ;1:   1  8 0 0 ;2:   1  8 0 0 ;0:   9 63 0 2 ;0:   1 63 2 1 ;0:   1 63 1 0 ;1:   9 63 0 0 ;2:   9 63 0 0 ;'.replace(u';', u';\n')
        self.filterName = 'Saved crush information'
        self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}')
        self.pngenabled = self._toolAvailable(['pngcrush', '-version'], 'Warning: PNGCrush is not installed or not in $PATH')
        self.jpgenabled = self._toolAvailable(['jpegtran', '-h'], 'Warning: jpegtran is not installed or not in $PATH')

    def _toolAvailable(self, command, warning):
        """Return True if ``command`` can be launched; else print ``warning``."""
        try:
            subprocess.call(command)
            return True
        except Exception:
            print(warning)
            return False

    def _hashTemplate(self, first, second):
        """Build the marker template that ``self.extractHash`` parses back."""
        return u'{{crush|' + first + u'|' + second + u'}}'

    def getRandBits(self):
        """Return a random 128-bit integer (used as a download URL query)."""
        return random.getrandbits(128)

    def getFileHash(self, filename):
        """Return the hex MD5 digest of the file at ``filename``."""
        h = hashlib.md5()
        with open(filename, 'rb') as f:
            # Fixed-size chunks: binary data has no meaningful line breaks.
            for chunk in iter(lambda: f.read(65536), b''):
                h.update(chunk)
        return u(h.hexdigest())

    def deleteFile(self, *fs):
        """Remove every given path, ignoring paths that cannot be removed."""
        for f in fs:
            try:
                # The original removed the undefined name `tempFile` here, so
                # nothing was ever deleted.
                os.remove(f)
            except OSError:
                pass

    def __call__(self, content, article, **kwargs):
        """Filter entry point: crush the file behind ``article`` if useful.

        Returns the file page text, with the hash template added or
        refreshed when the file was examined, or unchanged when the file is
        not a PNG/JPEG, the required tool is unavailable, the file was
        already processed, or anything fails along the way.
        """
        title = u(article.title).lower()
        if title.endswith('.png'):
            isPNG = True
            if not self.pngenabled:
                return content
        elif title.endswith('.jpeg') or title.endswith('.jpg'):
            isPNG = False
            if not self.jpgenabled:
                return content
        else:
            return content
        tempFiles = [] # Everything listed here is removed on the way out
        try:
            # This is a high-risk filter, lots of I/O, so wrap it in a big try
            filePage = wikitools.wikifile.File(wiki, article.title)
            # NOTE(review): read `[u, u]` in the copy under review; only used
            # for membership tests, so empty strings behave the same.
            hashes = [u'', u'']
            hashResult = self.extractHash.search(content)
            hashTemplate = None
            if hashResult:
                hashes = [u(hashResult.group(1)), u(hashResult.group(2))]
                hashTemplate = self._hashTemplate(hashes[0], hashes[1])
            tempFile = getTempFilename()
            tempFiles.append(tempFile)
            filePage.download(location=tempFile, urlQuery=u(self.getRandBits()))
            oldHash = self.getFileHash(tempFile)
            if oldHash in hashes:
                return content # Already worked on that one
            # NOTE(review): the three template literals were empty strings in
            # the copy under review. They are rebuilt in the shape that
            # self.extractHash accepts; the u'None' second field for "looked
            # at, nothing uploaded" is a guess -- confirm against a trusted
            # revision.
            hashTemplate = self._hashTemplate(oldHash, u'None')
            tempOutput = getTempFilename()
            tempFiles.append(tempOutput)
            if isPNG:
                result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput])
            else:
                mapFile = getTempFilename()
                tempFiles.append(mapFile)
                with open(mapFile, 'wb') as mapFileHandle:
                    mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII
                result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile])
            oldSize = os.path.getsize(tempFile)
            newSize = os.path.getsize(tempOutput)
            if not result and oldSize > newSize:
                # Ready to upload... or are we?
                ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize))))
                if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff:
                    newHash = self.getFileHash(tempOutput)
                    if newHash in hashes:
                        return content # Already got that result, no need to reupload
                    hashTemplate = self._hashTemplate(oldHash, newHash)
                    uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True)
                    hashes = [oldHash, newHash]
            if hashResult:
                content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():]
            else:
                content = content.strip() + u'\n\n' + hashTemplate
        except Exception:
            pass # Well, that didn't work; deliberately best-effort
        finally:
            # Runs on early returns and failures too, so temp files never leak.
            self.deleteFile(*tempFiles)
        return content
# Register an instance: the filter logic lives in __call__(self, content, article, ...).
addFileFilter(imageCrushFilter())