Bureaucrats, coding, Administrators
2,634
edits
(Created page with "{{User:WindBOT/Header}} == How to disable a filter == If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting d...") |
No edit summary |
||
Line 15: | Line 15: | ||
__TOC__ | __TOC__ | ||
== Page filters == | |||
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):') | |||
== Semantic filters == | |||
None yet~ | |||
== Language-specific filters == | |||
None yet~ | |||
== Link filters == | |||
=== Wikipedia links filter === | |||
def wikipediaLinks(link, **kwargs):
    """Turn bare external Wikipedia URLs into [[Wikipedia:...]] interwiki links.

    External links matching http(s)://<lang>.wikipedia.org/wiki/<page> are
    converted to internal links of the form 'Wikipedia:<page>' (English) or
    'Wikipedia:<lang>:<page>' (other languages). Links that already carry a
    label keep it; unlabeled ones get '(Wikipedia)'."""
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() == u'external':
        linkInfo = wikipediaRegex.search(link.getLink())
        if linkInfo:
            link.setType(u'internal')
            try:
                # Percent-decode the page name; undecodable UTF-8 bytes are dropped.
                wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
            except UnicodeError:
                # str() raises UnicodeEncodeError on non-ASCII unicode input
                # (was a bare except:); fall back to the raw matched text.
                wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
            if not linkInfo.group(1) or linkInfo.group(1).lower() == u'en':
                link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
            else:
                link.setLink(u'Wikipedia:' + linkInfo.group(1).lower() + u':' + wikiPage) # Non-english Wikipedia
            if link.getLabel() is None:
                link.setLabel(u'(Wikipedia)')
    return link
addLinkFilter(wikipediaLinks)
=== Remove trailing slashes from internal links === | |||
def removeTrailingSlash(l, **kwargs):
    """Strip one trailing slash from internal link targets.

    External links are passed through untouched."""
    if l.getType() != u'internal':
        return l
    # endswith() avoids the IndexError the old l.getLink()[-1] check raised
    # when the link target was an empty string.
    if l.getLink().endswith(u'/'):
        l.setLink(l.getLink()[:-1])
    return l
addLinkFilter(removeTrailingSlash)
== Template filters == | |||
=== Template renaming === | |||
def templateRenameMapping(t, **kwargs):
    """Rename templates that are known under bad/deprecated names.

    Each entry maps the canonical template name to the list of lowercase
    names it should replace."""
    templateMap = {
        # Format: 'Good template name': ['bad lowercase name 1', 'bad lowercase name 2', ...]
        # Last entry has no trailing comma.
        'Crush': ['pngcrush']
    }
    lowered = t.getName().lower()
    for goodName, badNames in templateMap.items():
        if lowered in badNames:
            t.setName(goodName)
    return t
addTemplateFilter(templateRenameMapping)
=== Remove useless templates === | |||
def removeUselessTemplate(t, **kwargs):
    """Drop templates that should no longer appear on any page."""
    uselessNames = (u'targeted', u'languages')
    if t.getName().lower() in uselessNames:
        # Returning None tells the filter chain to delete the template.
        return None
    return t
addTemplateFilter(removeUselessTemplate)
=== Filter parameters of certain templates === | |||
def templateParamFilter(t, **kwargs):
    """Re-run the content filters over selected parameters of selected templates."""
    # Map: 'lowercase template name' -> list of parameter names to filter.
    params = {
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for paramName in params[name]:
        value = t.getParam(paramName)
        if value:
            t.setParam(paramName, fixContent(value, **kwargs))
    return t
addTemplateFilter(templateParamFilter)
=== Remove obsolete parameters === | |||
def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from the templates listed below."""
    # Map: 'lowercase template name' -> list of parameter names to delete.
    # A literal '#n' in a name expands to each digit 0-9 (e.g. 'foo#n' deletes
    # 'foo0' through 'foo9'). Currently empty.
    params = {
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for paramName in params[name]:
        paramName = u(paramName)
        if u'#n' in paramName:
            for digit in range(10):
                t.delParam(paramName.replace(u'#n', str(digit)))
        else:
            t.delParam(paramName)
    return t
addTemplateFilter(obsoleteParameterFilter)
=== Implement {{tl|Dictionary}} === | |||
class DictionaryUpdater:
    """Implements {{tl|Dictionary}}.

    Reads a master dictionary page (strings are defined inside HTML comments),
    renders every string to its own '#switch'-based subpage, and records an
    MD5 hash per string on a sync page so unchanged strings are not
    re-uploaded on the next run. Subpages for strings removed from the master
    page are deleted."""
    def __init__(self):
        # Subpage body for translated strings; switches on {{{lang|{{SUBPAGENAME}}}}}.
        self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        # Subpage body for keyed (non-translation) strings; switches on {{{1|}}}.
        self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        # Error box for an invalid parameter (currently unused; see the
        # commented-out '#default' line in generateSubpage).
        self.invalidParamError = """<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Error''': Invalid parameter passed.</div>"""
        # Subpage body for plain strings with no subkeys at all.
        self.subpageTemplateID = """%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""
        self.dictionaries = {
            u'Template:Dictionary/items': { # Dictionary page
                'name': 'items', # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
            },
            u'Template:Dictionary/common strings': { # Warning: no underscore
                'name': 'common strings',
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
            },
            u'Template:Dictionary/price': {
                'name': 'price',
                'sync': 'Template:Dictionary/price/Special:SyncData'
            }
        }
        self.subpageSeparator = u'/'
        # List of supported languages, in preferred order
        self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        # Grabs the contents of every <!-- ... --> comment block.
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        # Matches one string definition: an optional '# comment' line, a
        # 'key1|key2:' name list, then either an inline value or one or more
        # indented 'subkey: value' lines.
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([ \t]*[^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        # Matches a single 'subkey: value' line inside a multi-line definition.
        self.translationExtract = compileRegex(r'^[ \t]*([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        addWhitelistPage(self.dictionaries.keys())
    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Render one dictionary string to its subpage and upload it when its
        hash differs from the one recorded in syncData (updated in place).

        data is either a plain string (no subkeys) or a dict of
        subkey -> value; a dict whose keys are all known language codes is
        treated as a translation set."""
        h = hashlib.md5()
        if isinstance(data, dict): # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
                    break
            ordered = []
            if isTranslation:
                missing = []
                # Emit the translations in the canonical language order; hash
                # missing languages too so adding one changes the hash.
                for lang in self.languages:
                    if lang in data:
                        ordered.append(lang + u'=' + data[lang])
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                    else:
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.append(u'#default=' + data[self.defaultLang])
                if len(missing):
                    subpage = subpage.replace(u'%missing%', u"'''Languages missing''': " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"'''Supported languages''': All")
            else: # Not a translation; order subkeys alphabetically
                h.update('Any-')
                for k in sorted(data.keys()):
                    ordered.append(k + u'=' + data[k])
                    h.update((k + u'=' + data[k]).encode('utf8'))
                #ordered.append(u'#default=' + u(self.invalidParamError))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else: # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        h = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == h:
            return # Same hash; nothing changed, skip the edit
        syncData[keyName] = h # Update sync data
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False)
    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Parse one comment block, push its strings to their subpages, and
        return the normalized comment text to write back to the master page.

        definedStrings accumulates key names across comment blocks so
        duplicate keys are dropped."""
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else: # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.strip()):
                    data[u(translation.group(1))] = u(translation.group(2))
                    if u(translation.group(1)) not in self.languages:
                        isTranslation = False
                ordered = []
                if isTranslation:
                    # Write back in the canonical language order.
                    for lang in self.languages:
                        if lang in data:
                            ordered.append(u'  ' + lang + u': ' + data[lang])
                else: # Not a translation, so order in alphabetical order
                    for subk in sorted(data.keys()):
                        ordered.append(u'  ' + subk + u': ' + data[subk])
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if len(validKeyNames):
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)
    def __call__(self, content, **kwargs):
        """Filter entry point: rewrite a dictionary page, sync its subpages,
        and update the stored sync data."""
        if 'article' not in kwargs:
            return content
        if u(kwargs['article'].title) not in self.dictionaries:
            return content # Not one of the watched dictionary pages
        currentDict = u(kwargs['article'].title)
        syncPage = page(self.dictionaries[currentDict]['sync'])
        try:
            syncDataText = u(syncPage.getWikiText()).split(u'\n')
        except Exception: # Page probably doesn't exist yet
            # Bug fix: this used to be u'', a string where a list of lines is
            # expected by the loop below.
            syncDataText = []
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            # Split on the first colon only, matching the 'key:hash' format
            # written below (the old split(u':', 2) dropped keys containing a
            # colon).
            sync = sync.split(u':', 1)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        oldSyncData = syncData.copy()
        newContent = u''
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
        newContent += content[previousIndex:]
        # Check if we need to update sync data
        needUpdate = False
        for k in syncData:
            if k not in oldSyncData or oldSyncData[k] != syncData[k]:
                needUpdate = True
                break
        # Check for deleted strings
        for k in oldSyncData:
            if k not in definedStrings:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                except Exception:
                    pass # Best-effort: subpage may already be gone
                if k in syncData:
                    del syncData[k]
                needUpdate = True
        if needUpdate:
            # Build syncdata string representation, one 'key:hash' per line.
            syncLines = []
            for k in sorted(syncData.keys()):
                syncLines.append(k + u':' + syncData[k])
            editPage(syncPage, u'\n'.join(syncLines), summary=u'Updated synchronization information for [[:' + currentDict + u']].', minor=True, nocreate=False)
        return newContent
addFilter(DictionaryUpdater())
== File filters == | |||
=== [http://en.wikipedia.org/wiki/Pngcrush PNGCrush]/[http://jpegclub.org/ jpegtran] all PNG/JPG images === | |||
class imageCrushFilter: | |||
def __init__(self): | |||
self.minRatio = 10 # Compression ratio threshold | |||
self.minByteDiff = 2048 # Byte difference threshold | |||
self.jpgScanMap = u'0: 0 0 0 0 ;1 2: 0 0 0 0 ;0: 1 8 0 2 ;1: 1 8 0 0 ;2: 1 8 0 0 ;0: 9 63 0 2 ;0: 1 63 2 1 ;0: 1 63 1 0 ;1: 9 63 0 0 ;2: 9 63 0 0 ;'.replace(u';', u';\n') | |||
self.filterName = 'Saved crush information' | |||
self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}') | |||
try: | |||
subprocess.call(['pngcrush', '-version']) | |||
self.pngenabled = True | |||
except: | |||
print 'Warning: PNGCrush is not installed or not in $PATH' | |||
self.pngenabled = False | |||
try: | |||
subprocess.call(['jpegtran', '-h']) | |||
self.jpgenabled = True | |||
except: | |||
print 'Warning: jpegtran is not installed or not in $PATH' | |||
self.jpgenabled = False | |||
def getRandBits(self): | |||
return random.getrandbits(128) | |||
def getFileHash(self, filename): | |||
h = hashlib.md5() | |||
f = open(filename, 'rb') | |||
for i in f.readlines(): | |||
h.update(i) | |||
f.close() | |||
return u(h.hexdigest()) | |||
def deleteFile(self, *fs): | |||
for f in fs: | |||
try: | |||
os.remove(tempFile) | |||
except: | |||
pass | |||
def __call__(self, content, article, **kwargs): | |||
title = u(article.title).lower() | |||
if title[-4:] == '.png': | |||
isPNG = True | |||
if not self.pngenabled: | |||
return content | |||
elif title[-5:] == '.jpeg' or title[-4:] == '.jpg': | |||
isPNG = False | |||
if not self.jpgenabled: | |||
return content | |||
else: | |||
return content | |||
try: # This is a high-risk filter, lots of I/O, so wrap it in a big try | |||
filePage = wikitools.wikifile.File(wiki(), article.title) | |||
hashes = [u'', u''] | |||
hashResult = self.extractHash.search(content) | |||
hashTemplate = None | |||
if hashResult: | |||
hashes = [u(hashResult.group(1)), u(hashResult.group(2))] | |||
hashTemplate = <nowiki>u'{{crush|' + hashes[0] + u'|' + hashes[1] + u'}}'</nowiki> | |||
tempFile = getTempFilename() | |||
filePage.download(location=tempFile, urlQuery=u(self.getRandBits())) | |||
oldHash = self.getFileHash(tempFile) | |||
if oldHash in hashes: | |||
return content # Already worked on that one | |||
hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|None}}'</nowiki> | |||
tempOutput = getTempFilename() | |||
if isPNG: | |||
result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput]) | |||
else: | |||
mapFile = getTempFilename() | |||
mapFileHandle = open(mapFile, 'wb') | |||
mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII | |||
mapFileHandle.close() | |||
result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile]) | |||
self.deleteFile(mapFile) | |||
oldSize = os.path.getsize(tempFile) | |||
newSize = os.path.getsize(tempOutput) | |||
self.deleteFile(tempFile) | |||
if not result and oldSize > newSize: | |||
# Ready to upload... or are we? | |||
ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize)))) | |||
if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff: | |||
newHash = self.getFileHash(tempOutput) | |||
if newHash in hashes: | |||
self.deleteFile(tempOutput) | |||
return content # Already got that result, no need to reupload | |||
hashTemplate = <nowiki>u'{{crush|' + oldHash + u'|' + newHash + u'}}'</nowiki> | |||
uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True) | |||
hashes = [oldHash, newHash] | |||
if hashResult: | |||
content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():] | |||
else: | |||
content = content.strip() + u'\n\n' + hashTemplate | |||
self.deleteFile(tempOutput) | |||
except: | |||
pass # Well, that didn't work | |||
return content | |||
addFileFilter(imageCrushFilter()) |