User:WindBOT/Filters: Difference between revisions

Line 15:

__TOC__

No filters yet!

== Page filters ==

# Page-title patterns handed to addPageFilter: the first matches titles that
# start with "user:", the second matches a "talk:", "help:", "wiki:" or
# "template:" prefix anywhere in the title.
# NOTE(review): whether matching pages are skipped or selected, and whether
# matching ignores case, is decided by addPageFilter (not shown here) - confirm.
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')

== Semantic filters ==

None yet~

== Language-specific filters ==

None yet~

== Link filters ==

=== Wikipedia links filter ===

def wikipediaLinks(link, **kwargs):
    """Rewrite external Wikipedia URLs as internal ``Wikipedia:`` links.

    Only links whose ``getType()`` is ``u'external'`` and whose target looks
    like ``http(s)://[<lang>.]wikipedia.org/wiki/<page>`` are touched; every
    other link is returned untouched.

    For a matching link the type becomes ``u'internal'``, the target becomes
    ``Wikipedia:<page>`` (English, or no language subdomain) or
    ``Wikipedia:<lang>:<page>`` (any other language), underscores in the page
    name become spaces, and a missing label is set to ``(Wikipedia)``.

    Args:
        link: link object exposing getType/setType, getLink/setLink and
            getLabel/setLabel.
        **kwargs: accepted for filter-signature compatibility; unused.

    Returns:
        The same ``link`` object, possibly modified in place.
    """
    # Cheap type check first, so nothing is compiled for links we never inspect.
    if link.getType() != u'external':
        return link
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        # Percent-decode the page name; str() raises on non-ASCII input, in
        # which case the raw (still escaped) name is used instead.
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        # Deliberately best-effort, but no longer traps KeyboardInterrupt or
        # SystemExit the way a bare "except:" did.
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    language = (linkInfo.group(1) or u'').lower()
    # "www" is a host name, not a language edition; treat it like English
    # instead of emitting a broken "Wikipedia:www:..." target.
    if language in (u'', u'en', u'www'):
        link.setLink(u'Wikipedia:' + wikiPage) # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + language + u':' + wikiPage) # Non-english Wikipedia
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link

# Pass wikipediaLinks to addLinkFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed link - confirm.
addLinkFilter(wikipediaLinks)

=== Remove trailing slashes from internal links ===

def removeTrailingSlash(l, **kwargs):
    """Strip one trailing ``/`` from the target of an internal link.

    Links whose ``getType()`` is not ``u'internal'`` are returned untouched.
    An empty (or missing) target is left alone instead of raising
    ``IndexError`` as the previous ``getLink()[-1]`` lookup did.

    Args:
        l: link object exposing getType, getLink and setLink.
        **kwargs: accepted for filter-signature compatibility; unused.

    Returns:
        The same link object, possibly modified in place.
    """
    if l.getType() != u'internal':
        return l
    target = l.getLink()
    if target and target.endswith('/'):
        l.setLink(target[:-1])
    return l

# Pass removeTrailingSlash to addLinkFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed link - confirm.
addLinkFilter(removeTrailingSlash)

== Template filters ==

=== Template renaming ===

def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known alias.

    The lowercased template name is looked up in each alias list; on a hit the
    template is renamed to the corresponding preferred name.

    Args:
        t: template object exposing getName and setName.
        **kwargs: accepted for filter-signature compatibility; unused.

    Returns:
        The same template object, possibly renamed in place.
    """
    preferredNames = {
        # One entry per preferred name; aliases must be written in lowercase:
        #'Good template name': ['bad lowercase name 1', 'bad lowercase name 2'],
        # The last entry takes no trailing comma.
        'Crush': ['pngcrush']
    }
    for goodName, aliases in preferredNames.items():
        # The name is re-read on every pass, as an earlier rename may have changed it.
        currentName = t.getName().lower()
        if currentName in aliases:
            t.setName(goodName)
    return t

# Pass templateRenameMapping to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed template - confirm.
addTemplateFilter(templateRenameMapping)

=== Remove useless templates ===

def removeUselessTemplate(t, **kwargs):
    """Drop templates that are on the "useless" list.

    Args:
        t: template object exposing getName.
        **kwargs: accepted for filter-signature compatibility; unused.

    Returns:
        ``None`` when the lowercased template name is ``targeted`` or
        ``languages`` (the original marks this as "delete template");
        otherwise the template object itself.
    """
    uselessNames = (u'targeted', u'languages')
    if t.getName().lower() not in uselessNames:
        return t
    return None # Delete template

# Pass removeUselessTemplate to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed template - confirm.
addTemplateFilter(removeUselessTemplate)

=== Filter parameters of certain templates ===

def templateParamFilter(t, **kwargs):
    """Run ``fixContent`` over selected parameters of selected templates.

    Templates whose lowercased name is not listed are returned untouched.
    For listed templates, each listed parameter that currently has a truthy
    value is replaced by ``fixContent(value, **kwargs)``.

    Args:
        t: template object exposing getName, getParam and setParam.
        **kwargs: forwarded unchanged to ``fixContent``.

    Returns:
        The same template object, possibly modified in place.
    """
    # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
    filteredParams = {
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    templateName = t.getName().lower()
    if templateName not in filteredParams:
        return t
    for paramName in filteredParams[templateName]:
        currentValue = t.getParam(paramName)
        if not currentValue:
            continue
        t.setParam(paramName, fixContent(currentValue, **kwargs))
    return t

# Pass templateParamFilter to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed template - confirm.
addTemplateFilter(templateParamFilter)

=== Remove obsolete parameters ===

def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from selected templates.

    The table below is currently empty, so every template is returned
    untouched. For a listed template, each listed parameter is deleted; a
    parameter name containing ``#n`` is expanded to ten names with ``#n``
    replaced by the digits 0 through 9.

    Args:
        t: template object exposing getName and delParam.
        **kwargs: accepted for filter-signature compatibility; unused.

    Returns:
        The same template object, possibly modified in place.
    """
    # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    obsoleteParams = {
    }
    doomed = obsoleteParams.get(t.getName().lower())
    if doomed is None:
        return t
    for rawName in doomed:
        paramName = u(rawName)
        if u'#n' in paramName:
            for digit in range(10):
                t.delParam(paramName.replace(u'#n', str(digit)))
        else:
            t.delParam(paramName)
    return t

# Pass obsoleteParameterFilter to addTemplateFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on every parsed template - confirm.
addTemplateFilter(obsoleteParameterFilter)

=== Implement {{tl|Dictionary}} ===

class DictionaryUpdater:

def __init__(self):

self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>

self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>

self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Error''': Invalid parameter passed.</div>"""</nowiki>

self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki>

self.dictionaries = {

u'Template:Dictionary/items': { # Dictionary page

'name': 'items', # Dictionary name (used for categorizing)

'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data

},

u'Template:Dictionary/common strings': { # Warning: no underscore

'name': 'common strings',

'sync': 'Template:Dictionary/common strings/Special:SyncData'

},

u'Template:Dictionary/price': {

'name': 'price',

'sync': 'Template:Dictionary/price/Special:SyncData'

}

self.subpageSeparator = u'/'

# List of supported languages, in prefered order

self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant']

self.defaultLang = u'en'

self.filterName = u'Your friendly neighborhood dictionary updater'

self.commentsExtract = compileRegex(r'')

self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([ \t]*[^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)

self.translationExtract = compileRegex(r'^[ \t]*([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)

addWhitelistPage(self.dictionaries.keys())

def generateSubpage(self, keyName, data, currentDict, syncData):

h = hashlib.md5()

if type(data) is type({}): # Subkeys (translations or not)

isTranslation = True

subpage = u(self.subpageTemplateLang)

for k in data:

if k not in self.languages:

isTranslation = False

subpage = u(self.subpageTemplateParam)

break

ordered = []

if isTranslation:

missing = []

for lang in self.languages:

if lang in data:

ordered.append(lang + u'=' + data[lang])

h.update((lang + u'=' + data[lang]).encode('utf8'))

else:

missing.append(lang)

h.update((u'null-' + lang).encode('utf8'))

if self.defaultLang in data:

ordered.append(u'#default=' + data[self.defaultLang])

if len(missing):

subpage = subpage.replace(u'%missing%', <nowiki>u"'''Languages missing''': "</nowiki> + u', '.join(missing))

else:

subpage = subpage.replace(u'%missing%', <nowiki>u"'''Supported languages''': All"</nowiki>)

else: # Not a translation

h.update('Any-')

subkeys = data.keys()

subkeys.sort()

for k in subkeys:

ordered.append(k + u'=' + data[k])

h.update((k + u'=' + data[k]).encode('utf8'))

#ordered.append(u'#default=' + u(self.invalidParamError))

subpage = subpage.replace(u'%options%', u'|'.join(ordered))

else: # No subkeys

data = u(data)

subpage = self.subpageTemplateID

h.update(u(u'ID-' + data).encode('utf8'))

subpage = subpage.replace(u'%string%', data)

h = u(h.hexdigest())

if keyName in syncData and syncData[keyName] == h:

return # Same hash

syncData[keyName] = h # Update sync data

subpage = subpage.replace(u'%dictionary%', currentDict)

subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])

subpage = subpage.replace(u'%keyname%', keyName)

editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False)

def processComment(self, commentString, currentDict, definedStrings, syncData):

commentContents = []

for extractedStr in self.stringsExtract.finditer(commentString):

comment = u''

if extractedStr.group(1):

comment = u'# ' + u(extractedStr.group(1)) + u'\n'

dataString = u(extractedStr.group(3))

if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys

data = dataString.strip()

dataWriteback = u' ' + data

else: # There's subkeys; detect whether this is a translation or not

data = {}

isTranslation = True

for translation in self.translationExtract.finditer(dataString.strip()):

data[u(translation.group(1))] = u(translation.group(2))

if u(translation.group(1)) not in self.languages:

isTranslation = False

ordered = []

if isTranslation:

for lang in self.languages:

if lang in data:

ordered.append(u' ' + lang + u': ' + data[lang])

else: # Not a translation, so order in alphabetical order

subkeys = data.keys()

subkeys.sort()

for subk in subkeys:

ordered.append(u' ' + subk + u': ' + data[subk])

dataWriteback = u'\n' + u'\n'.join(ordered)

keyNames = u(extractedStr.group(2)).lower().split(u'|')

validKeyNames = []

for keyName in keyNames:

keyName = keyName.replace(u'_', u' ').strip()

if keyName in definedStrings:

continue # Duplicate key

definedStrings.append(keyName)

validKeyNames.append(keyName)

self.generateSubpage(keyName, data, currentDict, syncData)

if len(validKeyNames):

commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)

return u'\n\n'.join(commentContents)

def __call__(self, content, **kwargs):

if 'article' not in kwargs:

return content

if u(kwargs['article'].title) not in self.dictionaries:

return content

currentDict = u(kwargs['article'].title)

syncPage = page(self.dictionaries[currentDict]['sync'])

try:

syncDataText = u(syncPage.getWikiText()).split(u'\n')

except: # Page probably doesn't exist

syncDataText = u''

syncData = {}

for sync in syncDataText:

sync = u(sync.strip())

if not sync:

continue

sync = sync.split(u':', 2)

if len(sync) == 2:

syncData[sync[0]] = sync[1]

oldSyncData = syncData.copy()

newContent = u''

previousIndex = 0

definedStrings = []

for comment in self.commentsExtract.finditer(content):

newContent += content[previousIndex:comment.start()]

previousIndex = comment.end()

# Process current comment

newContent += u''

newContent += content[previousIndex:]

# Check if we need to update sync data

needUpdate = False

for k in syncData:

if k not in oldSyncData or oldSyncData[k] != syncData[k]:

needUpdate = True

break

# Check for deleted strings

for k in oldSyncData:

if k not in definedStrings:

try:

deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')

except:

pass

if k in syncData:

del syncData[k]

needUpdate = True

if needUpdate:

# Build syncdata string representation

syncKeys = syncData.keys()

syncKeys.sort()

syncLines = []

for k in syncKeys:

syncLines.append(k + u':' + syncData[k])

editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False)

return newContent

# Build one DictionaryUpdater and pass it to addFilter (defined outside this page).
# NOTE(review): presumably this registers it as a page-content filter - confirm.
addFilter(DictionaryUpdater())

== File filters ==

=== [http://en.wikipedia.org/wiki/Pngcrush PNGCrush]/[http://jpegclub.org/ jpegtran] all PNG/JPG images ===

class imageCrushFilter:
    """File filter that recompresses PNG and JPEG files with external tools.

    For a file page whose title ends in .png, .jpg or .jpeg the file is
    downloaded, run through pngcrush or jpegtran, and re-uploaded when the
    result is sufficiently smaller. The MD5 digests of the original and the
    crushed file are recorded in a crush template on the page so the same
    file is not processed twice.
    """

    def __init__(self):
        """Set thresholds and probe for the pngcrush and jpegtran binaries."""
        self.minRatio = 10 # Compression ratio threshold
        self.minByteDiff = 2048 # Byte difference threshold
        # Scan script written to a file and passed to jpegtran via -scans.
        self.jpgScanMap = u'0: 0 0 0 0 ;1 2: 0 0 0 0 ;0: 1 8 0 2 ;1: 1 8 0 0 ;2: 1 8 0 0 ;0: 9 63 0 2 ;0: 1 63 2 1 ;0: 1 63 1 0 ;1: 9 63 0 0 ;2: 9 63 0 0 ;'.replace(u';', u';\n')
        self.filterName = 'Saved crush information'
        self.extractHash = compileRegex(r'\{\{(?:png)?crush\s*\|\s*(\w+?)\s*\|\s*(\w+?)\s*}}')
        self.pngenabled = self._toolAvailable(['pngcrush', '-version'], 'PNGCrush')
        self.jpgenabled = self._toolAvailable(['jpegtran', '-h'], 'jpegtran')

    def _toolAvailable(self, command, label):
        """Return True if ``command`` can be launched, else warn and return False."""
        try:
            subprocess.call(command)
        except OSError:
            # Raised when the executable cannot be started.
            print('Warning: ' + label + ' is not installed or not in $PATH')
            return False
        return True

    def _crushTemplate(self, oldHash, newHash):
        """Return the crush template call recording both digests.

        The opening braces are split over two literals so this source line
        never contains a literal template opener.
        NOTE(review): this replaces the nowiki wrappers the literals used to
        carry; confirm the host wiki page renders it as intended.
        """
        return u'{' + u'{crush|' + oldHash + u'|' + newHash + u'}}'

    def getRandBits(self):
        """Return a random 128-bit integer (used as the download URL query)."""
        return random.getrandbits(128)

    def getFileHash(self, filename):
        """Return the MD5 hex digest of the file at ``filename``.

        The file is read in fixed-size binary chunks and is closed even when
        reading fails.
        """
        h = hashlib.md5()
        with open(filename, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                h.update(chunk)
        return u(h.hexdigest())

    def deleteFile(self, *fs):
        """Remove every given path, ignoring paths that cannot be removed.

        The previous version removed an undefined name instead of the loop
        variable, so nothing was ever deleted.
        """
        for f in fs:
            if not f:
                continue
            try:
                os.remove(f)
            except OSError:
                pass # Already gone or not removable; nothing useful to do

    def __call__(self, content, article, **kwargs):
        """Crush the file behind ``article`` and record the result in ``content``.

        Args:
            content: wikitext of the file description page.
            article: object with a ``title``; only .png, .jpg and .jpeg
                titles are processed, and only when the matching tool was
                found at construction time.
            **kwargs: accepted for filter-signature compatibility; unused.

        Returns:
            ``content`` unchanged when the file is skipped, was already
            processed, or anything fails; otherwise ``content`` with the
            crush template added or replaced.
        """
        title = u(article.title).lower()
        if title.endswith('.png'):
            isPNG = True
            if not self.pngenabled:
                return content
        elif title.endswith('.jpeg') or title.endswith('.jpg'):
            isPNG = False
            if not self.jpgenabled:
                return content
        else:
            return content
        tempFiles = [] # Everything listed here is removed on every exit path
        try: # This is a high-risk filter, lots of I/O, so wrap it in a big try
            filePage = wikitools.wikifile.File(wiki(), article.title)
            hashes = [u'', u'']
            hashResult = self.extractHash.search(content)
            if hashResult:
                hashes = [u(hashResult.group(1)), u(hashResult.group(2))]
            tempFile = getTempFilename()
            tempFiles.append(tempFile)
            filePage.download(location=tempFile, urlQuery=u(self.getRandBits()))
            oldHash = self.getFileHash(tempFile)
            if oldHash in hashes:
                return content # Already worked on that one
            # Default record: seen this file, no crushed version uploaded.
            hashTemplate = self._crushTemplate(oldHash, u'None')
            tempOutput = getTempFilename()
            tempFiles.append(tempOutput)
            if isPNG:
                result = subprocess.call(['pngcrush', '-rem', 'gAMA', '-rem', 'cHRM', '-rem', 'iCCP', '-rem', 'sRGB', '-brute', tempFile, tempOutput])
            else:
                mapFile = getTempFilename()
                tempFiles.append(mapFile)
                with open(mapFile, 'wb') as mapFileHandle:
                    mapFileHandle.write(self.jpgScanMap.encode('ascii')) # Onoz ASCII
                result = subprocess.call(['jpegtran', '-o', '-scans', mapFile, '-copy', 'none', '-progressive', '-outfile', tempOutput, tempFile])
            oldSize = os.path.getsize(tempFile)
            newSize = os.path.getsize(tempOutput)
            if not result and oldSize > newSize:
                # Ready to upload... or are we?
                ratio = int(round(100 * (1.0 - float(newSize) / float(oldSize))))
                if ratio >= self.minRatio or oldSize - newSize >= self.minByteDiff:
                    newHash = self.getFileHash(tempOutput)
                    if newHash in hashes:
                        return content # Already got that result, no need to reupload
                    hashTemplate = self._crushTemplate(oldHash, newHash)
                    uploadFile(tempOutput, u(article.title), u'Crushed version: ' + u(ratio) + u'% reduction / ' + u(oldSize - newSize) + u' bytes saved; from ' + u(oldSize) + u' to ' + u(newSize) + u' bytes.', overwrite=True, reupload=True)
            if hashResult:
                content = content[:hashResult.start()] + hashTemplate + content[hashResult.end():]
            else:
                content = content.strip() + u'\n\n' + hashTemplate
        except Exception:
            pass # Well, that didn't work
        finally:
            self.deleteFile(*tempFiles)
        return content

# Build one imageCrushFilter and pass it to addFileFilter (defined outside this page).
# NOTE(review): presumably this registers it to run on file pages - confirm.
addFileFilter(imageCrushFilter())

User:WindBOT/Filters: Difference between revisions

Navigation menu

Search