Bureaucrats, coding, Administrators
2,634
edits
m (→Implement {{tl|Dictionary}}: backport) |
mNo edit summary |
||
Line 4: | Line 4: | ||
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving.
To make the bot ignore a certain line, add a "#" in front of it:
# This line will be ignored | |||
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''):
"""This line will be ignored | |||
and this one as well | |||
and this one is cake | |||
and the previous one was a lie but it was still ignored""" | |||
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything.
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot].
Line 16: | Line 16: | ||
== Page filters == | == Page filters == | ||
# Page-title patterns handed to addPageFilter: user pages, plus the
# talk/help/wiki/template namespaces.
# NOTE(review): presumably pages whose title matches are skipped by the bot —
# confirm against addPageFilter's definition.
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')
== Semantic filters == | == Semantic filters == | ||
Line 26: | Line 26: | ||
== Link filters == | == Link filters == | ||
=== Wikipedia links filter === | === Wikipedia links filter === | ||
def wikipediaLinks(link, **kwargs):
    """Rewrite an external Wikipedia URL as an internal ``Wikipedia:`` link.

    Links whose type is not ``u'external'``, or whose target does not match
    ``http(s)://[<lang>.]wikipedia.org/wiki/<page>``, are returned untouched.
    For a match the link type becomes ``u'internal'``, the target becomes
    ``Wikipedia:<page>`` (English or no language prefix) or
    ``Wikipedia:<lang>:<page>`` (any other prefix), underscores in the page
    name become spaces, and a missing label is set to ``(Wikipedia)``.

    :param link: link object exposing getType/setType, getLink/setLink and
        getLabel/setLabel.
    :param kwargs: extra filter context; unused here.
    :return: the same ``link`` object, possibly modified in place.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        # Percent-decode the page name, then interpret the bytes as UTF-8.
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        # str() fails on non-ASCII unicode input; fall back to the raw
        # (still percent-encoded) page name. A bare "except:" would also
        # have swallowed KeyboardInterrupt/SystemExit.
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    language = linkInfo.group(1)
    if not language or language.lower() == u'en':
        link.setLink(u'Wikipedia:' + wikiPage)  # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + language.lower() + u':' + wikiPage)  # Non-English Wikipedia
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link
# Register the Wikipedia URL rewriter as a link filter.
addLinkFilter(wikipediaLinks)
=== Remove trailing slashes from internal links === | === Remove trailing slashes from internal links === | ||
def removeTrailingSlash(l, **kwargs):
    """Strip one trailing ``/`` from the target of an internal link.

    Links whose type is not ``u'internal'`` are returned untouched, as are
    links with an empty target.

    :param l: link object exposing getType, getLink and setLink.
    :param kwargs: extra filter context; unused here.
    :return: the same link object, possibly modified in place.
    """
    if l.getType() != u'internal':
        return l
    target = l.getLink()
    # Check for emptiness first: indexing [-1] on an empty target raises
    # IndexError and would abort the filter.
    if target and target[-1] == '/':
        l.setLink(target[:-1])
    return l
# Register the trailing-slash cleanup as a link filter.
addLinkFilter(removeTrailingSlash)
== Template filters == | == Template filters == | ||
=== Template renaming === | === Template renaming === | ||
def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known alias.

    The template's lowercased name is looked up in each alias list of
    ``templateMap``; on a hit the template is renamed to that entry's key.

    :param t: template object exposing getName and setName.
    :param kwargs: extra filter context; unused here.
    :return: the same template object, possibly renamed in place.
    """
    templateMap = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush']
    }
    for goodName, badNames in templateMap.items():
        # The name is re-read on every pass because a previous pass may
        # already have renamed the template.
        if t.getName().lower() in badNames:
            t.setName(goodName)
    return t
# Register the alias-to-canonical renamer as a template filter.
addTemplateFilter(templateRenameMapping)
=== Remove useless templates === | === Remove useless templates === | ||
def removeUselessTemplate(t, **kwargs):
    """Drop templates named ``targeted`` or ``languages`` (case-insensitive).

    :param t: template object exposing getName.
    :param kwargs: extra filter context; unused here.
    :return: ``None`` to delete the template, otherwise ``t`` unchanged.
    """
    uselessNames = (u'targeted', u'languages')
    if t.getName().lower() in uselessNames:
        return None  # Delete template
    return t
# Register the useless-template remover as a template filter.
addTemplateFilter(removeUselessTemplate)
=== Filter parameters of certain templates === | === Filter parameters of certain templates === | ||
def templateParamFilter(t, **kwargs):
    """Run ``fixContent`` over selected parameters of selected templates.

    Only templates whose lowercased name is a key of ``params`` are touched,
    and only the listed parameters that currently hold a truthy value.

    :param t: template object exposing getName, getParam and setParam.
    :param kwargs: filter context, forwarded unchanged to ``fixContent``.
    :return: the same template object, possibly modified in place.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for p in params[name]:
        value = t.getParam(p)
        if value:
            t.setParam(p, fixContent(value, **kwargs))
    return t
# Register the per-parameter content fixer as a template filter.
addTemplateFilter(templateParamFilter)
=== Remove obsolete parameters === | === Remove obsolete parameters === | ||
def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from selected templates.

    ``params`` maps a lowercased template name to the parameter names to
    delete. A name containing ``#n`` is expanded to ten names, with ``#n``
    replaced by each digit 0-9. The map is currently empty, so every
    template is returned unchanged.

    :param t: template object exposing getName and delParam.
    :param kwargs: extra filter context; unused here.
    :return: the same template object, possibly modified in place.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    name = t.getName().lower()
    if name not in params:
        return t
    for p in params[name]:
        p = u(p)
        if u'#n' in p:
            for i in range(10):
                t.delParam(p.replace(u'#n', str(i)))
        else:
            t.delParam(p)
    return t
# Register the obsolete-parameter remover as a template filter.
addTemplateFilter(obsoleteParameterFilter)
=== Implement {{tl|Dictionary}} === | === Implement {{tl|Dictionary}} === | ||
class DictionaryUpdater: | |||
def __init__(self): | |||
self.subpageTemplateLang = <nowiki>"""{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.\n:%missing%</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki> | |||
self.subpageTemplateParam = <nowiki>"""{{#switch:{{{1|}}}|%options%}}<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki> | |||
self.invalidParamError = <nowiki>"""<div style="font-size: 95%; color: #CC0000;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Error''': Invalid parameter passed.</div>"""</nowiki> | |||
self.subpageTemplateID = <nowiki>"""%string%<noinclude><hr style="margin: 1em 0em;" /><div style="font-size: 95%;">\n:[[File:Pictogram info.png|15px|text-top|link=]] '''Note''': Any changes made here will be automatically overwritten by a bot. Please ''do not'' make changes here as they will be lost. Edit '''[[:%dictionary%|the master page]]''' instead.</div>[[Category:Template dictionary|%dictionaryname%/%keyname%]]</noinclude>"""</nowiki> | |||
self.dictionaries = { | |||
u'Template:Dictionary/items': { # Dictionary page | |||
'name': 'items', # Dictionary name (used for categorizing) | |||
'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data | |||
}, | |||
u'Template:Dictionary/common strings': { # Warning: no underscore | |||
'name': 'common strings', | |||
'sync': 'Template:Dictionary/common strings/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/price': { | |||
'name': 'price', | |||
'sync': 'Template:Dictionary/price/Special:SyncData', | |||
'allTemplate': { | |||
'template': 'item price/fmt', | |||
'params': { | |||
'tt': <nowiki>'{{{tt|yes}}}'</nowiki> | |||
} | |||
} | |||
}, | |||
u'Template:Dictionary/mechanics': { | |||
'name': 'mechanics', | |||
'sync': 'Template:Dictionary/mechanics/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/characters': { | |||
'name': 'characters', | |||
'sync': 'Template:Dictionary/characters/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/demonstration': { | |||
'name': 'demonstration', | |||
'sync': 'Template:Dictionary/demonstration/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/transcripts': { | |||
'name': 'transcripts', | |||
'sync': 'Template:Dictionary/transcripts/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/portal achievements': { | |||
'name': 'portal achievements', | |||
'sync': 'Template:Dictionary/portal achievements/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/portal: still alive achievements': { | |||
'name': 'portal: still alive achievements', | |||
'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/portal 2 achievements': { | |||
'name': 'portal 2 achievements', | |||
'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData' | |||
}, | |||
u'Template:Dictionary/audio': { | |||
'name': 'audio', | |||
'sync': 'Template:Dictionary/audio/Special:SyncData' | |||
} | |||
} | |||
self.subpageSeparator = u'/' | |||
# List of supported languages, in prefered order | |||
self.languages = [u'en', u'ar', u'cs', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ko', u'nl', u'no', u'pl', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'zh-hans', u'zh-hant'] | |||
self.defaultLang = u'en' | |||
self.allKeyName = u'_all_' | |||
self.filterName = u'Your friendly neighborhood dictionary updater' | |||
self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->') | |||
self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE) | |||
self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE) | |||
self.scheduler = BatchScheduler(16) | |||
addWhitelistPage(self.dictionaries.keys()) | |||
def generateSubpage(self, keyName, data, currentDict, syncData): | |||
h = hashlib.md5() | |||
if type(data) is type({}): # Subkeys (translations or not) | |||
isTranslation = True | |||
subpage = u(self.subpageTemplateLang) | |||
for k in data: | |||
if k not in self.languages: | |||
isTranslation = False | |||
subpage = u(self.subpageTemplateParam) | |||
break | |||
ordered = [] | |||
unordered = {} | |||
if isTranslation: | |||
missing = [] | |||
for lang in self.languages: | |||
if lang in data: | |||
ordered.append(lang + u'=' + data[lang]) | |||
unordered[lang] = data[lang] | |||
h.update((lang + u'=' + data[lang]).encode('utf8')) | |||
else: | |||
missing.append(lang) | |||
h.update((u'null-' + lang).encode('utf8')) | |||
if self.defaultLang in data: | |||
ordered.insert(0, u'#default=' + data[self.defaultLang]) | |||
if len(missing): | |||
subpage = subpage.replace(u'%missing%', <nowiki>u"Languages missing: "</nowiki> + u', '.join(missing)) | |||
else: | |||
subpage = subpage.replace(u'%missing%', <nowiki>u"Supported languages: all"</nowiki>) | |||
else: # Not a translation | |||
h.update('Any-') | |||
subkeys = data.keys() | |||
subkeys.sort() | |||
for k in subkeys: | |||
ordered.append(k + u'=' + data[k]) | |||
unordered[k] = data[k] | |||
h.update((k + u'=' + data[k]).encode('utf8')) | |||
if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])): | |||
allKey = [] | |||
keys = unordered.keys() | |||
keys.sort() | |||
for k in keys: | |||
allKey.append(k + u'=' + unordered[k]) | |||
for p in self.dictionaries[currentDict]['allTemplate']['params']: | |||
allKey.append(u(p) + u'=' + u(self.dictionaries[currentDict]['allTemplate']['params'][p])) | |||
insertIndex = 0 | |||
if isTranslation and self.defaultLang in data: | |||
insertIndex = 1 | |||
ordered.insert(insertIndex, u(self.allKeyName) + u'={{' + u(self.dictionaries[currentDict]['allTemplate']['template']) + u'|' + u'|'.join(allKey) + u'}}') | |||
subpage = subpage.replace(u'%options%', u'|'.join(ordered)) | |||
else: # No subkeys | |||
data = u(data) | |||
subpage = self.subpageTemplateID | |||
h.update(u(u'ID-' + data).encode('utf8')) | |||
subpage = subpage.replace(u'%string%', data) | |||
h = u(h.hexdigest()) | |||
if keyName in syncData and syncData[keyName] == h: | |||
return # Same hash | |||
syncData[keyName] = h # Update sync data | |||
subpage = subpage.replace(u'%dictionary%', currentDict) | |||
subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name']) | |||
subpage = subpage.replace(u'%keyname%', keyName) | |||
self.scheduler.schedule(editPage, currentDict + self.subpageSeparator + keyName, subpage, summary=<nowiki>u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".'</nowiki>, minor=True, nocreate=False) | |||
def processComment(self, commentString, currentDict, definedStrings, syncData): | |||
commentContents = [] | |||
for extractedStr in self.stringsExtract.finditer(commentString): | |||
comment = u'' | |||
if extractedStr.group(1): | |||
comment = u'# ' + u(extractedStr.group(1)) + u'\n' | |||
dataString = u(extractedStr.group(3)) | |||
if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys | |||
data = dataString.strip() | |||
dataWriteback = u' ' + data | |||
else: # There's subkeys; detect whether this is a translation or not | |||
data = {} | |||
isTranslation = True | |||
for translation in self.translationExtract.finditer(dataString.rstrip()): | |||
data[u(translation.group(1))] = u(translation.group(2)) | |||
if u(translation.group(1)) not in self.languages: | |||
isTranslation = False | |||
ordered = [] | |||
if isTranslation: | |||
for lang in self.languages: | |||
if lang in data: | |||
ordered.append(u' ' + lang + u': ' + data[lang]) | |||
else: # Not a translation, so order in alphabetical order | |||
subkeys = data.keys() | |||
subkeys.sort() | |||
for subk in subkeys: | |||
ordered.append(u' ' + subk + u': ' + data[subk]) | |||
dataWriteback = u'\n' + u'\n'.join(ordered) | |||
keyNames = u(extractedStr.group(2)).lower().split(u'|') | |||
validKeyNames = [] | |||
for keyName in keyNames: | |||
keyName = keyName.replace(u'_', u' ').strip() | |||
if keyName in definedStrings: | |||
continue # Duplicate key | |||
definedStrings.append(keyName) | |||
validKeyNames.append(keyName) | |||
self.generateSubpage(keyName, data, currentDict, syncData) | |||
if len(validKeyNames): | |||
commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback) | |||
self.scheduler.execute() | |||
return u'\n\n'.join(commentContents) | |||
def __call__(self, content, **kwargs): | |||
if 'article' not in kwargs: | |||
return content | |||
if u(kwargs['article'].title) not in self.dictionaries: | |||
return content | |||
currentDict = u(kwargs['article'].title) | |||
syncPage = page(self.dictionaries[currentDict]['sync']) | |||
try: | |||
syncDataText = u(syncPage.getWikiText()).split(u'\n') | |||
except: # Page probably doesn't exist | |||
syncDataText = u'' | |||
syncData = {} | |||
for sync in syncDataText: | |||
sync = u(sync.strip()) | |||
if not sync: | |||
continue | |||
sync = sync.split(u':', 2) | |||
if len(sync) == 2: | |||
syncData[sync[0]] = sync[1] | |||
oldSyncData = syncData.copy() | |||
newContent = u'' | |||
previousIndex = 0 | |||
definedStrings = [] | |||
for comment in self.commentsExtract.finditer(content): | |||
newContent += content[previousIndex:comment.start()] | |||
previousIndex = comment.end() | |||
# Process current comment | |||
newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->' | |||
newContent += content[previousIndex:] | |||
# Check if we need to update sync data | |||
needUpdate = False | |||
for k in syncData: | |||
if k not in oldSyncData or oldSyncData[k] != syncData[k]: | |||
needUpdate = True | |||
break | |||
# Check for deleted strings | |||
for k in oldSyncData: | |||
if k not in definedStrings: | |||
try: | |||
deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].') | |||
except: | |||
pass | |||
if k in syncData: | |||
del syncData[k] | |||
needUpdate = True | |||
if needUpdate: | |||
# Build syncdata string representation | |||
syncKeys = syncData.keys() | |||
syncKeys.sort() | |||
syncLines = [] | |||
for k in syncKeys: | |||
syncLines.append(k + u':' + syncData[k]) | |||
editPage(syncPage, u'\n'.join(syncLines), summary=<nowiki>u'Updated synchronization information for [[:' + currentDict + u']].'</nowiki>, minor=True, nocreate=False) | |||
return newContent | |||
def scheduledRun(self): | |||
for d in self.dictionaries: | |||
fixPage(d) | |||
# Create the dictionary updater, register it as a content filter, and hand its
# scheduledRun method to the task scheduler.
# NOTE(review): the meaning of the second scheduleTask argument (3) is not
# visible here — confirm its unit against scheduleTask's definition.
dictUpdater = DictionaryUpdater()
addFilter(dictUpdater)
scheduleTask(dictUpdater.scheduledRun, 3)