User:WindBOT/Filters: Difference between revisions
Jump to navigation
Jump to search
Implement
(→Semantic filters: I hope this won't break anything!) |
|||
(39 intermediate revisions by 5 users not shown) | |||
Line 4: | Line 4: | ||
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. | If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. | ||
To make the bot ignore a certain line, add a "#" in front of it: | To make the bot ignore a certain line, add a "#" in front of it: | ||
# This line will be ignored | |||
If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line''': | If there are multiple lines, wrap them inside triple-quotes ('''you still need to put the two spaces at the beginning of the line'''): | ||
"""This line will be ignored | |||
and this one as well | |||
and this one is cake | |||
and the previous one was a lie but it was still ignored""" | |||
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. | If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. | ||
Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot]. | Or, if the problem really is elsewhere, [{{fullurl:Special:Block|wpBlockAddress={{BASEPAGENAMEE}}&wpBlockExpiry=infinite&wpAnonOnly=0&wpEnableAutoblock=0&wpCreateAccount=0&wpBlockReason=Bot%20gone%20crazy:%20}} block the bot]. | ||
Line 16: | Line 16: | ||
== Page filters == | == Page filters == | ||
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):') | |||
== Semantic filters == | == Semantic filters == | ||
None yet~ | |||
== Language-specific filters == | == Language-specific filters == | ||
Line 28: | Line 26: | ||
== Link filters == | == Link filters == | ||
=== Wikipedia links filter === | === Wikipedia links filter === | ||
def wikipediaLinks(link, **kwargs):
    """Rewrite an external Wikipedia URL as an internal ``Wikipedia:`` link.

    ``link`` is only touched when its type is ``external`` and its target
    matches ``http(s)://[<subdomain>.]wikipedia.org/wiki/<page>``. The page
    name is URL-unquoted and underscores become spaces. Links without a
    label get the label ``(Wikipedia)``.

    Returns the same ``link`` object, modified in place when it matched.

    NOTE(review): ``compileRegex`` and ``u`` come from the bot framework
    outside this block; they are assumed to behave like ``re.compile`` and a
    to-unicode conversion respectively.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        # Decoding failed (e.g. str() on non-ASCII text); keep the raw name.
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    subdomain = (linkInfo.group(1) or u'').lower()
    if subdomain in (u'', u'en', u'www'):
        # "www" is not a language code; treating it as one used to produce
        # the broken target "Wikipedia:www:<page>".
        link.setLink(u'Wikipedia:' + wikiPage)  # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + subdomain + u':' + wikiPage)  # Non-English Wikipedia
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link
addLinkFilter(wikipediaLinks) | |||
=== | === HL Wiki to Combine Overwiki links filter === | ||
def hlwikiLinks(link, **kwargs):
    """Rewrite an external half-life.wikia.com URL as an ``hl2:`` link.

    A ``/wiki/<page>`` URL becomes ``hl2:<page>``. Any other URL accepted by
    the second pattern is treated as media: its last path segment becomes
    ``hl2:Media:<name>`` when it ends in ``.wav`` and ``hl2::File:<name>``
    otherwise. Links without a label get the unprefixed page/file name.

    Returns the same ``link`` object, modified in place when it matched.
    """
    pagePattern = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/wiki/(\S+)$')
    mediaPattern = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/w[-_/\w]+?/([^/\s]+)$')
    if link.getType() != 'external':
        return link
    isMedia = False
    found = pagePattern.search(link.getLink())
    if not found:
        # Not an article URL; try the media pattern instead.
        found = mediaPattern.search(link.getLink())
        isMedia = True
    if not found:
        return link
    link.setType('internal')
    try:
        wikiPage = u(urllib2.unquote(str(found.group(1))).decode('utf8', 'ignore').replace(u'_', ' '))
    except:
        wikiPage = u(found.group(1)).replace(u'_', ' ')
    label = wikiPage  # The label never carries the Media:/File: prefix.
    if isMedia:
        prefix = 'Media:' if wikiPage[-4:].lower() == '.wav' else ':File:'
        wikiPage = prefix + wikiPage
    link.setLink('hl2:' + wikiPage)
    if link.getLabel() is None:
        link.setLabel(label)
    return link
addLinkFilter(hlwikiLinks) | |||
== Template filters == | == Template filters == | ||
=== Template renaming === | === Template renaming === | ||
def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known alias.

    The template's lowercased name is compared against each alias list in
    ``templateMap``; on a hit the template is renamed to the map key.
    Returns the same template object.
    """
    templateMap = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush']
    }
    for goodName, badNames in templateMap.items():
        if t.getName().lower() in badNames:
            t.setName(goodName)
    return t
addTemplateFilter(templateRenameMapping) | |||
=== Remove useless templates === | === Remove useless templates === | ||
def removeUselessTemplate(t, **kwargs):
    """Drop templates that should not appear on pages.

    Returns ``None`` (which deletes the template) when the lowercased name
    is ``targeted`` or ``languages``; otherwise returns ``t`` unchanged.
    """
    uselessNames = (u'targeted', u'languages')
    return None if t.getName().lower() in uselessNames else t
addTemplateFilter(removeUselessTemplate) | |||
=== Filter parameters of certain templates === | === Filter parameters of certain templates === | ||
def templateParamFilter(t, **kwargs):
    """Run ``fixContent`` over selected parameters of selected templates.

    ``params`` maps a lowercased template name to the parameter names whose
    values should be filtered. Parameters that are missing or empty are left
    alone. Returns the same template object.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    lowered = t.getName().lower()
    if lowered in params:
        for paramName in params[lowered]:
            if t.getParam(paramName):
                t.setParam(paramName, fixContent(t.getParam(paramName), **kwargs))
    return t
addTemplateFilter(templateParamFilter) | |||
=== Remove obsolete parameters === | === Remove obsolete parameters === | ||
def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from selected templates.

    ``params`` maps a lowercased template name to parameter names to delete.
    A name containing ``#n`` is expanded to the ten names obtained by
    replacing ``#n`` with the digits 0-9. The map is currently empty, so
    every template is returned untouched.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    lowered = t.getName().lower()
    if lowered in params:
        for obsolete in params[lowered]:
            obsolete = u(obsolete)
            if u'#n' in obsolete:
                for digit in range(10):
                    t.delParam(obsolete.replace(u'#n', str(digit)))
            else:
                t.delParam(obsolete)
    return t
addTemplateFilter(obsoleteParameterFilter) | |||
=== Implement {{tl|Dictionary}} === | === Implement {{tl|Dictionary}} === | ||
class DictionaryUpdater:
    """Content filter that pushes dictionary strings to per-key subpages.

    Calling the instance with the content of a registered dictionary page
    parses the strings declared inside its ``<!-- ... -->`` comments, writes
    one subpage per key through ``editPage``, deletes the subpages of keys
    that are no longer defined, and returns the page content with every
    comment rewritten in canonical order. An MD5 digest per key is stored on
    a "SyncData" page so that unchanged strings are not pushed again.

    NOTE(review): ``u``, ``page``, ``editPage``, ``deletePage``, ``fixPage``,
    ``compileRegex`` and ``addWhitelistPage`` are provided by the bot
    framework outside this block; their contracts are assumed from how they
    are called here.
    """

    # Dictionary names. Each one lives at 'Template:Dictionary/<name>' and
    # keeps its sync data at 'Template:Dictionary/<name>/Special:SyncData'.
    _DICTIONARY_NAMES = (
        'items',
        'common strings',  # Warning: no underscore
        'price',
        'mechanics',
        'characters',
        'demonstration',
        'transcripts',
        'portal achievements',
        'portal: still alive achievements',
        'portal 2 achievements',
        'rexaura achievements',
        'portal stories: mel achievements',
        'portal pinball achievements',
        'bridge constructor portal achievements',
        'audio',
        'voice lines/Adventure core',
        'voice lines/Announcer',
        'voice lines/Ap-Sap',
        'voice lines/Cave Johnson',
        'voice lines/Defective Turret',
        'voice lines/Fact core',
        'voice lines/Floor Turret',
        'voice lines/GLaDOS',
        'voice lines/Space core',
        'voice lines/Wheatley',
        'voice lines/Grady',
        'voice lines/Desk Job',
    )

    # Extra per-dictionary settings, keyed by dictionary name.
    _DICTIONARY_EXTRAS = {
        'price': {
            'allTemplate': '{{{{{template|item price/fmt}}}|%options%|tt={{{tt|yes}}}}}'
        },
    }

    @classmethod
    def _buildDictionaries(cls):
        """Return the page-title -> settings map for every known dictionary."""
        dictionaries = {}
        for name in cls._DICTIONARY_NAMES:
            settings = {
                'name': name,  # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/' + name + '/Special:SyncData',  # Page holding last sync data
            }
            settings.update(cls._DICTIONARY_EXTRAS.get(name, {}))
            dictionaries[u'Template:Dictionary/' + name] = settings
        return dictionaries

    def __init__(self):
        """Set up templates, the dictionary list and the parsing regexes.

        Also whitelists every dictionary page via ``addWhitelistPage``.
        """
        # NOTE(review): on the wiki page these literals are wrapped in
        # <nowiki> tags, which are page markup rather than Python; re-add
        # them if this code is pasted back into wikitext.
        self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}"""
        self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}"""
        self.invalidParamError = """<span class="error">Error: invalid param.</span>[[Category:ERROR]]"""
        self.subpageTemplateID = """%string%"""
        self.partialUpdateThreshold = 750  # Update SyncData every n edits
        self.dictionaries = self._buildDictionaries()
        self.subpageSeparator = u'/'
        # List of supported languages, in preferred order
        self.languages = [u'en', u'ar', u'cs', u'cz', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ka', u'ko', u'nl', u'no', u'pl', u'po', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'sw', u'th', u'tr', u'tu', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.allKeyName = u'_all_'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        self.commentsExtract = compileRegex(r'<!--([\S\s]+?)-->')
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        addWhitelistPage(list(self.dictionaries))
        self.editCounts = {}  # Dictionary title -> subpage edits since the last SyncData write

    def updateSyncData(self, currentDict, syncData, note=''):
        """Write the key -> digest map of ``currentDict`` to its SyncData page.

        The page gets one ``key:digest`` line per entry, sorted by key.
        ``note``, when non-empty, is appended to the edit summary in
        parentheses.
        """
        # Build syncdata string representation
        syncLines = [k + u':' + syncData[k] for k in sorted(syncData)]
        if note:
            note = u' (' + u(note) + u')'
        editPage(
            self.dictionaries[currentDict]['sync'],
            u'\n'.join(syncLines),
            summary=u'Updated synchronization information for [[:' + currentDict + u']]' + note + u'.',
            minor=True,
            nocreate=False
        )

    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Push one string to its subpage unless its digest is unchanged.

        ``data`` is either a plain string or a dict of subkeys. A dict whose
        keys are all in ``self.languages`` is treated as a translation and
        rendered with the language switch template; any other dict is
        rendered with the positional-parameter switch template. Values equal
        to the dictionary's optional ``blankString`` setting are replaced by
        the empty string in ``data`` itself.

        On a successful edit ``syncData[keyName]`` is updated, and the
        SyncData page is rewritten once more than
        ``self.partialUpdateThreshold`` edits have accumulated.
        """
        settings = self.dictionaries[currentDict]
        h = hashlib.md5()
        if isinstance(data, dict):  # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if 'blankString' in settings and data[k] == settings['blankString']:
                    data[k] = u''
                if isTranslation and k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
            ordered = []
            unordered = {}
            if isTranslation:
                missing = []
                for lang in self.languages:
                    if lang in data:
                        ordered.append(lang + u'=' + data[lang])
                        unordered[lang] = data[lang]
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                    else:
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
                # The templates defined in __init__ contain no %missing%
                # placeholder, so this only matters for customised templates.
                if missing:
                    subpage = subpage.replace(u'%missing%', u"Languages missing: " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"Supported languages: all")
            else:  # Not a translation
                h.update(b'Any-')
                for k in sorted(data):
                    ordered.append(k + u'=' + data[k])
                    unordered[k] = data[k]
                    h.update((k + u'=' + data[k]).encode('utf8'))
            # 'allTemplate' is a plain string; the former
            # ['allTemplate']['params'] lookup raised TypeError whenever
            # there were no options, so only the options are checked now.
            if 'allTemplate' in settings and unordered:
                allKey = [k + u'=' + unordered[k] for k in sorted(unordered)]
                insertIndex = 0
                if isTranslation and self.defaultLang in data:
                    insertIndex = 1  # Keep "#default=" first
                ordered.insert(insertIndex, u(self.allKeyName) + u'=' + u(settings['allTemplate'].replace(u'%options%', u'|'.join(allKey))))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else:  # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        digest = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == digest:
            return  # Same hash
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', settings['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        if editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False):
            syncData[keyName] = digest  # Update sync data
            self.editCounts[currentDict] = self.editCounts.get(currentDict, 0) + 1
            if self.editCounts[currentDict] > self.partialUpdateThreshold:
                self.editCounts[currentDict] = 0
                self.updateSyncData(currentDict, syncData, 'Partial update')

    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Parse one comment body, push its strings, return its canonical form.

        Every key not already present in ``definedStrings`` is appended to
        that list and handed to ``generateSubpage``; keys seen before are
        dropped as duplicates. Translations are written back in
        ``self.languages`` order, other subkeys alphabetically.
        """
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            comment = u''
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if u'\r' not in dataString and u'\n' not in dataString:  # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else:  # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                    subkey = u(translation.group(1))
                    data[subkey] = u(translation.group(2))
                    if subkey not in self.languages:
                        isTranslation = False
                if isTranslation:
                    subkeys = [lang for lang in self.languages if lang in data]
                else:  # Not a translation, so order in alphabetical order
                    subkeys = sorted(data)
                ordered = [u' ' + subk + u': ' + data[subk] for subk in subkeys]
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                keyName = keyName.replace(u'_', u' ').replace(u'#', u'').strip()
                if keyName in definedStrings:
                    continue  # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if validKeyNames:
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)

    def __call__(self, content, **kwargs):
        """Filter entry point: synchronise a dictionary page.

        Returns ``content`` unchanged unless ``kwargs['article'].title`` is
        one of the registered dictionaries. Otherwise processes every
        comment, deletes the subpages of strings that are in the stored sync
        data but no longer defined, rewrites the SyncData page and returns
        the normalised page content.
        """
        if 'article' not in kwargs:
            return content
        if u(kwargs['article'].title) not in self.dictionaries:
            return content
        currentDict = u(kwargs['article'].title)
        syncDataText = []
        # Roughly 2% of the time (1 run in 51), ignore syncdata completely.
        # Helps with stale syncdata and people overwriting things.
        if random.randint(0, 50) != 0:
            try:
                syncDataText = u(page(self.dictionaries[currentDict]['sync']).getWikiText()).split(u'\n')
            except Exception:  # Page probably doesn't exist
                syncDataText = []
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            sync = sync.split(u':', 2)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        oldSyncData = syncData.copy()
        newContent = u''
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            newContent += u'<!--\n\n' + self.processComment(u(comment.group(1)).strip(), currentDict, definedStrings, syncData) + u'\n\n-->'
        newContent += content[previousIndex:]
        # Check for deleted strings
        stillDefined = set(definedStrings)
        for k in oldSyncData:
            if k not in stillDefined:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from [[:' + currentDict + u']].')
                except Exception:
                    pass  # Best effort: forget the key even if deletion failed
                if k in syncData:
                    del syncData[k]
        self.updateSyncData(currentDict, syncData, 'Full update')
        self.editCounts[currentDict] = 0
        return newContent

    def scheduledRun(self):
        """Call ``fixPage`` on every registered dictionary page."""
        for d in self.dictionaries:
            fixPage(d)
dictUpdater = DictionaryUpdater() | |||
addFilter(dictUpdater) | |||
scheduleTask(dictUpdater.scheduledRun, 3) | |||
=== Update checklists on [[User:WindBOT/Item checklists|list of subscribers]] === | |||
def itemChecklists():
    """Refresh up to 12 randomly chosen subscriber item checklists.

    Loads the game schema, works out which item names
    ``Template:Item checklist`` supports, then for each chosen page linked
    from ``User:WindBOT/Item_checklists`` updates every template whose name
    contains ``checklist`` from the owner's Steam inventory. A page is only
    saved when its content changed. Returns early, doing nothing, when the
    schema cannot be loaded.

    NOTE(review): ``steam``, ``steamGetGameSchema``, ``page``,
    ``linkExtract``, ``templateExtract``, ``templateRestore``, ``editPage``,
    ``compileRegex`` and ``u`` come from the bot framework outside this
    block; their contracts are assumed from how they are called here.
    """
    game = 620  # presumably the Steam application ID -- TODO confirm
    maxChecklists = 12  # Pages refreshed per run
    cleanItemName = compileRegex(r'^the +')

    def updateItemChecklist(checklist, schema, support):
        """Fill one checklist template from its owner's inventory."""
        if not checklist.getParam('steamid'):
            checklist.setParam('error', 'Unspecified Steam ID.')
            return
        supportedItems = {}
        for i in support:
            supportedItems[i] = 0
        try:
            steamUser = steam.user.profile(checklist.getParam('steamid')).id64
        except steam.user.ProfileNotFoundError as e:
            # Not a profile ID; retry treating the value as a vanity URL.
            try:
                steamUser = steam.user.vanity_url(checklist.getParam('steamid')).id64
            except Exception as e2:
                checklist.setParam('error', u'Cannot find profile: ' + u(e) + u' / ' + u(e2))
                return
        try:
            backpack = steam.items.inventory(game, steamUser, schema)
        except Exception as e:
            checklist.setParam('error', u'Cannot load inventory: ' + u(e))
            return
        for item in backpack:
            itemName = cleanItemName.sub(u'', u(item.name).lower())
            if itemName in supportedItems:
                supportedItems[itemName] += 1
        for item in supportedItems:
            if supportedItems[item] > 1:
                checklist.setParam(item, supportedItems[item])
            elif supportedItems[item] == 1:
                checklist.setParam(item, 'yes')
            else:
                # Item not owned now: keep "wanted"-style values, otherwise
                # record whether it was ever marked as owned.
                p = checklist.getParam(item)
                if p is not None:
                    p = p.lower()
                if p in (None, 'no', '0'):
                    checklist.setParam(item, 'no')
                elif p not in ('wanted', 'want', 'do not', 'anti', 'do not want'):
                    checklist.setParam(item, 'had')
        return

    try:
        schema = steamGetGameSchema(game)
        allItems = set()
        for item in schema:
            allItems.add(cleanItemName.sub(u'', u(item.name).lower()))
    except Exception:
        return  # No schema means no fancy
    support = []
    templateParams = compileRegex(r'\{\{\{\s*(?:the +)?([^{}|]+?)\s*\|')
    templateCode = page('Template:Item checklist').getWikiText()
    for res in templateParams.finditer(templateCode):
        item = u(res.group(1)).lower()
        if item not in support and item in allItems:
            support.append(item)
    checkPage, checkLinks, checkKeys = linkExtract(page('User:WindBOT/Item_checklists').getWikiText())
    linksLeft = list(checkLinks.values())
    # Never ask for more pages than exist; drawing 12 unconditionally raised
    # IndexError once the list ran empty.
    for randLink in random.sample(linksLeft, min(maxChecklists, len(linksLeft))):
        checklist = page(randLink.getLink())
        print('Updating %s' % (checklist,))
        oldContent = u(checklist.getWikiText())
        content, templatelist, templatekeys = templateExtract(oldContent)
        for t in templatelist.values():
            if t.getName().lower().find(u'checklist') != -1:
                updateItemChecklist(t, schema, support)
        content = templateRestore(content, templatelist, templatekeys)
        if oldContent != content:
            editPage(checklist, content, summary=u'Updated Item checklist [[:' + u(checklist.title) + u']]', minor=True)
scheduleTask(itemChecklists, 365) | |||
Revision as of 02:19, 11 March 2022
How to disable a filter
If the bot is malfunctioning, chances are that the problem lies in one of these blocks of code. Thus, instead of shutting down the whole bot, it would be wiser to disable only the chunk of code that is misbehaving. To make the bot ignore a certain line, add a "#" in front of it:
# This line will be ignored
If there are multiple lines, wrap them inside triple-quotes (you still need to put the two spaces at the beginning of the line):
"""This line will be ignored
and this one as well
and this one is cake
and the previous one was a lie but it was still ignored"""
If all else fails, you can simply delete the block from the page. The bot can't come up with code by itself yet, so it won't run anything. Or, if the problem really is elsewhere, block the bot.
Page filters
addPageFilter(r'^user:', r'(?:talk|help|wiki|template):')
Semantic filters
None yet~
Language-specific filters
None yet~
Link filters
Wikipedia links filter
def wikipediaLinks(link, **kwargs):
    """Rewrite an external Wikipedia URL as an internal ``Wikipedia:`` link.

    ``link`` is only touched when its type is ``external`` and its target
    matches ``http(s)://[<subdomain>.]wikipedia.org/wiki/<page>``. The page
    name is URL-unquoted and underscores become spaces. Links without a
    label get the label ``(Wikipedia)``.

    Returns the same ``link`` object, modified in place when it matched.

    NOTE(review): ``compileRegex`` and ``u`` come from the bot framework
    outside this block; they are assumed to behave like ``re.compile`` and a
    to-unicode conversion respectively.
    """
    wikipediaRegex = compileRegex(r'^https?://(?:(\w+)\.)?wikipedia\.org/wiki/(\S+)')
    if link.getType() != u'external':
        return link
    linkInfo = wikipediaRegex.search(link.getLink())
    if not linkInfo:
        return link
    link.setType(u'internal')
    try:
        wikiPage = urllib2.unquote(str(linkInfo.group(2))).decode('utf8', 'ignore').replace(u'_', ' ')
    except Exception:
        # Decoding failed (e.g. str() on non-ASCII text); keep the raw name.
        wikiPage = u(linkInfo.group(2)).replace(u'_', ' ')
    subdomain = (linkInfo.group(1) or u'').lower()
    if subdomain in (u'', u'en', u'www'):
        # "www" is not a language code; treating it as one used to produce
        # the broken target "Wikipedia:www:<page>".
        link.setLink(u'Wikipedia:' + wikiPage)  # English Wikipedia
    else:
        link.setLink(u'Wikipedia:' + subdomain + u':' + wikiPage)  # Non-English Wikipedia
    if link.getLabel() is None:
        link.setLabel(u'(Wikipedia)')
    return link
addLinkFilter(wikipediaLinks)
HL Wiki to Combine Overwiki links filter
def hlwikiLinks(link, **kwargs):
    """Rewrite an external half-life.wikia.com URL as an ``hl2:`` link.

    A ``/wiki/<page>`` URL becomes ``hl2:<page>``. Any other URL accepted by
    the second pattern is treated as media: its last path segment becomes
    ``hl2:Media:<name>`` when it ends in ``.wav`` and ``hl2::File:<name>``
    otherwise. Links without a label get the unprefixed page/file name.

    Returns the same ``link`` object, modified in place when it matched.
    """
    pagePattern = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/wiki/(\S+)$')
    mediaPattern = compileRegex(r'^https?://[-.\w]*half-life\.wikia\.com/w[-_/\w]+?/([^/\s]+)$')
    if link.getType() != 'external':
        return link
    isMedia = False
    found = pagePattern.search(link.getLink())
    if not found:
        # Not an article URL; try the media pattern instead.
        found = mediaPattern.search(link.getLink())
        isMedia = True
    if not found:
        return link
    link.setType('internal')
    try:
        wikiPage = u(urllib2.unquote(str(found.group(1))).decode('utf8', 'ignore').replace(u'_', ' '))
    except:
        wikiPage = u(found.group(1)).replace(u'_', ' ')
    label = wikiPage  # The label never carries the Media:/File: prefix.
    if isMedia:
        prefix = 'Media:' if wikiPage[-4:].lower() == '.wav' else ':File:'
        wikiPage = prefix + wikiPage
    link.setLink('hl2:' + wikiPage)
    if link.getLabel() is None:
        link.setLabel(label)
    return link
addLinkFilter(hlwikiLinks)
Template filters
Template renaming
def templateRenameMapping(t, **kwargs):
    """Rename a template to its preferred name when it uses a known alias.

    The template's lowercased name is compared against each alias list in
    ``templateMap``; on a hit the template is renamed to the map key.
    Returns the same template object.
    """
    templateMap = {
        # Format goes like this (without the "#" in front obviously):
        #'Good template name': ['Bad template lowercase name 1', 'Bad template lowercase name 2', 'Bad template lowercase name 3'],
        # Last line has no comma at the end
        'Crush': ['pngcrush']
    }
    for goodName, badNames in templateMap.items():
        if t.getName().lower() in badNames:
            t.setName(goodName)
    return t
addTemplateFilter(templateRenameMapping)
Remove useless templates
def removeUselessTemplate(t, **kwargs):
    """Drop templates that should not appear on pages.

    Returns ``None`` (which deletes the template) when the lowercased name
    is ``targeted`` or ``languages``; otherwise returns ``t`` unchanged.
    """
    uselessNames = (u'targeted', u'languages')
    return None if t.getName().lower() in uselessNames else t
addTemplateFilter(removeUselessTemplate)
Filter parameters of certain templates
def templateParamFilter(t, **kwargs):
    """Run ``fixContent`` over selected parameters of selected templates.

    ``params`` maps a lowercased template name to the parameter names whose
    values should be filtered. Parameters that are missing or empty are left
    alone. Returns the same template object.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'filter']
        'patch layout': ['before', 'after', 'current'],
        'item infobox': ['released']
    }
    lowered = t.getName().lower()
    if lowered in params:
        for paramName in params[lowered]:
            if t.getParam(paramName):
                t.setParam(paramName, fixContent(t.getParam(paramName), **kwargs))
    return t
addTemplateFilter(templateParamFilter)
Remove obsolete parameters
def obsoleteParameterFilter(t, **kwargs):
    """Delete obsolete parameters from selected templates.

    ``params`` maps a lowercased template name to parameter names to delete.
    A name containing ``#n`` is expanded to the ten names obtained by
    replacing ``#n`` with the digits 0-9. The map is currently empty, so
    every template is returned untouched.
    """
    params = {  # Map: 'lowercase template name': ['list', 'of', 'params', 'to', 'delete']
    }
    lowered = t.getName().lower()
    if lowered in params:
        for obsolete in params[lowered]:
            obsolete = u(obsolete)
            if u'#n' in obsolete:
                for digit in range(10):
                    t.delParam(obsolete.replace(u'#n', str(digit)))
            else:
                t.delParam(obsolete)
    return t
addTemplateFilter(obsoleteParameterFilter)
Implement {{Dictionary}}
class DictionaryUpdater:
    """Content filter that pushes dictionary strings out to per-string subpages.

    For every page listed in self.dictionaries, each defined string is written
    to '<dictionary page>/<key name>' and a hash of its content is recorded on
    the dictionary's 'sync' page, so that unchanged strings are not re-edited.

    NOTE(review): several literals in this copy of the class look truncated
    (bare `u`, `compileRegex(r)`, `note=`); they are flagged inline and should
    be confirmed against the live source before this code is run.
    """
    def __init__(self):
        """Set up templates, the dictionary list, regexes and edit counters."""
        # Subpage wikitext templates; '%options%' / '%string%' are substituted
        # in generateSubpage.
        self.subpageTemplateLang = """{{#switch:{{{lang|{{SUBPAGENAME}}}}}|%options%}}"""
        self.subpageTemplateParam = """{{#switch:{{{1|}}}|%options%}}"""
        self.invalidParamError = """<span class="error">Error: invalid param.</span>[[Category:ERROR]]"""
        self.subpageTemplateID = """%string%"""
        self.partialUpdateThreshold = 750 # Update SyncData every n edits
        self.dictionaries = {
            u'Template:Dictionary/items': { # Dictionary page
                'name': 'items', # Dictionary name (used for categorizing)
                'sync': 'Template:Dictionary/items/Special:SyncData' # Page holding last sync data
            },
            u'Template:Dictionary/common strings': { # Warning: no underscore
                'name': 'common strings',
                'sync': 'Template:Dictionary/common strings/Special:SyncData'
            },
            u'Template:Dictionary/price': {
                'name': 'price',
                'sync': 'Template:Dictionary/price/Special:SyncData',
                'allTemplate': '{{{{{template|item price/fmt}}}|%options%|tt={{{tt|yes}}}}}'
            },
            u'Template:Dictionary/mechanics': {
                'name': 'mechanics',
                'sync': 'Template:Dictionary/mechanics/Special:SyncData'
            },
            u'Template:Dictionary/characters': {
                'name': 'characters',
                'sync': 'Template:Dictionary/characters/Special:SyncData'
            },
            u'Template:Dictionary/demonstration': {
                'name': 'demonstration',
                'sync': 'Template:Dictionary/demonstration/Special:SyncData'
            },
            u'Template:Dictionary/transcripts': {
                'name': 'transcripts',
                'sync': 'Template:Dictionary/transcripts/Special:SyncData'
            },
            u'Template:Dictionary/portal achievements': {
                'name': 'portal achievements',
                'sync': 'Template:Dictionary/portal achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal: still alive achievements': {
                'name': 'portal: still alive achievements',
                'sync': 'Template:Dictionary/portal: still alive achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal 2 achievements': {
                'name': 'portal 2 achievements',
                'sync': 'Template:Dictionary/portal 2 achievements/Special:SyncData'
            },
            u'Template:Dictionary/rexaura achievements': {
                'name': 'rexaura achievements',
                'sync': 'Template:Dictionary/rexaura achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal stories: mel achievements': {
                'name': 'portal stories: mel achievements',
                'sync': 'Template:Dictionary/portal stories: mel achievements/Special:SyncData'
            },
            u'Template:Dictionary/portal pinball achievements': {
                'name': 'portal pinball achievements',
                'sync': 'Template:Dictionary/portal pinball achievements/Special:SyncData'
            },
            u'Template:Dictionary/bridge constructor portal achievements': {
                'name': 'bridge constructor portal achievements',
                'sync': 'Template:Dictionary/bridge constructor portal achievements/Special:SyncData'
            },
            u'Template:Dictionary/audio': {
                'name': 'audio',
                'sync': 'Template:Dictionary/audio/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Adventure core': {
                'name': 'voice lines/Adventure core',
                'sync': 'Template:Dictionary/voice lines/Adventure core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Announcer': {
                'name': 'voice lines/Announcer',
                'sync': 'Template:Dictionary/voice lines/Announcer/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Ap-Sap': {
                'name': 'voice lines/Ap-Sap',
                'sync': 'Template:Dictionary/voice lines/Ap-Sap/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Cave Johnson': {
                'name': 'voice lines/Cave Johnson',
                'sync': 'Template:Dictionary/voice lines/Cave Johnson/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Defective Turret': {
                'name': 'voice lines/Defective Turret',
                'sync': 'Template:Dictionary/voice lines/Defective Turret/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Fact core': {
                'name': 'voice lines/Fact core',
                'sync': 'Template:Dictionary/voice lines/Fact core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Floor Turret': {
                'name': 'voice lines/Floor Turret',
                'sync': 'Template:Dictionary/voice lines/Floor Turret/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/GLaDOS': {
                'name': 'voice lines/GLaDOS',
                'sync': 'Template:Dictionary/voice lines/GLaDOS/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Space core': {
                'name': 'voice lines/Space core',
                'sync': 'Template:Dictionary/voice lines/Space core/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Wheatley': {
                'name': 'voice lines/Wheatley',
                'sync': 'Template:Dictionary/voice lines/Wheatley/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Grady': {
                'name': 'voice lines/Grady',
                'sync': 'Template:Dictionary/voice lines/Grady/Special:SyncData'
            },
            u'Template:Dictionary/voice lines/Desk Job': {
                'name': 'voice lines/Desk Job',
                'sync': 'Template:Dictionary/voice lines/Desk Job/Special:SyncData'
            }
        }
        self.subpageSeparator = u'/'
        # List of supported languages, in preferred order
        self.languages = [u'en', u'ar', u'cs', u'cz', u'da', u'de', u'es', u'fi', u'fr', u'hu', u'it', u'ja', u'ka', u'ko', u'nl', u'no', u'pl', u'po', u'pt', u'pt-br', u'ro', u'ru', u'sv', u'sw', u'th', u'tr', u'tu', u'zh-hans', u'zh-hant']
        self.defaultLang = u'en'
        self.allKeyName = u'_all_'
        self.filterName = u'Your friendly neighborhood dictionary updater'
        # NOTE(review): as written this passes the bare name `r`, not a
        # pattern; the regex literal appears to be missing from this copy --
        # confirm against the live source.
        self.commentsExtract = compileRegex(r)
        # Matches one string definition: optional '# comment' line (group 1),
        # '|'-separated key names (group 2), then either a single-line value
        # or a run of indented 'subkey: value' lines (group 3).
        self.stringsExtract = compileRegex(r'(?:^[ \t]*#[ \t]*([^\r\n]*?)[ \t]*$\s*)?^[ \t]*([^\r\n]+?[ \t]*(?:\|[ \t]*[^\r\n]+?[ \t]*)*):[ \t]*([^\r\n]+?[ \t]*$|\s*[\r\n]+(?:\s*[ \t]+[-\w]+[ \t]*:[ \t]*[^\r\n]+[ \t]*$)+)', re.IGNORECASE | re.MULTILINE)
        # Matches one indented 'subkey: value' line (groups 1 and 2).
        self.translationExtract = compileRegex(r'^[ \t]+([-\w]+)[ \t]*:[ \t]*([^\r\n]+)[ \t]*$', re.IGNORECASE | re.MULTILINE)
        addWhitelistPage(self.dictionaries.keys())
        self.editCounts = {}  # Per-dictionary count of subpage edits since the last sync write

    # NOTE(review): the default value of `note` is missing in this copy
    # (looks like a lost empty-string literal) -- confirm.
    def updateSyncData(self, currentDict, syncData, note=):
        """Write syncData to the sync page of currentDict.

        The page content is one 'key:hash' line per entry, sorted by key.
        A truthy note is appended in parentheses to the edit summary.
        """
        # Build syncdata string representation
        syncKeys = syncData.keys()
        syncKeys.sort()
        syncLines = []
        for k in syncKeys:
            syncLines.append(k + u':' + syncData[k])
        if note:
            note = u' (' + u(note) + u')'
        editPage(self.dictionaries[currentDict]['sync'], u'\n'.join(syncLines), summary=u'Updated synchronization information for [[:' + currentDict + u']]' + note + u'.', minor=True, nocreate=False)

    def generateSubpage(self, keyName, data, currentDict, syncData):
        """Build the subpage for one string and edit it if its hash changed.

        keyName     -- name of the string; the subpage title is
                       currentDict + self.subpageSeparator + keyName.
        data        -- either a dict of subkey -> value, or a single value.
        currentDict -- title of the dictionary page (key of self.dictionaries).
        syncData    -- dict of keyName -> md5 hex digest; updated in place
                       when editPage returns a truthy value.

        Returns None in all cases; returns early, without editing, when the
        computed hash equals the one stored in syncData.
        """
        h = hashlib.md5()
        if type(data) is type({}): # Subkeys (translations or not)
            isTranslation = True
            subpage = u(self.subpageTemplateLang)
            for k in data:
                if 'blankString' in self.dictionaries[currentDict] and data[k] == self.dictionaries[currentDict]['blankString']:
                    # NOTE(review): assigns the helper `u` itself; presumably
                    # an empty unicode literal was intended -- confirm.
                    data[k] = u
                # A single subkey that is not a known language code makes the
                # whole entry a plain parameter switch rather than a translation.
                if isTranslation and k not in self.languages:
                    isTranslation = False
                    subpage = u(self.subpageTemplateParam)
            ordered = []
            unordered = {}
            if isTranslation:
                missing = []
                for lang in self.languages:
                    if lang in data:
                        ordered.append(lang + u'=' + data[lang])
                        unordered[lang] = data[lang]
                        h.update((lang + u'=' + data[lang]).encode('utf8'))
                    else:
                        missing.append(lang)
                        h.update((u'null-' + lang).encode('utf8'))
                if self.defaultLang in data:
                    ordered.insert(0, u'#default=' + data[self.defaultLang])
                if len(missing):
                    subpage = subpage.replace(u'%missing%', u"Languages missing: " + u', '.join(missing))
                else:
                    subpage = subpage.replace(u'%missing%', u"Supported languages: all")
            else: # Not a translation
                h.update('Any-')
                subkeys = data.keys()
                subkeys.sort()
                for k in subkeys:
                    ordered.append(k + u'=' + data[k])
                    unordered[k] = data[k]
                    h.update((k + u'=' + data[k]).encode('utf8'))
            # NOTE(review): every 'allTemplate' defined in __init__ is a plain
            # string, so the ['params'] lookup below would raise if it were
            # reached (i.e. when `unordered` is empty) -- confirm intent.
            if 'allTemplate' in self.dictionaries[currentDict] and (len(unordered) or len(self.dictionaries[currentDict]['allTemplate']['params'])):
                allKey = []
                keys = unordered.keys()
                keys.sort()
                for k in keys:
                    allKey.append(k + u'=' + unordered[k])
                # Keep '#default=' first when present; the all-key goes right after it.
                insertIndex = 0
                if isTranslation and self.defaultLang in data:
                    insertIndex = 1
                ordered.insert(insertIndex, u(self.allKeyName) + u'=' + u(self.dictionaries[currentDict]['allTemplate'].replace(u'%options%', u'|'.join(allKey))))
            subpage = subpage.replace(u'%options%', u'|'.join(ordered))
        else: # No subkeys
            data = u(data)
            subpage = self.subpageTemplateID
            h.update(u(u'ID-' + data).encode('utf8'))
            subpage = subpage.replace(u'%string%', data)
        h = u(h.hexdigest())
        if keyName in syncData and syncData[keyName] == h:
            return # Same hash
        subpage = subpage.replace(u'%dictionary%', currentDict)
        subpage = subpage.replace(u'%dictionaryname%', self.dictionaries[currentDict]['name'])
        subpage = subpage.replace(u'%keyname%', keyName)
        if editPage(currentDict + self.subpageSeparator + keyName, subpage, summary=u'Pushed changes from [[:' + currentDict + u']] for string "' + keyName + u'".', minor=True, nocreate=False):
            syncData[keyName] = h # Update sync data
            if currentDict not in self.editCounts:
                self.editCounts[currentDict] = 0
            self.editCounts[currentDict] += 1
            # Flush sync data periodically so progress is recorded during long runs.
            if self.editCounts[currentDict] > self.partialUpdateThreshold:
                self.editCounts[currentDict] = 0
                self.updateSyncData(currentDict, syncData, 'Partial update')

    def processComment(self, commentString, currentDict, definedStrings, syncData):
        """Parse the string definitions in commentString and push each one.

        For every definition matched by self.stringsExtract, the key names are
        lowercased and normalised, keys already present in definedStrings are
        skipped, and generateSubpage is called for each remaining key.
        definedStrings is extended in place.

        Returns the definitions re-serialised in normalised form, joined by
        blank lines.
        """
        commentContents = []
        for extractedStr in self.stringsExtract.finditer(commentString):
            # NOTE(review): bare `u` here looks like a lost empty-string
            # literal -- confirm.
            comment = u
            if extractedStr.group(1):
                comment = u'# ' + u(extractedStr.group(1)) + u'\n'
            dataString = u(extractedStr.group(3))
            if dataString.find(u'\r') == -1 and dataString.find(u'\n') == -1: # Assume no subkeys
                data = dataString.strip()
                dataWriteback = u' ' + data
            else: # There's subkeys; detect whether this is a translation or not
                data = {}
                isTranslation = True
                for translation in self.translationExtract.finditer(dataString.rstrip()):
                    data[u(translation.group(1))] = u(translation.group(2))
                    if u(translation.group(1)) not in self.languages:
                        isTranslation = False
                ordered = []
                if isTranslation:
                    # Translations are written back in self.languages order.
                    for lang in self.languages:
                        if lang in data:
                            ordered.append(u' ' + lang + u': ' + data[lang])
                else: # Not a translation, so order in alphabetical order
                    subkeys = data.keys()
                    subkeys.sort()
                    for subk in subkeys:
                        ordered.append(u' ' + subk + u': ' + data[subk])
                dataWriteback = u'\n' + u'\n'.join(ordered)
            keyNames = u(extractedStr.group(2)).lower().split(u'|')
            validKeyNames = []
            for keyName in keyNames:
                # NOTE(review): the second replace() argument is the bare name
                # `u`; presumably an empty-string literal -- confirm.
                keyName = keyName.replace(u'_', u' ').replace(u'#', u).strip()
                if keyName in definedStrings:
                    continue # Duplicate key
                definedStrings.append(keyName)
                validKeyNames.append(keyName)
                self.generateSubpage(keyName, data, currentDict, syncData)
            if len(validKeyNames):
                commentContents.append(comment + u' | '.join(validKeyNames) + u':' + dataWriteback)
        return u'\n\n'.join(commentContents)

    def __call__(self, content, **kwargs):
        """Filter entry point: process a dictionary page and sync its subpages.

        Returns content unchanged unless kwargs['article'] is present and its
        title is a key of self.dictionaries. Otherwise loads the stored sync
        data, walks the matches of self.commentsExtract, deletes subpages of
        strings that are no longer defined, writes the sync page and returns
        the rebuilt content.
        """
        if 'article' not in kwargs:
            return content
        if u(kwargs['article'].title) not in self.dictionaries:
            return content
        currentDict = u(kwargs['article'].title)
        if random.randint(0, 50) == 0: # With probability 2%, ignore syncdata completely. Helps with stale syncdata and people overwriting things.
            # NOTE(review): bare `u` -- presumably an empty-string literal; confirm.
            syncDataText = u
        else:
            try:
                syncDataText = u(page(self.dictionaries[currentDict]['sync']).getWikiText()).split(u'\n')
            except: # Page probably doesn't exist
                syncDataText = u
        syncData = {}
        for sync in syncDataText:
            sync = u(sync.strip())
            if not sync:
                continue
            # maxsplit=2 can yield three parts; such lines fail the length
            # check below and are ignored.
            sync = sync.split(u':', 2)
            if len(sync) == 2:
                syncData[sync[0]] = sync[1]
        oldSyncData = syncData.copy()
        newContent = u
        previousIndex = 0
        definedStrings = []
        for comment in self.commentsExtract.finditer(content):
            newContent += content[previousIndex:comment.start()]
            previousIndex = comment.end()
            # Process current comment
            # NOTE(review): nothing in this copy calls processComment, so
            # definedStrings stays empty and every previously synced key is
            # treated as deleted below; the expression on the next line
            # appears truncated -- confirm against the live source.
            newContent += u
        newContent += content[previousIndex:]
        # Check for deleted strings
        for k in oldSyncData:
            if k not in definedStrings:
                try:
                    deletePage(currentDict + self.subpageSeparator + k, 'Removed deleted string "' + k + u'" from ' + currentDict + u'.')
                except:
                    # Deletion is best-effort; the key is dropped from sync data either way.
                    pass
                if k in syncData:
                    del syncData[k]
        self.updateSyncData(currentDict, syncData, 'Full update')
        self.editCounts[currentDict] = 0
        return newContent

    def scheduledRun(self):
        """Call fixPage on every dictionary page."""
        for d in self.dictionaries:
            fixPage(d)
dictUpdater = DictionaryUpdater()
addFilter(dictUpdater)
scheduleTask(dictUpdater.scheduledRun, 3)
Update checklists on list of subscribers
def itemChecklists():
    """Refresh a random selection of user item checklists from Steam inventories.

    Steps:
      1. Load the game schema; give up silently if it cannot be loaded.
      2. Read 'Template:Item checklist' to find which schema items the
         template supports (parameter names of the form '{{{name|').
      3. Pick up to `checklistsPerRun` distinct links from
         'User:WindBOT/Item_checklists' at random and, on each linked page,
         update every template whose name contains 'checklist'.

    Takes no arguments and returns None; its effects are page edits.
    """
    game = 620  # Passed to the schema/inventory helpers; presumably the Steam app ID -- confirm
    checklistsPerRun = 12
    cleanItemName = compileRegex(r'^the +')  # Strips a leading "the " from lowercased item names

    def updateItemChecklist(checklist, schema, support):
        """Fill one checklist template from its owner's inventory.

        checklist -- template object (getParam/setParam); its 'steamid'
                     parameter identifies the user.
        schema    -- game schema forwarded to steam.items.inventory.
        support   -- iterable of supported (lowercased, cleaned) item names.

        On failure an 'error' parameter is set and the template is otherwise
        left alone. On success each supported item is set to its count (>1),
        'yes' (exactly 1), 'no' (unset/'no'/'0'), or 'had' (any other value
        except the explicit want/do-not-want markers, which are preserved).
        """
        if not checklist.getParam('steamid'):
            checklist.setParam('error', 'Unspecified Steam ID.')
            return
        supportedItems = {}
        for i in support:
            supportedItems[i] = 0
        try:
            steamUser = steam.user.profile(checklist.getParam('steamid')).id64
        except steam.user.ProfileNotFoundError as e:
            # Not a profile ID; retry treating the value as a vanity URL.
            try:
                steamUser = steam.user.vanity_url(checklist.getParam('steamid')).id64
            except Exception as e2:
                checklist.setParam('error', u'Cannot find profile: ' + u(e) + u' / ' + u(e2))
                return
        try:
            backpack = steam.items.inventory(game, steamUser, schema)
        except Exception as e:
            checklist.setParam('error', u'Cannot load inventory: ' + u(e))
            return
        for item in backpack:
            itemName = cleanItemName.sub(u'', u(item.name).lower())
            if itemName in supportedItems:
                supportedItems[itemName] += 1
        for item in supportedItems:
            if supportedItems[item] > 1:
                checklist.setParam(item, supportedItems[item])
            elif supportedItems[item] == 1:
                checklist.setParam(item, 'yes')
            else:
                p = checklist.getParam(item)
                if p is not None:
                    p = p.lower()
                if p in (None, 'no', '0'):
                    checklist.setParam(item, 'no')
                elif p not in ('wanted', 'want', 'do not', 'anti', 'do not want'):
                    # Previously marked as owned but no longer in the inventory.
                    checklist.setParam(item, 'had')
        return

    try:
        schema = steamGetGameSchema(game)
        # A set gives O(1) membership tests in the template scan below.
        allItems = set(cleanItemName.sub(u'', u(item.name).lower()) for item in schema)
    except Exception:
        # Only Exception is caught so KeyboardInterrupt/SystemExit propagate.
        return # No schema means no fancy
    support = []
    templateParams = compileRegex(r'\{\{\{\s*(?:the +)?([^{}|]+?)\s*\|')
    templateCode = page('Template:Item checklist').getWikiText()
    # finditer yields the same successive non-overlapping matches as the
    # search-and-reslice loop it replaces, without copying the string each time.
    for res in templateParams.finditer(templateCode):
        item = u(res.group(1)).lower()
        if item in allItems and item not in support:
            support.append(item)
    checkPage, checkLinks, checkKeys = linkExtract(page('User:WindBOT/Item_checklists').getWikiText())
    linksLeft = list(checkLinks.values())
    # Sample without replacement, capped at the number of links available;
    # picking a fixed 12 used to raise IndexError on shorter lists.
    for randLink in random.sample(linksLeft, min(checklistsPerRun, len(linksLeft))):
        checklist = page(randLink.getLink())
        # Single-argument form prints identically under Python 2 and 3.
        print('Updating %s' % (checklist,))
        oldContent = u(checklist.getWikiText())
        content, templatelist, templatekeys = templateExtract(oldContent)
        for t in templatelist.values():
            if t.getName().lower().find(u'checklist') != -1:
                updateItemChecklist(t, schema, support)
        content = templateRestore(content, templatelist, templatekeys)
        if oldContent != content:
            editPage(checklist, content, summary=u'Updated Item checklist [[:' + u(checklist.title) + u']]', minor=True)
scheduleTask(itemChecklists, 365)