User:WindBOT/CoreSource: Difference between revisions
From the Portal Wiki
More actions
m shit |
Added new built-in filter: Simplify anchors |
||
| (9 intermediate revisions by the same user not shown) | |||
| Line 22: | Line 22: | ||
import tempfile | import tempfile | ||
import traceback | import traceback | ||
import threading | |||
import random | import random | ||
import subprocess | import subprocess | ||
import cStringIO as StringIO | import cStringIO as StringIO | ||
import shutil | |||
import wikitools | import wikitools | ||
import wikiUpload | import wikiUpload | ||
import feedparser | import feedparser | ||
import steam | try: | ||
import steam | |||
except: | |||
steam = None | |||
from wikiUpload import wikiUploader | from wikiUpload import wikiUploader | ||
from botConfig import config | from botConfig import config | ||
steam.set_api_key(config['steamAPI']) | if steam is not None and 'steamAPI' in config: | ||
steam.set_api_key(config['steamAPI']) | |||
config['runtime'] = { | config['runtime'] = { | ||
'rcid': -1, | 'rcid': -1, | ||
| Line 82: | Line 88: | ||
kw = kwargs or self.kwargs | kw = kwargs or self.kwargs | ||
return self.func(*(self.pending + args), **kw) | return self.func(*(self.pending + args), **kw) | ||
class BatchScheduler:
    """Queue callables and run them on threads in fixed-size batches.

    Tasks are added with schedule() and run by execute(). Each batch starts
    at most `concurrency` threads and waits for all of them to finish before
    the next batch is started.
    """

    def __init__(self, concurrency=16):
        """Create an empty scheduler.

        concurrency -- maximum number of threads started per batch. Values
        below 1 are raised to 1 so that execute() always makes progress.
        """
        # The argument used to be ignored in favour of a hard-coded 16.
        self.concurrency = max(1, int(concurrency))
        self.tasks = []

    def schedule(self, target, *args, **kwargs):
        """Queue target(*args, **kwargs) for the next call to execute()."""
        self.tasks.append((target, args, kwargs))

    def execute(self):
        """Run all queued tasks, blocking until the queue is empty."""
        while self.tasks:
            # Re-read the limit for every batch; guard against it having been
            # set to 0 from outside, which would otherwise loop forever.
            batchSize = max(1, self.concurrency)
            batch = self.tasks[:batchSize]
            pool = []
            for target, args, kwargs in batch:
                worker = threading.Thread(target=target, args=args, kwargs=kwargs)
                worker.start()
                pool.append(worker)
            for worker in pool:
                worker.join()
            # Drop only what was just run; anything appended to self.tasks
            # while the batch was running stays queued for the next round.
            self.tasks = self.tasks[len(batch):]
def getTempFilename(extension=None): | |||
global config | global config | ||
f = tempfile.mkstemp(prefix=config['tempPrefix']) | if extension is None: | ||
f = tempfile.mkstemp(prefix=config['tempPrefix']) | |||
else: | |||
f = tempfile.mkstemp(suffix=u'.' + u(extension), prefix=config['tempPrefix']) | |||
os.close(f[0]) # Damn you Python I just want a filename | os.close(f[0]) # Damn you Python I just want a filename | ||
return f[1] | return u(f[1]) | ||
def wiki(): | def wiki(): | ||
| Line 124: | Line 151: | ||
global config | global config | ||
summary = getSummary(summary) | summary = getSummary(summary) | ||
p = page(p) | |||
try: | |||
print 'Editing', p.title, 'with summary', summary | |||
except: | |||
pass | |||
try: | try: | ||
if nocreate: | if nocreate: | ||
result = | if minor: | ||
result = p.edit(u(content), summary=summary, minor=True, bot=bot, nocreate=nocreate) | |||
else: | |||
result = p.edit(u(content), summary=summary, notminor=True, bot=bot, nocreate=nocreate) | |||
else: | else: | ||
result = | if minor: | ||
result = p.edit(u(content), summary=summary, minor=True, bot=bot) | |||
else: | |||
result = p.edit(u(content), summary=summary, notminor=True, bot=bot) | |||
except: | except: | ||
warning('Couldn\'t edit', p) | warning('Couldn\'t edit', p.title) | ||
return None | return None | ||
try: | try: | ||
| Line 136: | Line 174: | ||
config['runtime']['edits'] += 1 | config['runtime']['edits'] += 1 | ||
except: | except: | ||
warning('Couldn\'t edit', p) | warning('Couldn\'t edit', p.title) | ||
return result | return result | ||
def deletePage(p, summary=False): | def deletePage(p, summary=False): | ||
| Line 169: | Line 207: | ||
except: | except: | ||
warning('Couldn\'t update edit count.') | warning('Couldn\'t update edit count.') | ||
# Because SOME LIBRARY will not use singletons, this has to be done at the bot level | |||
# rather than the individual filter level to avoid loading the damn thing twice. | |||
steamGameSchemas = {} | |||
def steamGetGameSchema(game):
    """Return game.item_schema(), computed once per game and then cached.

    Results are kept in the module-level steamGameSchemas dict, keyed by the
    game object itself. Returns None when the steam module is unavailable
    (the module-level name `steam` is None).
    """
    if steam is not None:
        if game not in steamGameSchemas:
            # First request for this game: build the schema and remember it.
            steamGameSchemas[game] = game.item_schema()
        return steamGameSchemas[game]
    return None
steamGameAssets = {} | |||
def steamGetGameAssets(game):
    """Return game.assets(), computed once per game and then cached.

    Results are kept in the module-level steamGameAssets dict, keyed by the
    game object itself. Returns None when the steam module is unavailable
    (the module-level name `steam` is None).
    """
    if steam is not None:
        if game not in steamGameAssets:
            # First request for this game: load the assets and remember them.
            steamGameAssets[game] = game.assets()
        return steamGameAssets[game]
    return None
def compileRegex(regex, flags=re.IGNORECASE): | def compileRegex(regex, flags=re.IGNORECASE): | ||
| Line 201: | Line 258: | ||
self.setLabel(None) | self.setLabel(None) | ||
self.setLink(u'') | self.setLink(u'') | ||
self.anchor = None | |||
self.joined = False | self.joined = False | ||
if len(content) > 2: | if len(content) > 2: | ||
| Line 210: | Line 268: | ||
if lnk.find(u':') == -1: | if lnk.find(u':') == -1: | ||
lnk = lnk.replace(u'_', u' ') | lnk = lnk.replace(u'_', u' ') | ||
anchor = None | |||
if lnk.find(u'#') != -1: | |||
lnk, anchor = lnk.split(u'#', 1) | |||
self.setAnchor(anchor) | |||
self.setLink(lnk) | self.setLink(lnk) | ||
if len(split) == 2: | if len(split) == 2: | ||
| Line 215: | Line 277: | ||
else: | else: | ||
self.setLabel(split[0]) | self.setLabel(split[0]) | ||
self.joined = | self.joined = anchor is None | ||
elif content[0] == u'[' and content[-1] == u']': | elif content[0] == u'[' and content[-1] == u']': | ||
split = content[1:-1].split(u' ', 1) | split = content[1:-1].split(u' ', 1) | ||
| Line 228: | Line 290: | ||
def getBody(self): | def getBody(self): | ||
return u(self.body) | return u(self.body) | ||
def getLink(self): | def getLink(self, withAnchor=False): | ||
if withAnchor and self.getAnchor() is not None: | |||
return u(self.link) + u'#' + self.getAnchor() | |||
return u(self.link) | return u(self.link) | ||
def getAnchor(self):
    """Return the anchor stored on this object (self.anchor), unchanged."""
    currentAnchor = self.anchor
    return currentAnchor
def getLabel(self): | def getLabel(self): | ||
if self.label is None: | if self.label is None: | ||
| Line 241: | Line 307: | ||
self.body = u(body) | self.body = u(body) | ||
def setLink(self, link): | def setLink(self, link): | ||
self.link = u(link | link = u(link) | ||
if self.getType() == u'internal' and link.find(u'#') != -1: | |||
link, anchor = link.split(u'#', 1) | |||
self.setAnchor(anchor) | |||
self.link = link | |||
if self.joined: | if self.joined: | ||
self.label = u(link) | self.label = u(link) | ||
replaceDots = compileRegex(r'(?:\.[a-f\d][a-f\d])+') | |||
def _replaceDots(self, g):
    """Decode a matched run of `.XX` groups into text.

    g is a regex match object whose whole match consists of repeated
    three-character groups: a dot followed by two hex digits. Each hex pair
    becomes one byte, and the resulting byte string is decoded as UTF-8.
    """
    matched = g.group(0)
    # Step through the match three characters at a time, skipping the dot.
    hexPairs = [matched[pos + 1:pos + 3] for pos in xrange(0, len(matched), 3)]
    rawBytes = ''.join([chr(int(pair, 16)) for pair in hexPairs])
    return rawBytes.decode('utf8')
def setAnchor(self, anchor):
    """Store `anchor` on this link, normalising it for internal links.

    For links whose getType() is u'internal', underscores are turned into
    spaces and runs of `.XX` hex escapes are decoded through
    link.replaceDots / self._replaceDots. Any other link type, and a None
    anchor, is stored as given.
    """
    # NOTE(review): indentation was lost in this view of the file; the decode
    # step is assumed to belong to the internal-link branch -- confirm.
    if anchor is not None and self.getType() == u'internal':
        # The result of replace() used to be thrown away, so underscores were
        # never actually converted.
        anchor = u(anchor).replace(u'_', u' ')
        try:
            anchor = link.replaceDots.sub(self._replaceDots, anchor)
        except Exception:
            # Best effort: if the escapes do not decode (e.g. they are not
            # valid UTF-8), keep the anchor with its escapes intact.
            pass
    self.anchor = anchor
def setLabel(self, label): | def setLabel(self, label): | ||
if label is None: | if label is None: | ||
| Line 257: | Line 342: | ||
def __unicode__(self): | def __unicode__(self): | ||
label = self.getLabel() | label = self.getLabel() | ||
tmpLink = self.getLink() | tmpLink = self.getLink(withAnchor=True) | ||
if self.getType() == u'internal': | if self.getType() == u'internal': | ||
tmpLink2 = tmpLink.replace(u'_', u' ') | tmpLink2 = tmpLink.replace(u'_', u' ') | ||
| Line 554: | Line 639: | ||
content = content.replace(s[0], s[1]) | content = content.replace(s[0], s[1]) | ||
return content | return content | ||
def regReplaceCallBack(sub, match): | def regReplaceCallBack(sub, match): | ||
groupcount = 1 | groupcount = 1 | ||
| Line 611: | Line 695: | ||
return compileRegex(u'/' + u(f[1]['language']) + u'$').search(u(article)) | return compileRegex(u'/' + u(f[1]['language']) + u'$').search(u(article)) | ||
return True | return True | ||
scheduledTasks = [] | |||
def scheduleTask(task, oneinevery): | def scheduleTask(task, oneinevery): | ||
global scheduledTasks | |||
result = random.randint(0, oneinevery-1) | result = random.randint(0, oneinevery-1) | ||
print 'Task:', task, '; result:', result | print 'Task:', task, '; result:', result | ||
if not result: | if not result: | ||
task() | scheduledTasks.append(task) | ||
def runScheduledTasks():
    """Call every task in the module-level scheduledTasks list, in order.

    Each task is called with no arguments. A task that raises does not stop
    the remaining tasks: the failure is reported together with its traceback
    and the loop moves on. The list itself is not modified here.
    """
    global scheduledTasks
    if not scheduledTasks:
        print('No tasks scheduled.')
        return
    print('Running scheduled tasks...')
    for t in scheduledTasks:
        print('Running task: %s' % (t,))
        try:
            t()
            print('End of task: %s' % (t,))
        except:
            # Deliberately broad so that one failing task cannot abort the
            # rest, but show what went wrong instead of hiding it.
            print('Error while executing task: %s' % (t,))
            traceback.print_exc()
def sFilter(filters, content, returnActive=False, **kwargs): | def sFilter(filters, content, returnActive=False, **kwargs): | ||
content = u(content) | content = u(content) | ||
| Line 654: | Line 753: | ||
f, params = f | f, params = f | ||
for i in range(len(linklist)): | for i in range(len(linklist)): | ||
if linklist[i] is not None: | if linklist[i] is not None and isinstance(linklist[i], link): | ||
oldLink = u(linklist[i]) | oldLink = u(linklist[i]) | ||
linklist[i] = f(linklist[i], **kwargs) | linklist[i] = f(linklist[i], **kwargs) | ||
| Line 670: | Line 769: | ||
f, params = f | f, params = f | ||
for i in range(len(templatelist)): | for i in range(len(templatelist)): | ||
if templatelist[i] is not None: | if templatelist[i] is not None and isinstance(templatelist[i], template): | ||
oldTemplate = u(templatelist[i]) | oldTemplate = u(templatelist[i]) | ||
templatelist[i] = f(templatelist[i], **kwargs) | templatelist[i] = f(templatelist[i], **kwargs) | ||
| Line 702: | Line 801: | ||
return setFilterName(curry(dumbReplacement, rs), u'DumbReplacements(' + u(rs) + u')') | return setFilterName(curry(dumbReplacement, rs), u'DumbReplacements(' + u(rs) + u')') | ||
def dumbReplace(subject, replacement): | def dumbReplace(subject, replacement): | ||
return setFilterName(dumbReplaces({subject: replacement}), u'DumbReplacement(' + u(subject) + u' | return setFilterName(dumbReplaces({subject: replacement}), u'DumbReplacement(' + u(subject) + u' \u2192 ' + u(replacement) + u')') | ||
def wordRegex(word, **kwargs): | def wordRegex(word, **kwargs): | ||
flags = None | flags = None | ||
| Line 709: | Line 808: | ||
word = word[0] | word = word[0] | ||
word = u(re.sub(r'[-_ ]+', r'[-_ ]', u(word))) | word = u(re.sub(r'[-_ ]+', r'[-_ ]', u(word))) | ||
word = u(r"(?<![\u00E8-\u00F8\xe8-\xf8\w])(?<!'')(?<!" + r'"' + | word = u(r"(?<![\u00E8-\u00F8\xe8-\xf8\w])(?<!'')(?<!" + r'"' + ")(?:\\b|(?<=[ \\[\\]\\(\\):;.,\"'*\\xab\\xbb])|^)" + word + r"(?:\b(?![\u00E8-\u00F8\xe8-\xf8\w])(?!''|" + r'"' + ")|(?=[ \\[\\]\(\\):;.,\"'*\\xab\\xbb])|$)") | ||
if flags is None: | if flags is None: | ||
return word | return word | ||
| Line 734: | Line 833: | ||
else: | else: | ||
rs[wordRegex(w, **kwargs)] = correct | rs[wordRegex(w, **kwargs)] = correct | ||
return setFilterName(regexes(rs), u'WordFilter(' + u'/'.join(badwords2) + u' | return setFilterName(regexes(rs), u'WordFilter(' + u'/'.join(badwords2) + u' \u2192 ' + correct + u')') | ||
def enforceCapitalization(*words, **kwargs): | def enforceCapitalization(*words, **kwargs): | ||
for w in words: | for w in words: | ||
| Line 948: | Line 1,047: | ||
except: | except: | ||
error('Couldn\'t grab page', p) | error('Couldn\'t grab page', p) | ||
coderegex = compileRegex(r'^(?: | coderegex = compileRegex(r'^(?: [^\r\n]*(?:[\r\n]+|$))+', re.MULTILINE) | ||
trimcode = compileRegex(r'^ | trimcode = compileRegex(r'^ |</?nowiki>', re.MULTILINE) | ||
for m in coderegex.finditer(code): | for m in coderegex.finditer(code): | ||
try: | try: | ||
| Line 1,074: | Line 1,173: | ||
languages[u(key)][curlang] = u(value) | languages[u(key)][curlang] = u(value) | ||
else: | else: | ||
pass | pass | ||
return languages | return languages | ||
def languagesFilter(languages, commonto=None, prefix=None, suffix=None, exceptions=[]): | def languagesFilter(languages, commonto=None, prefix=None, suffix=None, exceptions=[]): | ||
| Line 1,116: | Line 1,215: | ||
else: | else: | ||
addSafeFilter(f, language=targetPageLang) | addSafeFilter(f, language=targetPageLang) | ||
def getRandBits():
    """Return a non-negative random integer with 128 random bits."""
    bitCount = 128
    return random.getrandbits(bitCount)
def getFileHash(filename):
    """Return the hexadecimal MD5 digest of the file's contents.

    filename -- path of the file to hash; it is opened in binary mode.
    The digest is passed through u() before being returned.
    Errors from opening or reading the file propagate to the caller.
    """
    h = hashlib.md5()
    f = open(filename, 'rb')
    try:
        # Read fixed-size chunks instead of readlines(): a binary file may
        # contain no newlines at all, which would load it whole into memory.
        while True:
            chunk = f.read(65536)
            if not chunk:
                break
            h.update(chunk)
    finally:
        # Always release the handle, even when a read fails.
        f.close()
    return u(h.hexdigest())
def deleteFile(*fs):
    """Try to remove every given path, ignoring any failure.

    fs -- any number of file paths. A path that cannot be removed (missing
    file, bad argument, ...) is skipped and the remaining ones are still
    attempted. Returns None.
    """
    for path in fs:
        try:
            os.remove(path)
        except:
            # Best-effort cleanup: move on to the next path.
            continue
def programExists(programName):
    """Return True when `which programName` exits with status 0.

    Any failure to run `which` at all (for example the command being
    unavailable or the argument being unusable) is reported as False.
    """
    try:
        exitCode = subprocess.call(['which', programName])
    except:
        return False
    return exitCode == 0
def run(): | def run(): | ||
global config | global config | ||
| Line 1,129: | Line 1,249: | ||
doPageRequests(force=False) | doPageRequests(force=False) | ||
updateEditCount() | updateEditCount() | ||
import rcNotify | runScheduledTasks() | ||
try: | |||
import rcNotify | |||
rcNotify.main(once=True) | |||
except: | |||
pass | |||
try: | try: | ||
subprocess.Popen(['killall', 'cpulimit']).communicate() | subprocess.Popen(['killall', 'cpulimit']).communicate() | ||