jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
---
M spellcheck.py
1 file changed, 141 insertions(+), 113 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/spellcheck.py b/spellcheck.py
index 5bb524c..d0f1ad2 100644
--- a/spellcheck.py
+++ b/spellcheck.py
@@ -51,15 +51,17 @@
"""
#
# (C) Andre Engels, 2005
-# (C) Pywikipedia bot team, 2006-2012
+# (C) Pywikibot team, 2006-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
-import re, sys
-import string, codecs
+import re
+import sys
+import string
+import codecs
import wikipedia as pywikibot
from pywikibot import i18n
import pagegenerators
@@ -69,7 +71,8 @@
def __init__(self, text):
self.style = text
-def distance(a,b):
+
+def distance(a, b):
# Calculates the Levenshtein distance between a and b.
# That is, the number of edits needed to change one into
# the other, where one edit is the addition, removal or
@@ -78,23 +81,25 @@
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
- a,b = b,a
- n,m = m,n
- current = range(n+1)
- for i in range(1,m+1):
- previous, current = current, [i]+[0]*m
- for j in range(1,n+1):
- add, delete = previous[j]+1, current[j-1]+1
- change = previous[j-1]
- if a[j-1] != b[i-1]:
+ a, b = b, a
+ n, m = m, n
+ current = range(n + 1)
+ for i in range(1, m + 1):
+ previous, current = current, [i] + [0] * m
+ for j in range(1, n + 1):
+ add, delete = previous[j] + 1, current[j - 1] + 1
+ change = previous[j - 1]
+ if a[j - 1] != b[i - 1]:
change = change + 1
current[j] = min(add, delete, change)
return current[n]
+
def getalternatives(string):
# Find possible correct words for the incorrect word string
basetext = pywikibot.input(
- u"Give a text that should occur in the words to be checked.\nYou can choose to give no text, but this will make searching slow:")
+ u"Give a text that should occur in the words to be checked.\n"
+ u"You can choose to give no text, but this will make searching slow:")
basetext = basetext.lower()
simwords = {}
for i in xrange(11):
@@ -102,7 +107,7 @@
for alt in knownwords.keys():
dothis = not basetext or basetext in alt.lower()
if dothis:
- diff = distance(string,alt)
+ diff = distance(string, alt)
if diff < 11:
if knownwords[alt] == alt:
simwords[diff] += [alt]
@@ -114,31 +119,35 @@
posswords += simwords[i]
return posswords[:30]
+
def uncap(string):
# uncapitalize the first word of the string
if len(string) > 1:
- return string[0].lower()+string[1:]
+ return string[0].lower() + string[1:]
else:
return string.lower()
+
def cap(string):
# uncapitalize the first word of the string
- return string[0].upper()+string[1:]
+ return string[0].upper() + string[1:]
+
def askAlternative(word, context=None, title=''):
correct = None
- pywikibot.output(u"="*60)
+ pywikibot.output(u"=" * 60)
pywikibot.output(u"Found unknown word '%s' in '%s'" %
(word, title))
if context:
pywikibot.output(u"Context:")
- pywikibot.output(u""+context)
- pywikibot.output(u"-"*60)
+ pywikibot.output(u"" + context)
+ pywikibot.output(u"-" * 60)
while not correct:
for i in xrange(len(Word(word).getAlternatives())):
pywikibot.output(u"%s: Replace by '%s'"
- % (i+1,
- Word(word).getAlternatives()[i].replace('_',' ')))
- pywikibot.output(u"a: Add '%s' as correct"%word)
+ % (i + 1,
+ Word(word).getAlternatives()[i].replace('_',
+ ' ')))
+ pywikibot.output(u"a: Add '%s' as correct" % word)
if word[0].isupper():
pywikibot.output(u"c: Add '%s' as correct" %
(uncap(word)))
pywikibot.output(u"i: Ignore once (default)")
@@ -149,7 +158,8 @@
pywikibot.output(u"*: Edit by hand")
pywikibot.output(u"x: Do not check the rest of this page")
answer = pywikibot.input(u":")
- if answer == "": answer = "i"
+ if answer == "":
+ answer = "i"
if answer in "aAiIpP":
correct = word
if answer in "aA":
@@ -166,9 +176,9 @@
correct != uncap(word) and \
correct != word:
try:
- knownwords[word] += [correct.replace(' ','_')]
+ knownwords[word] += [correct.replace(' ', '_')]
except KeyError:
- knownwords[word] = [correct.replace(' ','_')]
+ knownwords[word] = [correct.replace(' ', '_')]
newwords.append(word)
knownwords[correct] = correct
newwords.append(correct)
@@ -181,69 +191,71 @@
if possible:
print "Found alternatives:"
for pos in possible:
- pywikibot.output(" %s"%pos)
+ pywikibot.output(" %s" % pos)
else:
print "No similar words found."
- elif answer=="*":
+ elif answer == "*":
correct = edit
- elif answer=="x":
+ elif answer == "x":
correct = endpage
else:
for i in xrange(len(Word(word).getAlternatives())):
- if answer == str(i+1):
- correct = Word(word).getAlternatives()[i].replace('_',' ')
+ if answer == str(i + 1):
+ correct = Word(word).getAlternatives()[i].replace('_', ' ')
return correct
+
def removeHTML(page):
# TODO: Consider removing this; this stuff can be done by
# cosmetic_changes.py
result = page
- result = result.replace('&Auml;',u'Ä')
- result = result.replace('&auml;',u'ä')
- result = result.replace('&Euml;',u'Ë')
- result = result.replace('&euml;',u'ë')
- result = result.replace('&Iuml;',u'Ï')
- result = result.replace('&iuml;',u'ï')
- result = result.replace('&Ouml;',u'Ö')
- result = result.replace('&ouml;',u'ö')
- result = result.replace('&Uuml;',u'Ü')
- result = result.replace('&uuml;',u'ü')
- result = result.replace('&Aacute;',u'Á')
- result = result.replace('&aacute;',u'á')
- result = result.replace('&Eacute;',u'É')
- result = result.replace('&eacute;',u'é')
- result = result.replace('&Iacute;',u'Í')
- result = result.replace('&iacute;',u'í')
- result = result.replace('&Oacute;',u'Ó')
- result = result.replace('&oacute;',u'ó')
- result = result.replace('&Uacute;',u'Ú')
- result = result.replace('&uacute;',u'ú')
- result = result.replace('&Agrave;',u'À')
- result = result.replace('&agrave;',u'à')
- result = result.replace('&Egrave;',u'È')
- result = result.replace('&egrave;',u'è')
- result = result.replace('&Igrave;',u'Ì')
- result = result.replace('&igrave;',u'ì')
- result = result.replace('&Ograve;',u'Ò')
- result = result.replace('&ograve;',u'ò')
- result = result.replace('&Ugrave;',u'Ù')
- result = result.replace('&ugrave;',u'ù')
- result = result.replace('&Acirc;',u'Â')
- result = result.replace('&acirc;',u'â')
- result = result.replace('&Ecirc;',u'Ê')
- result = result.replace('&ecirc;',u'ê')
- result = result.replace('&Icirc;',u'Î')
- result = result.replace('&icirc;',u'î')
- result = result.replace('&Ocirc;',u'Ô')
- result = result.replace('&ocirc;',u'ô')
- result = result.replace('&Ucirc;',u'Û')
- result = result.replace('&ucirc;',u'û')
- result = result.replace('&Aring;',u'Å')
- result = result.replace('&aring;',u'å')
- result = result.replace('&deg;',u'°')
+ result = result.replace('&Auml;', u'Ä')
+ result = result.replace('&auml;', u'ä')
+ result = result.replace('&Euml;', u'Ë')
+ result = result.replace('&euml;', u'ë')
+ result = result.replace('&Iuml;', u'Ï')
+ result = result.replace('&iuml;', u'ï')
+ result = result.replace('&Ouml;', u'Ö')
+ result = result.replace('&ouml;', u'ö')
+ result = result.replace('&Uuml;', u'Ü')
+ result = result.replace('&uuml;', u'ü')
+ result = result.replace('&Aacute;', u'Á')
+ result = result.replace('&aacute;', u'á')
+ result = result.replace('&Eacute;', u'É')
+ result = result.replace('&eacute;', u'é')
+ result = result.replace('&Iacute;', u'Í')
+ result = result.replace('&iacute;', u'í')
+ result = result.replace('&Oacute;', u'Ó')
+ result = result.replace('&oacute;', u'ó')
+ result = result.replace('&Uacute;', u'Ú')
+ result = result.replace('&uacute;', u'ú')
+ result = result.replace('&Agrave;', u'À')
+ result = result.replace('&agrave;', u'à')
+ result = result.replace('&Egrave;', u'È')
+ result = result.replace('&egrave;', u'è')
+ result = result.replace('&Igrave;', u'Ì')
+ result = result.replace('&igrave;', u'ì')
+ result = result.replace('&Ograve;', u'Ò')
+ result = result.replace('&ograve;', u'ò')
+ result = result.replace('&Ugrave;', u'Ù')
+ result = result.replace('&ugrave;', u'ù')
+ result = result.replace('&Acirc;', u'Â')
+ result = result.replace('&acirc;', u'â')
+ result = result.replace('&Ecirc;', u'Ê')
+ result = result.replace('&ecirc;', u'ê')
+ result = result.replace('&Icirc;', u'Î')
+ result = result.replace('&icirc;', u'î')
+ result = result.replace('&Ocirc;', u'Ô')
+ result = result.replace('&ocirc;', u'ô')
+ result = result.replace('&Ucirc;', u'Û')
+ result = result.replace('&ucirc;', u'û')
+ result = result.replace('&Aring;', u'Å')
+ result = result.replace('&aring;', u'å')
+ result = result.replace('&deg;', u'°')
return result
-def spellcheck(page, checknames = True, knownonly = False, title=''):
+
+def spellcheck(page, checknames=True, knownonly=False, title=''):
pageskip = []
text = page
if correct_html_codes:
@@ -251,30 +263,33 @@
loc = 0
while True:
wordsearch = re.compile(r'([\s\=\<\>\_]*)([^\s\=\<\>\_]+)')
- match = wordsearch.search(text,loc)
+ match = wordsearch.search(text, loc)
if not match:
# No more words on this page
break
loc += len(match.group(1))
bigword = Word(match.group(2))
smallword = bigword.derive()
- if not Word(smallword).isCorrect(checkalternative = knownonly) and \
+ if not Word(smallword).isCorrect(checkalternative=knownonly) and \
(checknames or not smallword[0].isupper()):
replacement = askAlternative(smallword,
- context=text[max(0,loc-40):loc + len(match.group(2))+40],
+ context=text[
+ max(0, loc - 40):loc + len(
+ match.group(2)) + 40],
title=title)
if replacement == edit:
import editarticle
editor = editarticle.TextEditor()
# TODO: Don't know to which index to jump
- newtxt = editor.edit(text, jumpIndex = 0, highlight=smallword)
+ newtxt = editor.edit(text, jumpIndex=0, highlight=smallword)
if newtxt:
text = newtxt
elif replacement == endpage:
loc = len(text)
else:
replacement = bigword.replace(replacement)
- text = text[:loc] + replacement + text[loc+len(match.group(2)):]
+ text = text[:loc] + replacement + text[
+ loc + len(match.group(2)):]
loc += len(replacement)
if knownonly == 'plus' and text != page:
knownonly = False
@@ -288,13 +303,14 @@
class Word(object):
- def __init__(self,text):
+
+ def __init__(self, text):
self.word = text
def __str__(self):
return self.word
- def __cmp__(self,other):
+ def __cmp__(self, other):
return self.word.__cmp__(str(other))
def derive(self):
@@ -309,51 +325,56 @@
# Remove barred links
if shortword.rfind('|') != -1:
if -1 < shortword.rfind('[[') < shortword.rfind('|'):
- shortword = shortword[:shortword.rfind('[[')] + shortword[shortword.rfind('|')+1:]
+ shortword = shortword[:shortword.rfind('[[')] + shortword[
+ shortword.rfind('|') + 1:]
else:
- shortword = shortword[shortword.rfind('|')+1:]
- shortword = shortword.replace('[','')
- shortword = shortword.replace(']','')
+ shortword = shortword[shortword.rfind('|') + 1:]
+ shortword = shortword.replace('[', '')
+ shortword = shortword.replace(']', '')
# Remove non-alphanumerical characters at the start
try:
while shortword[0] in string.punctuation:
- shortword=shortword[1:]
+ shortword = shortword[1:]
except IndexError:
return ""
# Remove non-alphanumerical characters at the end; no need for the
# try here because if things go wrong here, they should have gone
# wrong before
while shortword[-1] in string.punctuation:
- shortword=shortword[:-1]
+ shortword = shortword[:-1]
# Do not check URLs
if shortword.startswith("http://"):
- shortword=""
+ shortword = ""
# Do not check 'words' with only numerical characters
number = True
for i in xrange(len(shortword)):
- if not (shortword[i] in string.punctuation or shortword[i] in string.digits):
+ if not (shortword[i] in string.punctuation or
+ shortword[i] in string.digits):
number = False
if number:
shortword = ""
return shortword
- def replace(self,rep):
- # Replace the short form by 'rep'. Keeping simple for now - if the
- # short form is part of the long form, replace it. If it is not, ask
- # the user
+ def replace(self, rep):
+ """Replace the short form by 'rep'. Keeping simple for now - if the
+ short form is part of the long form, replace it. If it is not, ask the
+ user
+
+ """
if rep == self.derive():
return self.word
if self.derive() not in self.word:
return pywikibot.input(
u"Please give the result of replacing %s by %s in %s:"
% (self.derive(), rep, self.word))
- return self.word.replace(self.derive(),rep)
+ return self.word.replace(self.derive(), rep)
- def isCorrect(self,checkalternative = False):
- # If checkalternative is True, the word will only be found incorrect if
- # it is on the spelling list as a spelling error. Otherwise it will
- # be found incorrect if it is not on the list as a correctly spelled
- # word.
+ def isCorrect(self, checkalternative=False):
+ """If checkalternative is True, the word will only be found incorrect
+ if it is on the spelling list as a spelling error. Otherwise it will be
+ found incorrect if it is not on the list as a correctly spelled word.
+
+ """
if self.word == "":
return True
if self.word in pageskip:
@@ -366,7 +387,8 @@
except KeyError:
pass
if self.word != uncap(self.word):
- return Word(uncap(self.word)).isCorrect(checkalternative=checkalternative)
+ return Word(uncap(self.word)).isCorrect(
+ checkalternative=checkalternative)
else:
if checkalternative:
if checklang == 'nl' and self.word.endswith("'s"):
@@ -401,11 +423,12 @@
def declare_correct(self):
knownwords[self.word] = self.word
- def declare_alternative(self,alt):
+ def declare_alternative(self, alt):
if not alt in knownwords[self.word]:
knownwords[self.word].append(word)
newwords.append(self.word)
return self.alternatives
+
def checkPage(page, checknames=True, knownonly=False):
try:
@@ -413,10 +436,12 @@
except pywikibot.Error:
pass
else:
- text = spellcheck(text, checknames=checknames, knownonly=knownonly, title=page.title())
+ text = spellcheck(text, checknames=checknames, knownonly=knownonly,
+ title=page.title())
if text != page.get():
summary = i18n.twtranslate(page.site, 'spellcheck-checking')
page.put(text, summary)
+
try:
pageskip = []
@@ -460,10 +485,10 @@
if not checklang:
checklang = mysite.language()
filename = pywikibot.config.datafilepath('externals/spelling',
- 'spelling-' + checklang + '.txt')
+ 'spelling-' + checklang + '.txt')
print "Getting wordlist"
try:
- f = codecs.open(filename, 'r', encoding = mysite.encoding())
+ f = codecs.open(filename, 'r', encoding=mysite.encoding())
for line in f.readlines():
# remove trailing newlines and carriage returns
try:
@@ -495,10 +520,13 @@
raise
try:
if newpages:
- for (page, date, length, loggedIn, user, comment) in pywikibot.getSite().newpages(1000):
+ for (page, date, length, loggedIn, user,
+ comment) in pywikibot.getSite().newpages(1000):
checkPage(page, checknames, knownonly)
elif start:
- for page in pagegenerators.PreloadingGenerator(pagegenerators.AllpagesPageGenerator(start=start,includeredirects=False)):
+ for page in pagegenerators.PreloadingGenerator(
+ pagegenerators.AllpagesPageGenerator(start=start,
+ includeredirects=False)):
checkPage(page, checknames, knownonly)
if longpages:
@@ -509,7 +537,7 @@
title = ' '.join(title)
while title != '':
try:
- page = pywikibot.Page(mysite,title)
+ page = pywikibot.Page(mysite, title)
text = page.get()
except pywikibot.NoPage:
print "Page does not exist."
@@ -521,14 +549,14 @@
finally:
pywikibot.stopme()
filename = pywikibot.config.datafilepath('externals/spelling',
- 'spelling-' + checklang + '.txt')
+ 'spelling-' + checklang + '.txt')
if rebuild:
list = knownwords.keys()
list.sort()
- f = codecs.open(filename, 'w', encoding = mysite.encoding())
+ f = codecs.open(filename, 'w', encoding=mysite.encoding())
else:
list = newwords
- f = codecs.open(filename, 'a', encoding = mysite.encoding())
+ f = codecs.open(filename, 'a', encoding=mysite.encoding())
for word in list:
if Word(word).isCorrect():
if word != uncap(word):
@@ -536,7 +564,7 @@
# Capitalized form of a word that is in the list
# uncapitalized
continue
- f.write("1 %s\n"%word)
+ f.write("1 %s\n" % word)
else:
- f.write("0 %s %s\n"%(word," ".join(knownwords[word])))
+ f.write("0 %s %s\n" % (word, " ".join(knownwords[word])))
f.close()
--
To view, visit
https://gerrit.wikimedia.org/r/98109
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot