Revision: 7061
Author: alexsh
Date: 2009-07-14 12:13:56 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
update_namespaces.py
*Set the update_main parameter of check_and_update() to default to False. (I don't think updating the main family file when no parameter is given is a good idea.)
Modified Paths:
--------------
trunk/pywikipedia/maintenance/update_namespaces.py
Modified: trunk/pywikipedia/maintenance/update_namespaces.py
===================================================================
--- trunk/pywikipedia/maintenance/update_namespaces.py 2009-07-14 12:12:17 UTC (rev 7060)
+++ trunk/pywikipedia/maintenance/update_namespaces.py 2009-07-14 12:13:56 UTC (rev 7061)
@@ -92,7 +92,7 @@
return True
return True
-def check_and_update(families, update_main):
+def check_and_update(families, update_main = False):
for family in families:
family = wikipedia.Family(family)
result = family_check.check_family(family)
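A minimal usage sketch of the new default (check_and_update() is the function from the diff above; the family list and the exact effect of update_main are assumptions based on the log message):

    # With update_main now defaulting to False, callers must opt in explicitly:
    check_and_update(['wiktionary', 'wikiquote'])           # check only, main family file left alone
    check_and_update(['wiktionary'], update_main = True)    # explicitly allow updating the main family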
Revision: 7060
Author: alexsh
Date: 2009-07-14 12:12:17 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
family_check.py
*Remove the 'from wikipedia import output' import (the wikipedia module is already imported).
*Receive API data via query.py instead of posting the request from this script itself.
Modified Paths:
--------------
trunk/pywikipedia/maintenance/family_check.py
Modified: trunk/pywikipedia/maintenance/family_check.py
===================================================================
--- trunk/pywikipedia/maintenance/family_check.py 2009-07-14 07:37:25 UTC (rev 7059)
+++ trunk/pywikipedia/maintenance/family_check.py 2009-07-14 12:12:17 UTC (rev 7060)
@@ -1,42 +1,33 @@
import sys
sys.path.append('..')
-import wikipedia, config
-from wikipedia import output
+import wikipedia, config, query
-import simplejson
-
def check_namespaces(site):
try:
if not site.apipath():
- output(u'Warning! %s has no apipath() defined!' % site)
+ wikipedia.output(u'Warning! %s has no apipath() defined!' % site)
return
except NotImplementedError:
# TODO: If use Special:Export to get XML file and parse details in <namespaces></namespaces>,
# we can get the namespace names without API.
- output(u'Warning! %s is not support API!' % site)
+ wikipedia.output(u'Warning! %s is not support API!' % site)
return
predata = { 'action': 'query',
'meta': 'siteinfo',
- 'siprop': 'namespaces',
- 'format': 'json'}
+ 'siprop': 'namespaces'}
try:
- response, json = site.postForm(site.apipath(), predata)
+ data = query.GetData(predata, site = site, useAPI = True)['query']['namespaces']
except wikipedia.ServerError, e:
- output(u'Warning! %s: %s' % (site, e))
+ wikipedia.output(u'Warning! %s: %s' % (site, e))
return
- try:
- data = simplejson.loads(json)
- except ValueError:
- output(u'Warning! %s is defined but does not exist!' % site)
- return
result = []
- for namespace in data['query']['namespaces'].itervalues():
+ for namespace in data.itervalues():
try:
defined_namespace = site.namespace(namespace['id'])
except KeyError:
- output(u'Warning! %s has no _default for namespace %s' % \
+ wikipedia.output(u'Warning! %s has no _default for namespace %s' % \
(site, namespace['id']))
defined_namespace = None
@@ -45,16 +36,16 @@
return result
def check_family(family):
- output(u'Checking namespaces for %s' % family.name)
+ wikipedia.output(u'Checking namespaces for %s' % family.name)
result = {}
for lang in family.langs:
if not family.obsolete.has_key(lang):
site = wikipedia.getSite(lang, family)
- output(u'Checking %s' % site)
+ wikipedia.output(u'Checking %s' % site)
namespaces = check_namespaces(site)
if namespaces:
for id, name, defined_namespace in namespaces:
- output(u'Namespace %s for %s is %s, %s is defined in family file.' % \
+ wikipedia.output(u'Namespace %s for %s is %s, %s is defined in family file.' % \
(id, site, name, defined_namespace))
result[lang] = namespaces
return result
@@ -64,8 +55,8 @@
wikipedia.handleArgs()
family = wikipedia.Family(wikipedia.default_family)
result = check_family(family)
- output(u'Writing raw Python dictionary to stdout.')
- output(u'Format is: (namespace_id, namespace_name, predefined_namespace)')
+ wikipedia.output(u'Writing raw Python dictionary to stdout.')
+ wikipedia.output(u'Format is: (namespace_id, namespace_name, predefined_namespace)')
print result
finally:
wikipedia.stopme()
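A self-contained sketch of the new request path in family_check.py, assuming query.GetData() returns the decoded JSON dictionary exactly as the diff above uses it (the site and language here are arbitrary examples):

    import sys
    sys.path.append('..')
    import wikipedia, query

    site = wikipedia.getSite('en', 'wikipedia')
    predata = {'action': 'query',
               'meta': 'siteinfo',
               'siprop': 'namespaces'}
    # query.GetData() posts to api.php and decodes the JSON itself, so the
    # caller no longer needs simplejson or the raw postForm() response.
    namespaces = query.GetData(predata, site = site, useAPI = True)['query']['namespaces']
    for namespace in namespaces.itervalues():
        wikipedia.output(u'%s: %s' % (namespace['id'], namespace.get('*', u'')))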
Revision: 7059
Author: alexsh
Date: 2009-07-14 07:37:25 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
*config.py: add a use_api preset that controls whether query.GetData() is used for all actions.
*family.py: Fix: nice_get_address() did not use nicepath().
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/family.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2009-07-14 07:32:04 UTC (rev 7058)
+++ trunk/pywikipedia/config.py 2009-07-14 07:37:25 UTC (rev 7059)
@@ -76,6 +76,10 @@
# Login using the API. This is less likely to break.
use_api_login = False
+# Enable receiving data from the API wherever it is available.
+
+use_api = False
+
# Get the names of all known families, and initialize
# with empty dictionaries
import wikipediatools as _wt
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-07-14 07:32:04 UTC (rev 7058)
+++ trunk/pywikipedia/family.py 2009-07-14 07:37:25 UTC (rev 7059)
@@ -3600,7 +3600,7 @@
# The URL to get a page, in the format indexed by Google.
def nice_get_address(self, code, name):
- return '/wiki/%s' % (name)
+ return '%s%s' % (self.nicepath(code), name)
def edit_address(self, code, name):
return '%s?title=%s&action=edit&useskin=monobook' % (self.path(code), name)
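A rough illustration of the nice_get_address() fix, using a hypothetical family whose article path is not the default /wiki/ (assumes the pywikipedia directory is on the import path):

    import family

    class ExampleFamily(family.Family):
        # hypothetical family that serves articles from /w/ instead of /wiki/
        def nicepath(self, code):
            return '/w/'

    fam = ExampleFamily()
    print fam.nice_get_address('en', 'Sandbox')   # '/w/Sandbox' instead of the hard-coded '/wiki/Sandbox'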
Revision: 7057
Author: btongminh
Date: 2009-07-13 20:33:34 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
* Remove the empty lines while cleaning up the command page
* Comment the code!
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/image_replacer.py
Modified: trunk/pywikipedia/commonsdelinker/image_replacer.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/image_replacer.py 2009-07-13 16:05:20 UTC (rev 7056)
+++ trunk/pywikipedia/commonsdelinker/image_replacer.py 2009-07-13 20:33:34 UTC (rev 7057)
@@ -59,6 +59,8 @@
def read_replace_log(self):
+ """ The actual worker method """
+
# FIXME: Make sqlite3 compatible
insert = """INSERT INTO %s (timestamp, old_image, new_image,
status, user, comment) VALUES (%%s, %%s, %%s,
@@ -80,6 +82,7 @@
username = None
try:
+ # Fetch revision history
revisions = self.get_history(page.title(), since, username)
# Fetch the page any way, to prevent editconflicts
old_text = text = page.get()
@@ -93,38 +96,58 @@
#self.site.conn.connect()
return time.sleep(self.config['timeout'])
+ # We're being killed
if '{{stop}}' in text.lower():
output(u'Found {{stop}} on command page. Not replacing anything.')
return time.sleep(self.config['timeout'])
+ # Sort oldest first
revisions.sort(key = lambda rev: rev['timestamp'])
+
+ # Find all commands
replacements = self.template.finditer(text)
remove_from_list = []
count = 0
for replacement in replacements:
if count == self.config.get('replacer_rate_limit', -1): break
+ # Find out who's to blame
res = self.examine_revision_history(
revisions, replacement, username)
if res and self.allowed_replacement(replacement) and \
replacement.group(1) != replacement.group(2):
+ # Insert replace command into database
self.cursor.execute(insert, res)
+ # Tag line for removal
remove_from_list.append(replacement.group(0))
output('Replacing %s by %s: %s' % replacement.groups())
count += 1
+
+ # Save all replaces to database
self.database.commit()
if remove_from_list and self.config.get('clean_list', False):
+ # Cleanup the command page
while True:
try:
for remove in remove_from_list:
text = text.replace(remove, u'')
+ # Kill the freaky CommonsDupes
+ text = text.replace('== Dummy section, heading can be deleted (using [http://tools.wikimedia.de/~magnus/commons_dupes.php CommonsDupes]) ==', '')
+ # Kill the freaky whitespace
+ text = text.replace('\r', '')
+ while '\n\n\n' in text:
+ text = text.replace('\n\n\n', '\n')
+ # Save the page
page.put(text.strip(), comment = 'Removing images being processed')
return
except wikipedia.EditConflict:
+ # Try again
text = page.get()
def get_history(self, title, since, username):
+ """ Fetch the last 50 revisions using the API """
+
address = self.site.api_address()
predata = [
('action', 'query'),
@@ -149,6 +172,8 @@
return page.get('revisions', [])
def examine_revision_history(self, revisions, replacement, username):
+ """ Find out who is to blame for a replacement """
+
for revision in revisions:
if replacement.group(0) in revision['*']:
db_time = db_timestamp(revision['timestamp'])
@@ -163,6 +188,9 @@
return
def read_finished_replacements(self):
+ """ Find out which replacements have been completed and add them to
+ the reporters queue. """
+
self.cursor.execute('START TRANSACTION WITH CONSISTENT SNAPSHOT')
self.cursor.execute("""SELECT old_image, new_image, user, comment FROM
%s WHERE status = 'done' AND timestamp >= %i""" % \
@@ -198,6 +226,8 @@
time.sleep(self.config['timeout'] * 2)
def allowed_replacement(self, replacement):
+ """ Method to prevent World War III """
+
for source, target in self.disallowed_replacements:
if source.search(replacement.group(1)) and \
target.search(replacement.group(2)):
@@ -205,6 +235,8 @@
return True
class Reporter(threadpool.Thread):
+ """ Asynchronous worker to report finished replacements to file pages. """
+
def __init__(self, pool, site, config):
self.site = wikipedia.getSite(site.lang, site.family,
site.user, True)
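The blank-line cleanup added to read_replace_log() can be tried on its own; the sample text is made up:

    text = 'Foo\r\n\r\n\r\n\r\n\r\nBar\r\n'
    text = text.replace('\r', '')        # kill the carriage returns first
    while '\n\n\n' in text:              # then squeeze runs of blank lines
        text = text.replace('\n\n\n', '\n')
    print repr(text.strip())             # prints 'Foo\nBar'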
Revision: 7055
Author: russblau
Date: 2009-07-13 13:47:49 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
Port r7035 from trunk
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2009-07-13 13:43:48 UTC (rev 7054)
+++ branches/rewrite/pywikibot/site.py 2009-07-13 13:47:49 UTC (rev 7055)
@@ -102,10 +102,14 @@
raise NoSuchSite("Language %s in family %s is obsolete"
% (self.__code, self.__family.name))
if self.__code not in self.languages():
- if self.__code == 'zh-classic' and 'zh-classical' in self.languages():
+ if self.__code == 'zh-classic' \
+ and 'zh-classical' in self.languages():
self.__code = 'zh-classical'
- # database hack (database is varchar[10] -> zh-classical
- # is cut to zh-classic.
+ # database hack (database is varchar[10], so zh-classical
+ # is cut to zh-classic)
+ elif self.__family.name in self.__family.langs.keys() \
+ or len(self.__family.langs) == 1:
+ self.__code = self.__family.name
else:
raise NoSuchSite("Language %s does not exist in family %s"
% (self.__code, self.__family.name))
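A condensed sketch of the fallback added above, with a made-up single-site family (the real check is the code shown in the diff; resolve_code is a hypothetical helper written only for illustration):

    def resolve_code(code, family_name, langs):
        if code in langs:
            return code
        if code == 'zh-classic' and 'zh-classical' in langs:
            return 'zh-classical'        # database hack: varchar[10] truncates zh-classical
        if family_name in langs or len(langs) == 1:
            return family_name           # single-site family: fall back to the family name
        raise ValueError('Language %s does not exist in family %s' % (code, family_name))

    print resolve_code('xx', 'commons', {'commons': 'commons.wikimedia.org'})   # prints 'commons'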
Revision: 7054
Author: russblau
Date: 2009-07-13 13:43:48 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
more flexibility in parsing NamespaceFilterPageGenerator arguments
Modified Paths:
--------------
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-07-13 13:43:10 UTC (rev 7053)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-07-13 13:43:48 UTC (rev 7054)
@@ -605,9 +605,13 @@
for i in xrange(len(namespaces)):
ns = namespaces[i]
if isinstance(ns, basestring):
- index = site.getNamespaceIndex(ns)
- if index is None:
- raise ValueError(u'Unknown namespace: %s' % ns)
+ try:
+ # namespace might be given as str representation of int
+ index = int(ns)
+ except ValueError:
+ index = site.getNamespaceIndex(ns)
+ if index is None:
+ raise ValueError(u'Unknown namespace: %s' % ns)
namespaces[i] = index
for page in generator:
if page.namespace() in namespaces:
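Extracted for illustration, the new parsing rule accepts either a numeric string or a namespace name (parse_namespace is a hypothetical helper, not part of pagegenerators.py):

    def parse_namespace(site, ns):
        if isinstance(ns, basestring):
            try:
                # namespace might be given as the str representation of an int
                return int(ns)                        # '10' -> 10
            except ValueError:
                index = site.getNamespaceIndex(ns)    # 'Template' -> 10 on a default wiki
                if index is None:
                    raise ValueError(u'Unknown namespace: %s' % ns)
                return index
        return ns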