Revision: 7061
Author: alexsh
Date: 2009-07-14 12:13:56 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
update_namespaces.py
*Set the update_main parameter of check_and_update() to default to False. (I don't think updating the main family file when no parameter is given is a good idea.)
Modified Paths:
--------------
trunk/pywikipedia/maintenance/update_namespaces.py
Modified: trunk/pywikipedia/maintenance/update_namespaces.py
===================================================================
--- trunk/pywikipedia/maintenance/update_namespaces.py 2009-07-14 12:12:17 UTC (rev 7060)
+++ trunk/pywikipedia/maintenance/update_namespaces.py 2009-07-14 12:13:56 UTC (rev 7061)
@@ -92,7 +92,7 @@
return True
return True
-def check_and_update(families, update_main):
+def check_and_update(families, update_main = False):
for family in families:
family = wikipedia.Family(family)
result = family_check.check_family(family)
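A minimal usage sketch of the new default (check_and_update() is the function from the diff above; the family list and the exact effect of update_main are assumptions based on the log message):

    # With update_main now defaulting to False, callers must opt in explicitly:
    check_and_update(['wiktionary', 'wikiquote'])           # check only, main family file left alone
    check_and_update(['wiktionary'], update_main = True)    # explicitly allow updating the main family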
Revision: 7060
Author: alexsh
Date: 2009-07-14 12:12:17 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
family_check.py
*Remove the 'from wikipedia import output' import (the wikipedia module is already imported).
*Receive API data via query.py instead of posting the request from this script itself.
Modified Paths:
--------------
trunk/pywikipedia/maintenance/family_check.py
Modified: trunk/pywikipedia/maintenance/family_check.py
===================================================================
--- trunk/pywikipedia/maintenance/family_check.py 2009-07-14 07:37:25 UTC (rev 7059)
+++ trunk/pywikipedia/maintenance/family_check.py 2009-07-14 12:12:17 UTC (rev 7060)
@@ -1,42 +1,33 @@
import sys
sys.path.append('..')
-import wikipedia, config
-from wikipedia import output
+import wikipedia, config, query
-import simplejson
-
def check_namespaces(site):
try:
if not site.apipath():
- output(u'Warning! %s has no apipath() defined!' % site)
+ wikipedia.output(u'Warning! %s has no apipath() defined!' % site)
return
except NotImplementedError:
# TODO: If use Special:Export to get XML file and parse details in <namespaces></namespaces>,
# we can get the namespace names without API.
- output(u'Warning! %s is not support API!' % site)
+ wikipedia.output(u'Warning! %s is not support API!' % site)
return
predata = { 'action': 'query',
'meta': 'siteinfo',
- 'siprop': 'namespaces',
- 'format': 'json'}
+ 'siprop': 'namespaces'}
try:
- response, json = site.postForm(site.apipath(), predata)
+ data = query.GetData(predata, site = site, useAPI = True)['query']['namespaces']
except wikipedia.ServerError, e:
- output(u'Warning! %s: %s' % (site, e))
+ wikipedia.output(u'Warning! %s: %s' % (site, e))
return
- try:
- data = simplejson.loads(json)
- except ValueError:
- output(u'Warning! %s is defined but does not exist!' % site)
- return
result = []
- for namespace in data['query']['namespaces'].itervalues():
+ for namespace in data.itervalues():
try:
defined_namespace = site.namespace(namespace['id'])
except KeyError:
- output(u'Warning! %s has no _default for namespace %s' % \
+ wikipedia.output(u'Warning! %s has no _default for namespace %s' % \
(site, namespace['id']))
defined_namespace = None
@@ -45,16 +36,16 @@
return result
def check_family(family):
- output(u'Checking namespaces for %s' % family.name)
+ wikipedia.output(u'Checking namespaces for %s' % family.name)
result = {}
for lang in family.langs:
if not family.obsolete.has_key(lang):
site = wikipedia.getSite(lang, family)
- output(u'Checking %s' % site)
+ wikipedia.output(u'Checking %s' % site)
namespaces = check_namespaces(site)
if namespaces:
for id, name, defined_namespace in namespaces:
- output(u'Namespace %s for %s is %s, %s is defined in family file.' % \
+ wikipedia.output(u'Namespace %s for %s is %s, %s is defined in family file.' % \
(id, site, name, defined_namespace))
result[lang] = namespaces
return result
@@ -64,8 +55,8 @@
wikipedia.handleArgs()
family = wikipedia.Family(wikipedia.default_family)
result = check_family(family)
- output(u'Writing raw Python dictionary to stdout.')
- output(u'Format is: (namespace_id, namespace_name, predefined_namespace)')
+ wikipedia.output(u'Writing raw Python dictionary to stdout.')
+ wikipedia.output(u'Format is: (namespace_id, namespace_name, predefined_namespace)')
print result
finally:
wikipedia.stopme()
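A self-contained sketch of the new request path in family_check.py, assuming query.GetData() returns the decoded JSON dictionary exactly as the diff above uses it (the site and language here are arbitrary examples):

    import sys
    sys.path.append('..')
    import wikipedia, query

    site = wikipedia.getSite('en', 'wikipedia')
    predata = {'action': 'query',
               'meta': 'siteinfo',
               'siprop': 'namespaces'}
    # query.GetData() posts to api.php and decodes the JSON itself, so the
    # caller no longer needs simplejson or the raw postForm() response.
    namespaces = query.GetData(predata, site = site, useAPI = True)['query']['namespaces']
    for namespace in namespaces.itervalues():
        wikipedia.output(u'%s: %s' % (namespace['id'], namespace.get('*', u'')))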
Revision: 7059
Author: alexsh
Date: 2009-07-14 07:37:25 +0000 (Tue, 14 Jul 2009)
Log Message:
-----------
*config.py: add a use_api preset that controls whether query.GetData() is used for all actions.
*family.py: Fix: nice_get_address() did not use nicepath().
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/family.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2009-07-14 07:32:04 UTC (rev 7058)
+++ trunk/pywikipedia/config.py 2009-07-14 07:37:25 UTC (rev 7059)
@@ -76,6 +76,10 @@
# Login using the API. This is less likely to break.
use_api_login = False
+# Enable receiving data from the API wherever it is available.
+
+use_api = False
+
# Get the names of all known families, and initialize
# with empty dictionaries
import wikipediatools as _wt
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-07-14 07:32:04 UTC (rev 7058)
+++ trunk/pywikipedia/family.py 2009-07-14 07:37:25 UTC (rev 7059)
@@ -3600,7 +3600,7 @@
# The URL to get a page, in the format indexed by Google.
def nice_get_address(self, code, name):
- return '/wiki/%s' % (name)
+ return '%s%s' % (self.nicepath(code), name)
def edit_address(self, code, name):
return '%s?title=%s&action=edit&useskin=monobook' % (self.path(code), name)
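A rough illustration of the nice_get_address() fix, using a hypothetical family whose article path is not the default /wiki/ (assumes the pywikipedia directory is on the import path):

    import family

    class ExampleFamily(family.Family):
        # hypothetical family that serves articles from /w/ instead of /wiki/
        def nicepath(self, code):
            return '/w/'

    fam = ExampleFamily()
    print fam.nice_get_address('en', 'Sandbox')   # '/w/Sandbox' instead of the hard-coded '/wiki/Sandbox'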
Revision: 7057
Author: btongminh
Date: 2009-07-13 20:33:34 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
* Remove the empty lines while cleaning up the command page
* Comment the code!
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/image_replacer.py
Modified: trunk/pywikipedia/commonsdelinker/image_replacer.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/image_replacer.py 2009-07-13 16:05:20 UTC (rev 7056)
+++ trunk/pywikipedia/commonsdelinker/image_replacer.py 2009-07-13 20:33:34 UTC (rev 7057)
@@ -59,6 +59,8 @@
def read_replace_log(self):
+ """ The actual worker method """
+
# FIXME: Make sqlite3 compatible
insert = """INSERT INTO %s (timestamp, old_image, new_image,
status, user, comment) VALUES (%%s, %%s, %%s,
@@ -80,6 +82,7 @@
username = None
try:
+ # Fetch revision history
revisions = self.get_history(page.title(), since, username)
# Fetch the page any way, to prevent editconflicts
old_text = text = page.get()
@@ -93,38 +96,58 @@
#self.site.conn.connect()
return time.sleep(self.config['timeout'])
+ # We're being killed
if '{{stop}}' in text.lower():
output(u'Found {{stop}} on command page. Not replacing anything.')
return time.sleep(self.config['timeout'])
+ # Sort oldest first
revisions.sort(key = lambda rev: rev['timestamp'])
+
+ # Find all commands
replacements = self.template.finditer(text)
remove_from_list = []
count = 0
for replacement in replacements:
if count == self.config.get('replacer_rate_limit', -1): break
+ # Find out who's to blame
res = self.examine_revision_history(
revisions, replacement, username)
if res and self.allowed_replacement(replacement) and \
replacement.group(1) != replacement.group(2):
+ # Insert replace command into database
self.cursor.execute(insert, res)
+ # Tag line for removal
remove_from_list.append(replacement.group(0))
output('Replacing %s by %s: %s' % replacement.groups())
count += 1
+
+ # Save all replaces to database
self.database.commit()
if remove_from_list and self.config.get('clean_list', False):
+ # Cleanup the command page
while True:
try:
for remove in remove_from_list:
text = text.replace(remove, u'')
+ # Kill the freaky CommonsDupes
+ text = text.replace('== Dummy section, heading can be deleted (using [http://tools.wikimedia.de/~magnus/commons_dupes.php CommonsDupes]) ==', '')
+ # Kill the freaky whitespace
+ text = text.replace('\r', '')
+ while '\n\n\n' in text:
+ text = text.replace('\n\n\n', '\n')
+ # Save the page
page.put(text.strip(), comment = 'Removing images being processed')
return
except wikipedia.EditConflict:
+ # Try again
text = page.get()
def get_history(self, title, since, username):
+ """ Fetch the last 50 revisions using the API """
+
address = self.site.api_address()
predata = [
('action', 'query'),
@@ -149,6 +172,8 @@
return page.get('revisions', [])
def examine_revision_history(self, revisions, replacement, username):
+ """ Find out who is to blame for a replacement """
+
for revision in revisions:
if replacement.group(0) in revision['*']:
db_time = db_timestamp(revision['timestamp'])
@@ -163,6 +188,9 @@
return
def read_finished_replacements(self):
+ """ Find out which replacements have been completed and add them to
+ the reporters queue. """
+
self.cursor.execute('START TRANSACTION WITH CONSISTENT SNAPSHOT')
self.cursor.execute("""SELECT old_image, new_image, user, comment FROM
%s WHERE status = 'done' AND timestamp >= %i""" % \
@@ -198,6 +226,8 @@
time.sleep(self.config['timeout'] * 2)
def allowed_replacement(self, replacement):
+ """ Method to prevent World War III """
+
for source, target in self.disallowed_replacements:
if source.search(replacement.group(1)) and \
target.search(replacement.group(2)):
@@ -205,6 +235,8 @@
return True
class Reporter(threadpool.Thread):
+ """ Asynchronous worker to report finished replacements to file pages. """
+
def __init__(self, pool, site, config):
self.site = wikipedia.getSite(site.lang, site.family,
site.user, True)
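The blank-line cleanup added to read_replace_log() can be tried on its own; the sample text is made up:

    text = 'Foo\r\n\r\n\r\n\r\n\r\nBar\r\n'
    text = text.replace('\r', '')        # kill the carriage returns first
    while '\n\n\n' in text:              # then squeeze runs of blank lines
        text = text.replace('\n\n\n', '\n')
    print repr(text.strip())             # prints 'Foo\nBar'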
Revision: 7055
Author: russblau
Date: 2009-07-13 13:47:49 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
Port r7035 from trunk
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2009-07-13 13:43:48 UTC (rev 7054)
+++ branches/rewrite/pywikibot/site.py 2009-07-13 13:47:49 UTC (rev 7055)
@@ -102,10 +102,14 @@
raise NoSuchSite("Language %s in family %s is obsolete"
% (self.__code, self.__family.name))
if self.__code not in self.languages():
- if self.__code == 'zh-classic' and 'zh-classical' in self.languages():
+ if self.__code == 'zh-classic' \
+ and 'zh-classical' in self.languages():
self.__code = 'zh-classical'
- # database hack (database is varchar[10] -> zh-classical
- # is cut to zh-classic.
+ # database hack (database is varchar[10], so zh-classical
+ # is cut to zh-classic)
+ elif self.__family.name in self.__family.langs.keys() \
+ or len(self.__family.langs) == 1:
+ self.__code = self.__family.name
else:
raise NoSuchSite("Language %s does not exist in family %s"
% (self.__code, self.__family.name))
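A condensed sketch of the fallback added above, with a made-up single-site family (the real check is the code shown in the diff; resolve_code is a hypothetical helper written only for illustration):

    def resolve_code(code, family_name, langs):
        if code in langs:
            return code
        if code == 'zh-classic' and 'zh-classical' in langs:
            return 'zh-classical'        # database hack: varchar[10] truncates zh-classical
        if family_name in langs or len(langs) == 1:
            return family_name           # single-site family: fall back to the family name
        raise ValueError('Language %s does not exist in family %s' % (code, family_name))

    print resolve_code('xx', 'commons', {'commons': 'commons.wikimedia.org'})   # prints 'commons'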
Revision: 7054
Author: russblau
Date: 2009-07-13 13:43:48 +0000 (Mon, 13 Jul 2009)
Log Message:
-----------
more flexibility in parsing NamespaceFilterPageGenerator arguments
Modified Paths:
--------------
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-07-13 13:43:10 UTC (rev 7053)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-07-13 13:43:48 UTC (rev 7054)
@@ -605,9 +605,13 @@
for i in xrange(len(namespaces)):
ns = namespaces[i]
if isinstance(ns, basestring):
- index = site.getNamespaceIndex(ns)
- if index is None:
- raise ValueError(u'Unknown namespace: %s' % ns)
+ try:
+ # namespace might be given as str representation of int
+ index = int(ns)
+ except ValueError:
+ index = site.getNamespaceIndex(ns)
+ if index is None:
+ raise ValueError(u'Unknown namespace: %s' % ns)
namespaces[i] = index
for page in generator:
if page.namespace() in namespaces:
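Extracted for illustration, the new parsing rule accepts either a numeric string or a namespace name (parse_namespace is a hypothetical helper, not part of pagegenerators.py):

    def parse_namespace(site, ns):
        if isinstance(ns, basestring):
            try:
                # namespace might be given as the str representation of an int
                return int(ns)                        # '10' -> 10
            except ValueError:
                index = site.getNamespaceIndex(ns)    # 'Template' -> 10 on a default wiki
                if index is None:
                    raise ValueError(u'Unknown namespace: %s' % ns)
                return index
        return ns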