Revision: 7988
Author: xqt
Date: 2010-03-11 17:44:03 +0000 (Thu, 11 Mar 2010)
Log Message:
-----------
test api with new has_api() method; move throttle.log to control file
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Property Changed:
----------------
trunk/pywikipedia/pywikibot/
Property changes on: trunk/pywikipedia/pywikibot
___________________________________________________________________
Modified: svn:ignore
- *.pyc
+ *.pyc
*.ctrl
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-03-11 15:01:00 UTC (rev 7987)
+++ trunk/pywikipedia/wikipedia.py 2010-03-11 17:44:03 UTC (rev 7988)
@@ -4144,12 +4144,13 @@
self.releasepid = 1200 # Free the process id
self.lastwait = 0.0
self.delay = 0
- if multiplydelay:
+ self.multiplydelay = multiplydelay
+ if self.multiplydelay:
self.checkMultiplicity()
self.setDelay(mindelay)
def logfn(self):
- return config.datafilepath('logs', 'throttle.log')
+ return config.datafilepath('pywikibot', 'throttle.ctrl')
def checkMultiplicity(self):
self.lock.acquire()
@@ -4211,7 +4212,7 @@
def getDelay(self):
thisdelay = self.delay
- if self.pid: # If self.pid, we're checking for multiple processes
+ if self.multiplydelay: # If self.pid, we're checking for multiple processes
if time.time() > self.checktime + self.checkdelay:
self.checkMultiplicity()
if thisdelay < (self.mindelay * self.next_multiplicity):
@@ -4595,6 +4596,7 @@
mediawiki_message: Retrieve the text of a specified MediaWiki message
has_mediawiki_message: True if this site defines specified MediaWiki
message
+ has_api: True if this site's family provides api interface
shared_image_repository: Return tuple of image repositories used by this
site.
@@ -5677,21 +5679,13 @@
"""Return the MediaWiki message text for key "key"
"""
# Allmessages is retrieved once for all per created Site object
if (not self._mediawiki_messages) or forceReload:
- api = False
+ api = self.has_api()
if verbose:
output(u"Retrieving mediawiki messages from
Special:Allmessages")
# Only MediaWiki r27393/1.12 and higher support XML output for
Special:Allmessages
if self.versionnumber() < 12:
usePHP = True
else:
- try:
- if config.use_api:
- x = self.api_address()
- del x
- api = True
- except NotImplementedError:
- api = False
-
usePHP = False
elementtree = True
try:
@@ -5797,6 +5791,17 @@
return True
except KeyError:
return False
+
+ def has_api(self):
+ """Return True if this sites family has api
interface."""
+ try:
+ if config.use_api:
+ x = self.apipath()
+ del x
+ return True
+ except NotImplementedError:
+ pass
+ return False
def _load(self, sysop = False, force = False):
"""
@@ -5814,16 +5819,10 @@
if verbose:
output(u'Getting information for site %s' % self)
- try:
- api_url = self.api_address()
- del api_url
- except NotImplementedError:
- config.use_api = False
-
# Get data
# API Userinfo is available from version 1.11
# preferencetoken available from 1.14
- if config.use_api and self.versionnumber() >= 11:
+ if self.has_api() and self.versionnumber() >= 11:
#Query userinfo
params = {
'action': 'query',
@@ -5860,66 +5859,47 @@
Use API when enabled use_api and version >= 1.11,
or use Special:Search.
"""
- try:
- if config.use_api and self.versionnumber() >= 11:
- apiUrl = self.site().api_address()
- del apiUrl
- else:
- raise NotImplementedError
- except NotImplementedError:
- _search = self._search_without_api
+ if self.has_api() and self.versionnumber() >= 11:
+ #Yield search results (using api) for query.
+ params = {
+ 'action': 'query',
+ 'list': 'search',
+ 'srsearch': q,
+ 'srlimit': number
+ }
+ if namespaces:
+ params['srnamespace'] = namespaces
+
+ offset = 0
+ while True:
+ params['sroffset'] = offset
+ data = query.GetData(params, self)['query']
+ if 'error' in data:
+ raise RuntimeError('%s' % data['error'])
+ if not data['search']:
+ break
+ for s in data['search']:
+ offset += 1
+ page = Page(self, s['title'])
+ yield page, s['snippet'], '', s['size'],
s['wordcount'], s['timestamp']
else:
- _search = self._search_with_api
- return _search(query, number, namespaces)
+ #Yield search results (using Special:Search page) for query.
+ throttle = True
+ path = self.search_address(urllib.quote_plus(query.encode('utf-8')),
+ n=number, ns=namespaces)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(ur'<li><a href=".+?"
title="(?P<title>.+?)">.+?</a>',
+ re.DOTALL)
+ for m in entryR.finditer(html):
+ page = Page(self, m.group('title'))
+ yield page, '', '', '', '', ''
- def _search_with_api(self, q, number, namespaces):
- """Yield search results (using api) for query."""
- params = {
- 'action': 'query',
- 'list': 'search',
- 'srsearch': q,
- 'srlimit': number
- }
- if namespaces:
- params['srnamespace'] = namespaces
-
- offset = 0
- while True:
- params['sroffset'] = offset
- data = query.GetData(params, self)['query']
- if 'error' in data:
- raise RuntimeError('%s' % data['error'])
- if not data['search']:
- break
- for s in data['search']:
- offset += 1
- page = Page(self, s['title'])
- yield page, s['snippet'], '', s['size'],
s['wordcount'], s['timestamp']
-
- def _search_without_api(self, query, number, namespaces):
- """Yield search results (using Special:Search page) for
query."""
- throttle = True
- path = self.search_address(urllib.quote_plus(query.encode('utf-8')),
- n=number, ns=namespaces)
- get_throttle()
- html = self.getUrl(path)
-
- entryR = re.compile(ur'<li><a href=".+?"
title="(?P<title>.+?)">.+?</a>',
- re.DOTALL)
-
- for m in entryR.finditer(html):
- page = Page(self, m.group('title'))
- yield page, '', '', '', '', ''
-
# TODO: avoid code duplication for the following methods
def logpages(self, number=50, mode='', user=None, repeat=False, namespace=[],
offset=-1):
- if config.use_api:
- apiURL = self.api_address()
- del apiURL
- else:
- raise NotImplementedError
- if mode not in ('block', 'protect', 'rights',
'delete', 'upload',
+ if not self.has_api() or \
+ mode not in ('block', 'protect', 'rights',
'delete', 'upload',
'move', 'import', 'patrol',
'merge', 'suppress',
'review', 'stable', 'gblblock',
'renameuser',
'globalauth', 'gblrights', 'abusefilter',
'newusers'):
@@ -5986,14 +5966,9 @@
# should use both offset and limit parameters, and have an
# option to fetch older rather than newer pages
seen = set()
- try:
- d = self.apipath()
- del d
- except NotImplementedError:
- config.use_api = False
-
+ api = self.has_api()
while True:
- if config.use_api and self.versionnumber() >= 10:
+ if api and self.versionnumber() >= 10:
params = {
'action': 'query',
'list': 'recentchanges',
@@ -6668,18 +6643,12 @@
"""Yield Pages from results of Special:Linksearch for
'siteurl'."""
cache = []
R = re.compile('title
?=\"([^<>]*?)\">[^<>]*</a></li>')
- #Check API can work
- if config.use_api:
- try:
- d = self.api_address()
- del d
- except NotImplementedError:
- config.use_api = False
-
+ api = self.has_api()
urlsToRetrieve = [siteurl]
if not siteurl.startswith('*.'):
urlsToRetrieve.append('*.' + siteurl)
- if config.use_api and self.versionnumber() >= 11:
+
+ if api and self.versionnumber() >= 11:
output(u'Querying API exturlusage...')
for url in urlsToRetrieve:
params = {