Revision: 8670
Author: multichill
Date: 2010-10-20 15:05:53 +0000 (Wed, 20 Oct 2010)
Log Message:
-----------
Merged some code form
https://fisheye.toolserver.org/browse/multichill/bot/geograph/geograph_lib.… to be
able to categorize images based on the provided location.
Modified Paths:
--------------
trunk/pywikipedia/imagerecat.py
Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py 2010-10-20 14:41:13 UTC (rev 8669)
+++ trunk/pywikipedia/imagerecat.py 2010-10-20 15:05:53 UTC (rev 8670)
@@ -40,6 +40,7 @@
import wikipedia as pywikibot
import config
import pagegenerators
+import xml.etree.ElementTree
category_blacklist = []
countries = []
@@ -167,6 +168,82 @@
pywikibot.output(lang + project + article)
return (commonshelperCats, usage, galleries)
+def getOpenStreetMapCats(latitude, longitude):
+ '''
+ Get a list of location categories based on the OSM nomatim tool
+ '''
+ result = []
+ locationList = getOpenStreetMap(latitude, longitude)
+ for i in range(0, len(locationList)):
+ #print 'Working on ' + locationList[i]
+ if i <= len(locationList)-3:
+ category = getCategoryByName(name=locationList[i], parent=locationList[i+1],
grandparent=locationList[i+2])
+ elif i == len(locationList)-2:
+ category = getCategoryByName(name=locationList[i], parent=locationList[i+1])
+ else:
+ category = getCategoryByName(name=locationList[i])
+ if category and not category==u'':
+ result.append(category)
+ #print result
+ return result
+
+
+def getOpenStreetMap(latitude, longitude):
+ '''
+ Get the result from
http://nominatim.openstreetmap.org/reverse
+ and put it in a list of tuples to play around with
+ '''
+ result = []
+ gotInfo = False
+ parameters = urllib.urlencode({'lat' : latitude, 'lon' : longitude})
+ while(not gotInfo):
+ try:
+ page =
urllib.urlopen("http://nominatim.openstreetmap.org/reverse?format=xml&… %
parameters)
+ et = xml.etree.ElementTree.parse(page)
+ gotInfo=True
+ except IOError:
+ pywikibot.output(u'Got an IOError, let\'s try again')
+ time.sleep(30)
+ except socket.timeout:
+ pywikibot.output(u'Got a timeout, let\'s try again')
+ time.sleep(30)
+ validParts = [u'hamlet', u'village', u'city',
u'county', u'country']
+ invalidParts = [u'path', u'road', u'suburb',
u'state', u'country_code']
+ addressparts = et.find('addressparts')
+ #xml.etree.ElementTree.dump(et)
+
+ for addresspart in addressparts.getchildren():
+ if addresspart.tag in validParts:
+ result.append(addresspart.text)
+ elif addresspart.tag in invalidParts:
+ pywikibot.output(u'Dropping %s, %s' % (addresspart.tag, addresspart.text))
+ else:
+ pywikibot.output(u'WARNING %s, %s is not in addressparts lists' %
(addresspart.tag, addresspart.text))
+ #print result
+ return result
+
+def getCategoryByName(name, parent=u'', grandparent=u''):
+
+ if not parent==u'':
+ workname = name.strip() + u',_' + parent.strip()
+ workcat = catlib.Category(
+ pywikibot.getSite(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ if not grandparent==u'':
+ workname = name.strip() + u',_' + grandparent.strip()
+ workcat = catlib.Category(
+ pywikibot.getSite(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ workname = name.strip()
+ workcat = catlib.Category(
+ pywikibot.getSite(u'commons', u'commons'), workname)
+ if workcat.exists():
+ return workname
+ return u''
+
+
def getUsage(use):
''' Parse the Commonsense output to get the usage '''
result = []