jenkins-bot merged this change.

View Change

Approvals: Huji: Looks good to me, approved; jenkins-bot: Verified
[cleanup] Remove experimental contents_on_disk feature from interwiki.py

The contents_on_disk feature has never been tested and it is uncertain
whether it works as expected. It is not clear how much RAM is saved by this
implementation. There are other options to avoid consuming too much memory,
like -query and -array.

Change-Id: I3ea6db24cdf61296c6c9bb6e10b4dc4e13690596
---
M pywikibot/config2.py
M scripts/interwiki.py
2 files changed, 5 insertions(+), 120 deletions(-)

diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index 6f6bed7..da7feea 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -112,9 +112,9 @@

_private_values = {'authenticate', 'db_password'}
_deprecated_variables = {'available_ssl_project', 'fake_user_agent',
- 'line_separator', 'LS', 'panoramio', 'proxy',
- 'special_page_limit', 'sysopnames', 'use_SSL_onlogin',
- 'use_SSL_always'}
+ 'interwiki_contents_on_disk', 'line_separator', 'LS',
+ 'panoramio', 'proxy', 'special_page_limit',
+ 'sysopnames', 'use_SSL_onlogin', 'use_SSL_always'}

# ############# ACCOUNT SETTINGS ##############

@@ -609,11 +609,6 @@
# Save file with local articles without interwikis.
without_interwiki = False

-# Experimental feature:
-# Store the page contents on disk (/cache/ directory) instead of loading
-# them in RAM.
-interwiki_contents_on_disk = False
-
# ############# SOLVE_DISAMBIGUATION SETTINGS ############
#
# Set disambiguation_comment[FAMILY][LANG] to a non-empty string to override
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 8f9fd66..06c0a6a 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -340,8 +340,8 @@
# (C) Rob W.W. Hooft, 2003
# (C) Daniel Herding, 2004
# (C) Yuri Astrakhan, 2005-2006
-# (C) xqt, 2009-2019
-# (C) Pywikibot team, 2007-2019
+# (C) xqt, 2009-2020
+# (C) Pywikibot team, 2007-2020
#
# Distributed under the terms of the MIT license.
#
@@ -352,7 +352,6 @@
import os
import pickle
import re
-import shelve
import socket
import sys
from textwrap import fill
@@ -447,7 +446,6 @@
hintnobracket = False
hints = []
hintsareright = False
- contentsondisk = config.interwiki_contents_on_disk
lacklanguage = None
minlinks = 0
quiet = False
@@ -558,81 +556,6 @@
return True


-class StoredPage(pywikibot.Page):
-
- """
- Store the Page contents on disk.
-
- This is to avoid sucking too much memory when a big number of Page objects
- will be loaded at the same time.
- """
-
- # Please prefix the class members names by SP
- # to avoid possible name clashes with pywikibot.Page
-
- # path to the shelve
- SPpath = None
- # shelve
- SPstore = None
-
- # attributes created by pywikibot.Page.__init__
- SPcopy = ['_editrestriction',
- '_site',
- '_namespace',
- '_section',
- '_title',
- 'editRestriction',
- 'moveRestriction',
- '_permalink',
- '_userName',
- '_ipedit',
- '_editTime',
- '_startTime',
- '_revisionId',
- '_deletedRevs']
-
- def SPdeleteStore():
- """Delete SPStore."""
- if StoredPage.SPpath:
- del StoredPage.SPstore
- os.unlink(StoredPage.SPpath)
- SPdeleteStore = staticmethod(SPdeleteStore)
-
- def __init__(self, page):
- """Initializer."""
- for attr in StoredPage.SPcopy:
- setattr(self, attr, getattr(page, attr))
-
- if not StoredPage.SPpath:
- index = 1
- while True:
- path = config.datafilepath('cache', 'pagestore' + str(index))
- if not os.path.exists(path):
- break
- index += 1
- StoredPage.SPpath = path
- StoredPage.SPstore = shelve.open(path)
-
- self.SPkey = str(self)
- self.SPcontentSet = False
-
- def SPgetContents(self):
- """Get stored content."""
- return StoredPage.SPstore[self.SPkey]
-
- def SPsetContents(self, contents):
- """Store content."""
- self.SPcontentSet = True
- StoredPage.SPstore[self.SPkey] = contents
-
- def SPdelContents(self):
- """Delete stored content."""
- if self.SPcontentSet:
- del StoredPage.SPstore[self.SPkey]
-
- _contents = property(SPgetContents, SPsetContents, SPdelContents)
-
-
class PageTree(object):

"""
@@ -783,9 +706,6 @@
plus optionally a list of hints for translation
"""
self.conf = conf
- if self.conf.contentsondisk:
- if originPage:
- originPage = StoredPage(originPage)

super(Subject, self).__init__(originPage)

@@ -882,8 +802,6 @@

for link in links:
page = pywikibot.Page(link)
- if self.conf.contentsondisk:
- page = StoredPage(page)
self.todo.add(page)
self.foundIn[page] = [None]
if keephintedsites:
@@ -960,8 +878,6 @@
self.foundIn[page].append(linkingPage)
return False
else:
- if self.conf.contentsondisk:
- page = StoredPage(page)
self.foundIn[page] = [linkingPage]
self.todo.add(page)
counter.plus(page.site)
@@ -1275,8 +1191,6 @@
# the 1st existig page becomes the origin page, if none was
# supplied
if self.conf.initialredirect:
- if self.conf.contentsondisk:
- redirectTargetPage = StoredPage(redirectTargetPage)
# don't follow another redirect; it might be a self
# loop
if not redirectTargetPage.isRedirectPage() \
@@ -1693,27 +1607,6 @@
if config.interwiki_backlink:
self.reportBacklinks(new, updatedSites)

- def clean(self):
- """
- Delete the contents that are stored on disk for this Subject.
-
- We cannot afford to define this in a StoredPage destructor because
- StoredPage instances can get referenced cyclicly: that would stop the
- garbage collector from destroying some of those objects.
-
- It's also not necessary to set these lines as a Subject destructor:
- deleting all stored content one entry by one entry when bailing out
- after a KeyboardInterrupt for example is redundant, because the
- whole storage file will be eventually removed.
- """
- if self.conf.contentsondisk:
- for page in self.foundIn:
- # foundIn can contain either Page or StoredPage objects
- # calling the destructor on _contents will delete the
- # disk records if necessary
- if hasattr(page, '_contents'):
- del page._contents
-
def replaceLinks(self, page, newPages):
"""Return True if saving was successful."""
if self.conf.localonly:
@@ -2244,7 +2137,6 @@
subj = self.subjects[i]
if subj.isDone():
subj.finish()
- subj.clean()
del self.subjects[i]

def isDone(self):
@@ -2564,8 +2456,6 @@
dumpFileName = bot.dump(append)
raise
finally:
- if iwconf.contentsondisk:
- StoredPage.SPdeleteStore()
if dumpFileName:
try:
restoredFiles.remove(dumpFileName)

To view, visit change 588397. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I3ea6db24cdf61296c6c9bb6e10b4dc4e13690596
Gerrit-Change-Number: 588397
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Huji <huji.huji@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot (75)