jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/339317 )
Change subject: tools: make general function to compute file sha
......................................................................
tools: make general function to compute file sha
It can be reused in several places:
- site.upload()
- Filepage.download() [if/when it will be merged]
Change-Id: I756c4d127274f7f6031920127850f30de3964597
---
M pywikibot/site.py
M pywikibot/tools/__init__.py
M tests/tools_tests.py
3 files changed, 89 insertions(+), 10 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/pywikibot/site.py b/pywikibot/site.py
index bdf1fb4..52e6927 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -18,7 +18,6 @@
import copy
import datetime
import functools
-import hashlib
import heapq
import itertools
import json
@@ -68,6 +67,7 @@
from pywikibot.family import WikimediaFamily
from pywikibot.throttle import Throttle
from pywikibot.tools import (
+ compute_file_hash,
itergroup, UnicodeMixin, ComparableMixin, SelfCallMixin, SelfCallString,
deprecated, deprecate_arg, deprecated_args, remove_last_args,
redirect_func, issue_deprecation_warning,
@@ -6027,15 +6027,7 @@
# The SHA1 was also requested so calculate and compare it
assert 'sha1' in stash_info, \
'sha1 not in stash info: {0}'.format(stash_info)
- sha1 = hashlib.sha1()
- bytes_to_read = offset
- with open(source_filename, 'rb') as f:
- while bytes_to_read > 0:
- read_bytes = f.read(min(bytes_to_read, 1 << 20))
- assert read_bytes # make sure we actually read bytes
- bytes_to_read -= len(read_bytes)
- sha1.update(read_bytes)
- sha1 = sha1.hexdigest()
+ sha1 = compute_file_hash(source_filename, bytes_to_read=offset)
if sha1 != stash_info['sha1']:
raise ValueError(
'The SHA1 of {0} bytes of the stashed "{1}" is {2} '
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 526fd93..261f5ac 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -10,6 +10,7 @@
import collections
import gzip
+import hashlib
import inspect
import itertools
import os
@@ -1714,3 +1715,41 @@
# re-read and check changes
if os.stat(filename).st_mode != st_mode:
warn(warn_str.format(filename, st_mode - stat.S_IFREG, mode))
+
+
+def compute_file_hash(filename, sha='sha1', bytes_to_read=None):
+ """Compute file hash.
+
+ Result is expressed as hexdigest().
+
+ @param filename: filename path
+ @type filename: basestring
+
+ @param sha: hashing function among the following in hashlib:
+ md5(), sha1(), sha224(), sha256(), sha384(), and sha512()
+ function name shall be passed as string, e.g. 'sha1'.
+ @type sha: basestring
+
+ @param bytes_to_read: only the first bytes_to_read will be considered;
+ if file size is smaller, the whole file will be considered.
+ @type bytes_to_read: None or int
+
+ """
+ size = os.path.getsize(filename)
+ if bytes_to_read is None:
+ bytes_to_read = size
+ else:
+ bytes_to_read = min(bytes_to_read, size)
+ step = 1 << 20
+
+ shas = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
+ assert sha in shas
+ sha = getattr(hashlib, sha)() # sha instance
+
+ with open(filename, 'rb') as f:
+ while bytes_to_read > 0:
+ read_bytes = f.read(min(bytes_to_read, step))
+ assert read_bytes # make sure we actually read bytes
+ bytes_to_read -= len(read_bytes)
+ sha.update(read_bytes)
+ return sha.hexdigest()
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index 6e97772..c450966 100644
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -754,6 +754,54 @@
self.chmod.assert_called_once_with(self.file, 0o600)
+class TestFileShaCalculator(TestCase):
+
+ """Test calculator of sha of a file."""
+
+ net = False
+
+ filename = join_xml_data_path('article-pear-0.10.xml')
+
+ def setUp(self):
+ """Setup tests."""
+ super(TestFileShaCalculator, self).setUp()
+
+ def test_md5_complete_calculation(self):
+ """Test md5 of complete file."""
+ res = tools.compute_file_hash(self.filename, sha='md5')
+ self.assertEqual(res, '5d7265e290e6733e1e2020630262a6f3')
+
+ def test_md5_partial_calculation(self):
+ """Test md5 of partial file (1024 bytes)."""
+ res = tools.compute_file_hash(self.filename, sha='md5',
+ bytes_to_read=1024)
+ self.assertEqual(res, 'edf6e1accead082b6b831a0a600704bc')
+
+ def test_sha1_complete_calculation(self):
+ """Test sha1 of complete file."""
+ res = tools.compute_file_hash(self.filename, sha='sha1')
+ self.assertEqual(res, '1c12696e1119493a625aa818a35c41916ce32d0c')
+
+ def test_sha1_partial_calculation(self):
+ """Test sha1 of partial file (1024 bytes)."""
+ res = tools.compute_file_hash(self.filename, sha='sha1',
+ bytes_to_read=1024)
+ self.assertEqual(res, 'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6')
+
+ def test_sha224_complete_calculation(self):
+ """Test sha224 of complete file."""
+ res = tools.compute_file_hash(self.filename, sha='sha224')
+ self.assertEqual(
+ res, '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33')
+
+ def test_sha224_partial_calculation(self):
+ """Test sha224 of partial file (1024 bytes)."""
+ res = tools.compute_file_hash(self.filename, sha='sha224',
+ bytes_to_read=1024)
+ self.assertEqual(
+ res, 'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169')
+
+
class Foo(object):
"""Test class to verify classproperty decorator."""
--
To view, visit
https://gerrit.wikimedia.org/r/339317
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I756c4d127274f7f6031920127850f30de3964597
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Magul <tomasz.magulski(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>