diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index 05033a63b..5cb7a8cd4 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -79,6 +79,13 @@ ZODB file. A full recover will always be done if a pack has occured since the last incremental backup. + -Q / --quick + Verify via md5 checksum only the last incremental recovered of the + output file. This reduces the disk i/o at the (theoretical) cost of + inconsistency. This is a probabilistic way of determining whether a + full recover is necessary. This argument is ignored when -F / --full + is used. + -w --with-verify Verify on the fly the backup files on recovering. This option runs @@ -267,6 +274,9 @@ class Options: if options.killold: log('--kill-old-on-full option is ignored in recover mode') options.killold = False + if options.full and options.quick: + log('--quick option is ignored if --full option is used') + options.quick = None else: assert options.mode == VERIFY if options.date is not None: @@ -752,42 +762,49 @@ def do_incremental_recover(options, repofiles): with open(options.output, 'r+b') as outfp: outfp.seek(0, 2) initial_length = outfp.tell() - with open(datfile) as fp: - previous_chunk = None + + error = '' + previous_chunk = None + with open(datfile) as fp, open(options.output, 'r+b') as outfp: for line in fp: - fn, startpos, endpos, _ = chunk = line.split() + fn, startpos, endpos, check_sum = chunk = line.split() startpos = int(startpos) endpos = int(endpos) if endpos > initial_length: break + if not options.quick: + if check_sum != checksum(outfp, endpos - startpos): + error = ('Target file is not consistent with backup %s, ' + 'falling back to a full recover.') % fn + break previous_chunk = chunk - - if previous_chunk is None: + if error: + log(error) + return do_full_recover(options, repofiles) + elif previous_chunk is None: log('Target file smaller than full backup, ' 'falling back to a full recover.') return do_full_recover(options, repofiles) - if 
endpos < initial_length: + elif endpos < initial_length: log('Target file is larger than latest backup, ' 'falling back to a full recover.') return do_full_recover(options, repofiles) - check_startpos = int(previous_chunk[1]) - check_endpos = int(previous_chunk[2]) - with open(options.output, 'r+b') as outfp: - outfp.seek(check_startpos) - check_sum = checksum(outfp, check_endpos - check_startpos) - if endpos == initial_length and chunk[3] == check_sum: + if options.quick: + check_startpos = int(previous_chunk[1]) + check_endpos = int(previous_chunk[2]) + with open(options.output, 'r+b') as outfp: + outfp.seek(check_startpos) + if previous_chunk[3] != checksum( + outfp, check_endpos - check_startpos): + error = ('Target file is not consistent with backup %s, ' + 'falling back to a full recover.' % previous_chunk[0]) + if error: + log(error) + return do_full_recover(options, repofiles) + if endpos == initial_length: log('Target file is same size as latest backup, ' 'doing nothing.') return - elif previous_chunk[3] != check_sum: - if endpos == initial_length: - log('Target file is not consistent with latest backup, ' - 'falling back to a full recover.') - return do_full_recover(options, repofiles) - else: - log('Last whole common chunk checksum did not match with backup, ' - 'falling back to a full recover.') - return do_full_recover(options, repofiles) filename = os.path.join(options.repository, os.path.basename(fn)) diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index eccb4f1c1..9d5696e72 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -220,13 +220,16 @@ def test_recover_ignored_args(self): from ZODB.scripts import repozo options = repozo.parseargs(['-R', '-r', '/tmp/nosuchdir', '-v', '-f', '/tmp/ignored.fs', - '-k']) + '-k', '--full', '--quick']) self.assertEqual(options.file, None) self.assertIn('--file option is ignored in recover mode', sys.stderr.getvalue()) 
self.assertEqual(options.killold, False) self.assertIn('--kill-old-on-full option is ignored in recover mode', sys.stderr.getvalue()) + self.assertEqual(options.quick, None) + self.assertIn('--quick option is ignored if --full option is used', + sys.stderr.getvalue()) def test_verify_ignored_args(self): from ZODB.scripts import repozo @@ -1040,6 +1043,7 @@ def tearDown(self): def _makeOptions(self, **kw): options = super()._makeOptions(**kw) options.full = False + options.quick = kw.get('quick', False) return options def _createRecoveredDataFS(self, output, options): @@ -1092,6 +1096,33 @@ def test_w_incr_recover_from_incr_backup(self): self.assertEqual(_read_file(output), b'AAABBBCCCDDD') self.assertFalse(os.path.exists(output + '.part')) + def test_w_quick_incr_recover_from_incr_backup(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + quick=True, + withverify=False) + self._createRecoveredDataFS(output, options) + # Create 2 more .deltafs, to prove the code knows where to pick up + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile(8, 9, 10, '.deltafs', 'DDD') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n' # noqa: E501 line too long + '/backup/2010-05-14-08-09-10.deltafs 9 12 45054f47ac3305a2a33e9bcceadff712\n') # noqa: E501 line too long + os.unlink( + os.path.join(self._repository_directory, + '2010-05-14-04-05-06.deltafs')) + self._callFUT(options) + self.assertNotIn('falling back to a full recover.', + sys.stderr.getvalue()) + self.assertEqual(_read_file(output), b'AAABBBCCCDDD') + self.assertFalse(os.path.exists(output + 
'.part')) + def test_w_incr_backup_with_verify_sum_inconsistent(self): import tempfile dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') @@ -1192,14 +1223,14 @@ def test_w_incr_backup_switch_auto_to_full_recover_if_chunk_is_wrong(self): # n self._makeFile(6, 7, 8, '.deltafs', 'CCC') self._makeFile( 2, 3, 4, '.dat', - '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long - '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b8\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long self._callFUT(options) self.assertEqual(_read_file(output), b'AAABBBCCC') self.assertFalse(os.path.exists(output + '.part')) self.assertIn( - "Last whole common chunk checksum did not match with backup, falling back to a full recover.", # noqa: E501 line too long + "Target file is not consistent with backup /backup/2010-05-14-02-03-04.fs, falling back to a full recover.", # noqa: E501 line too long sys.stderr.getvalue()) def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self): @@ -1226,9 +1257,55 @@ def test_w_incr_backup_switch_auto_to_full_recover_after_pack(self): self.assertEqual(_read_file(output), b'CCDD') self.assertFalse(os.path.exists(output + '.part')) self.assertIn( - 'Target file is larger than latest backup, falling back to a full recover.', # noqa: E501 line too long + "Target file is not consistent with backup /backup/2010-05-14-06-07-08.fs, falling back to a full recover.", # noqa: E501 line too long sys.stderr.getvalue()) + def test_w_quick_incr_backup_switch_auto_to_full_recover_if_last_chunk_is_wrong(self): # noqa: E501 line too long + import tempfile + dd = self._data_directory = 
tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + quick=True, + withverify=False) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a4\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBBCCC') + self.assertFalse(os.path.exists(output + '.part')) + self.assertIn( + "Target file is not consistent with backup /backup/2010-05-14-04-05-06.deltafs, falling back to a full recover.", # noqa: E501 line too long + sys.stderr.getvalue()) + + def test_w_quick_incr_backup_dont_see_old_inconsistencies(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') + output = os.path.join(dd, 'Data.fs') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + quick=True, + withverify=False) + self._createRecoveredDataFS(output, options) + self._makeFile(6, 7, 8, '.deltafs', 'CCC') + self._makeFile( + 2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' # noqa: E501 line too long + '/backup/2010-05-14-04-05-06.deltafs 3 6 2bb225f0ba9a58930757a868ed57d9a3\n' # noqa: E501 line too long + '/backup/2010-05-14-06-07-08.deltafs 6 9 defb99e69a9f1f6e06f15006b1f166ae\n') # noqa: E501 line too long + # Corrupt the first chunk but keep the announced length: --quick only checksums the last chunk, so this goes unnoticed + with open(output, 'r+b') as f: + f.write(b'ZZZBBBCCC') + self._callFUT(options) + self.assertEqual(_read_file(output), b'ZZZBBBCCC') + self.assertFalse(os.path.exists(output + '.part')) + self.assertNotIn( + "falling back to a full recover", 
sys.stderr.getvalue()) + class Test_do_verify(OptionsTestBase, unittest.TestCase):