[bitbake-devel] [PATCH] hashserv: Merge divergent output hashes
Joshua Watt
jpewhacker at gmail.com
Tue Nov 26 15:50:36 UTC 2019
Instructs the hash equivalence server to merge diverging output hashes
to the same unihash when one is reported that unifies them. The primary
use for this is -cross and -native recipes where the outputs will never
match on different host architectures, but the unihashes need to
converge.
Signed-off-by: Joshua Watt <JPEWhacker at gmail.com>
---
bitbake/lib/hashserv/__init__.py | 1 +
bitbake/lib/hashserv/server.py | 31 +++++++++++++++++++++++--
bitbake/lib/hashserv/tests.py | 39 ++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/bitbake/lib/hashserv/__init__.py b/bitbake/lib/hashserv/__init__.py
index c3318620f54..218a54a04d8 100644
--- a/bitbake/lib/hashserv/__init__.py
+++ b/bitbake/lib/hashserv/__init__.py
@@ -48,6 +48,7 @@ def setup_database(database, sync=True):
# Create new indexes
cursor.execute('CREATE INDEX IF NOT EXISTS taskhash_lookup_v2 ON tasks_v2 (method, taskhash, created)')
cursor.execute('CREATE INDEX IF NOT EXISTS outhash_lookup_v2 ON tasks_v2 (method, outhash)')
+ cursor.execute('CREATE INDEX IF NOT EXISTS unihash_lookup_v2 ON tasks_v2 (method, unihash)')
return db
diff --git a/bitbake/lib/hashserv/server.py b/bitbake/lib/hashserv/server.py
index 0aff77688e4..0a294b4b1a0 100644
--- a/bitbake/lib/hashserv/server.py
+++ b/bitbake/lib/hashserv/server.py
@@ -267,9 +267,36 @@ class ServerClient(object):
# If a row matching the outhash was found, the unihash for
# the new taskhash should be the same as that one.
# Otherwise the caller provided unihash is used.
- unihash = data['unihash']
- if row is not None:
+ if row is not None and data['unihash'] != row['unihash']:
+ # Update unihashes to ensure all branches are converging on
+ # the same unihash. This is generally a bad thing because
+ # it means that builds are not reproducible, but it
+ # occasionally is unavoidable such as in the case of -cross
+ # and -native tasks for different build host architectures.
+ # For example, take the following taskhashes that produce
+ # the given outhashes and unihashes:
+ #
+ # taskhash outhash unihash
+ # A Z 1
+ # B Y 2
+ # C Y 3 -> 2
+ # B Z 2 -> 1
+ #
+ # In this case, the B task isn't being built reproducibly,
+ # but for the second B the server sees it matches the
+ # outhash of A, and reports that the unihash should be
+ # changed from 2 to 1.
+ #
+ # The inconsistency is that there are still entries that
+ # refer to unihash 2. These should be remapped to unihash
+ # 1, since the latest entry shows that these are
+ # equivalent.
+ cursor.execute('''UPDATE tasks_v2 SET unihash=:new_unihash WHERE method=:method AND unihash=:old_unihash''',
+ {'method': data['method'], 'new_unihash': row['unihash'], 'old_unihash': data['unihash']})
+
unihash = row['unihash']
+ else:
+ unihash = data['unihash']
insert_data = {
'method': data['method'],
diff --git a/bitbake/lib/hashserv/tests.py b/bitbake/lib/hashserv/tests.py
index a5472a996d2..ee9107a9aa8 100644
--- a/bitbake/lib/hashserv/tests.py
+++ b/bitbake/lib/hashserv/tests.py
@@ -99,6 +99,45 @@ class TestHashEquivalenceServer(object):
result = self.client.get_unihash(self.METHOD, taskhash)
self.assertEqual(result, unihash)
+ def test_hash_merging(self):
+ # Verify that unihashes are correctly merged together when diverging
+ # hashes are found. Uses the following table which describes how each
+ # task is reported:
+ #
+ # taskhash outhash unihash
+ # A Z A
+ # B Y B
+ # C Y C -> B
+ # B Z B -> A
+ #
+ A_taskhash = "A"
+ B_taskhash = "B"
+ C_taskhash = "C"
+ Z_outhash = "Z"
+ Y_outhash = "Y"
+
+ result = self.client.report_unihash(A_taskhash, self.METHOD, Z_outhash, A_taskhash)
+ self.assertEqual(result['unihash'], A_taskhash, 'Server reported bad unihash change for task A')
+
+ result = self.client.report_unihash(B_taskhash, self.METHOD, Y_outhash, B_taskhash)
+ self.assertEqual(result['unihash'], B_taskhash, 'Server reported bad unihash change for task B')
+
+ result = self.client.report_unihash(C_taskhash, self.METHOD, Y_outhash, C_taskhash)
+ self.assertEqual(result['unihash'], B_taskhash, 'Server reported bad unihash change for task C')
+
+ # Report a second B with the Z outhash. It should be changed to A's unihash
+ result = self.client.report_unihash(B_taskhash, self.METHOD, Z_outhash, B_taskhash)
+ self.assertEqual(result['unihash'], A_taskhash, 'Server reported bad unihash change for task B')
+
+ # The unihash for C should also be A's unihash
+ result = self.client.get_unihash(self.METHOD, C_taskhash)
+ self.assertEqual(result, A_taskhash, 'Server returned bad unihash for task C')
+
+ # The reported unihash for B should also be A. NOTE: this *should*
+ # return the first B reported because it is older
+ result = self.client.get_unihash(self.METHOD, B_taskhash)
+ self.assertEqual(result, A_taskhash, 'Server returned bad unihash for task B')
+
def test_stress(self):
def query_server(failures):
client = Client(self.server.address)
--
2.23.0
More information about the bitbake-devel
mailing list