Fix cluster startup to handle stray heartbeats (#2891)

This commit is contained in:
Przemyslaw Hugh Kaznowski 2023-10-26 16:24:46 +01:00 committed by GitHub
parent 144e7de8ff
commit 42cff67bd1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 1 deletions

View file

@ -626,7 +626,13 @@ impl Datastore {
let hbs = tx.scan_hb(&end_of_time, NO_LIMIT).await?;
trace!("Found {} heartbeats", hbs.len());
for hb in hbs {
unreachable_nodes.remove(&hb.nd.to_string()).unwrap();
match unreachable_nodes.remove(&hb.nd.to_string()) {
None => {
// The heartbeat's node didn't exist in the cluster, so the stray heartbeat should be deleted
tx.del_hb(hb.hb, hb.nd).await?;
}
Some(_) => {}
}
}
// Remove unreachable nodes
for (_, cl) in unreachable_nodes {

View file

@ -31,6 +31,14 @@ async fn expired_nodes_are_garbage_collected() {
test.db = test.db.with_node_id(sql::Uuid::from(old_node));
test.db.bootstrap().await.unwrap();
// Throw in some stray nodes and heartbeats
let mut tx = test.db.transaction(Write, Optimistic).await.unwrap();
let corrupt_node_1 = Uuid::parse_str("5a65fe57-7ac3-4b13-a31f-6376d3b484c8").unwrap();
let corrupt_node_2 = Uuid::parse_str("eb94a0b4-70ea-482f-a7dd-dc02132be846").unwrap();
tx.set_nd(corrupt_node_1).await.unwrap();
tx.set_hb(old_time, corrupt_node_2).await.unwrap();
tx.commit().await.unwrap();
// Set up second node at a later timestamp
let new_time = Timestamp {
value: 567000,