Improve node membership processing and garbage collection (#4780)

This commit is contained in:
Tobie Morgan Hitchcock 2024-09-16 21:51:29 +01:00 committed by GitHub
parent 912cafd3c5
commit 457adff85c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 521 additions and 402 deletions

View file

@ -1,6 +1,5 @@
use crate::err::Error; use crate::err::Error;
use crate::key::change; use crate::key::change;
#[cfg(debug_assertions)]
use crate::key::debug::Sprintable; use crate::key::debug::Sprintable;
use crate::kvs::Transaction; use crate::kvs::Transaction;
use crate::vs; use crate::vs;
@ -9,30 +8,30 @@ use std::str;
// gc_all_at deletes all change feed entries that become stale at the given timestamp. // gc_all_at deletes all change feed entries that become stale at the given timestamp.
#[allow(unused)] #[allow(unused)]
#[instrument(level = "trace", target = "surrealdb::core::cfs", skip(tx))]
pub async fn gc_all_at(tx: &Transaction, ts: u64) -> Result<(), Error> { pub async fn gc_all_at(tx: &Transaction, ts: u64) -> Result<(), Error> {
// Fetch all namespaces // Fetch all namespaces
let nss = tx.all_ns().await?; let nss = tx.all_ns().await?;
// Loop over each namespace // Loop over each namespace
for ns in nss.as_ref() { for ns in nss.as_ref() {
// Trace for debugging // Trace for debugging
#[cfg(debug_assertions)] trace!("Performing garbage collection on {} for timestamp {ts}", ns.name);
trace!("Performing garbage collection on {ns} for timestamp {ts}");
// Process the namespace // Process the namespace
gc_ns(tx, ts, ns.name.as_str()).await?; gc_ns(tx, ts, &ns.name).await?;
} }
Ok(()) Ok(())
} }
// gc_ns deletes all change feed entries in the given namespace that are older than the given watermark. // gc_ns deletes all change feed entries in the given namespace that are older than the given watermark.
#[allow(unused)] #[allow(unused)]
#[instrument(level = "trace", target = "surrealdb::core::cfs", skip(tx))]
pub async fn gc_ns(tx: &Transaction, ts: u64, ns: &str) -> Result<(), Error> { pub async fn gc_ns(tx: &Transaction, ts: u64, ns: &str) -> Result<(), Error> {
// Fetch all databases // Fetch all databases
let dbs = tx.all_db(ns).await?; let dbs = tx.all_db(ns).await?;
// Loop over each database // Loop over each database
for db in dbs.as_ref() { for db in dbs.as_ref() {
// Trace for debugging // Trace for debugging
#[cfg(debug_assertions)] trace!("Performing garbage collection on {ns}:{} for timestamp {ts}", db.name);
trace!("Performing garbage collection on {ns}:{db} for timestamp {ts}");
// Fetch all tables // Fetch all tables
let tbs = tx.all_tb(ns, &db.name, None).await?; let tbs = tx.all_tb(ns, &db.name, None).await?;
// Get the database changefeed expiration // Get the database changefeed expiration
@ -68,19 +67,14 @@ pub async fn gc_ns(tx: &Transaction, ts: u64, ns: &str) -> Result<(), Error> {
} }
// gc_db deletes all change feed entries in the given database that are older than the given watermark. // gc_db deletes all change feed entries in the given database that are older than the given watermark.
pub async fn gc_range( #[instrument(level = "trace", target = "surrealdb::core::cfs", skip(tx))]
tx: &Transaction, pub async fn gc_range(tx: &Transaction, ns: &str, db: &str, vt: Versionstamp) -> Result<(), Error> {
ns: &str,
db: &str,
watermark: Versionstamp,
) -> Result<(), Error> {
// Calculate the range // Calculate the range
let beg = change::prefix_ts(ns, db, vs::u64_to_versionstamp(0)); let beg = change::prefix_ts(ns, db, vs::u64_to_versionstamp(0));
let end = change::prefix_ts(ns, db, watermark); let end = change::prefix_ts(ns, db, vt);
// Trace for debugging // Trace for debugging
#[cfg(debug_assertions)]
trace!( trace!(
"Performing garbage collection on {ns}:{db} for watermark {watermark:?}, between {} and {}", "Performing garbage collection on {ns}:{db} for watermark {vt:?}, between {} and {}",
beg.sprint(), beg.sprint(),
end.sprint() end.sprint()
); );

View file

@ -177,7 +177,7 @@ mod tests {
// Let the db remember the timestamp for the current versionstamp // Let the db remember the timestamp for the current versionstamp
// so that we can replay change feeds from the timestamp later. // so that we can replay change feeds from the timestamp later.
ds.tick_at(ts.0.timestamp().try_into().unwrap()).await.unwrap(); ds.changefeed_process_at(ts.0.timestamp().try_into().unwrap()).await.unwrap();
// //
// Write things to the table. // Write things to the table.
@ -379,7 +379,7 @@ mod tests {
assert_eq!(r, want); assert_eq!(r, want);
// Now we should see the gc_all results // Now we should see the gc_all results
ds.tick_at((ts.0.timestamp() + 5).try_into().unwrap()).await.unwrap(); ds.changefeed_process_at((ts.0.timestamp() + 5).try_into().unwrap()).await.unwrap();
let tx7 = ds.transaction(Write, Optimistic).await.unwrap(); let tx7 = ds.transaction(Write, Optimistic).await.unwrap();
let r = crate::cf::read(&tx7, NS, DB, Some(TB), ShowSince::Timestamp(ts), Some(10)) let r = crate::cf::read(&tx7, NS, DB, Some(TB), ShowSince::Timestamp(ts), Some(10))
@ -393,13 +393,13 @@ mod tests {
async fn scan_picks_up_from_offset() { async fn scan_picks_up_from_offset() {
// Given we have 2 entries in change feeds // Given we have 2 entries in change feeds
let ds = init(false).await; let ds = init(false).await;
ds.tick_at(5).await.unwrap(); ds.changefeed_process_at(5).await.unwrap();
let _id1 = record_change_feed_entry( let _id1 = record_change_feed_entry(
ds.transaction(Write, Optimistic).await.unwrap(), ds.transaction(Write, Optimistic).await.unwrap(),
"First".to_string(), "First".to_string(),
) )
.await; .await;
ds.tick_at(10).await.unwrap(); ds.changefeed_process_at(10).await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap().inner(); let mut tx = ds.transaction(Write, Optimistic).await.unwrap().inner();
let vs1 = tx.get_versionstamp_from_timestamp(5, NS, DB).await.unwrap().unwrap(); let vs1 = tx.get_versionstamp_from_timestamp(5, NS, DB).await.unwrap().unwrap();
let vs2 = tx.get_versionstamp_from_timestamp(10, NS, DB).await.unwrap().unwrap(); let vs2 = tx.get_versionstamp_from_timestamp(10, NS, DB).await.unwrap().unwrap();
@ -431,7 +431,7 @@ mod tests {
let ds = init(true).await; let ds = init(true).await;
// Create a doc // Create a doc
ds.tick_at(ts.0.timestamp().try_into().unwrap()).await.unwrap(); ds.changefeed_process_at(ts.0.timestamp().try_into().unwrap()).await.unwrap();
let thing = Thing { let thing = Thing {
tb: TB.to_owned(), tb: TB.to_owned(),
id: Id::from("A"), id: Id::from("A"),
@ -444,7 +444,7 @@ mod tests {
res.result.unwrap(); res.result.unwrap();
// Now update it // Now update it
ds.tick_at((ts.0.timestamp() + 10).try_into().unwrap()).await.unwrap(); ds.changefeed_process_at((ts.0.timestamp() + 10).try_into().unwrap()).await.unwrap();
let res = ds let res = ds
.execute( .execute(
format!("UPDATE {thing} SET value=100, new_field=\"new_value\"").as_str(), format!("UPDATE {thing} SET value=100, new_field=\"new_value\"").as_str(),

View file

@ -1,22 +0,0 @@
/// A guard type that emits a log message when it is created and a
/// matching log message when it goes out of scope (is dropped).
#[doc(hidden)]
#[non_exhaustive]
pub struct LoggingLifecycle {
	identifier: String,
}
impl LoggingLifecycle {
	/// Constructs a new lifecycle guard, logging that the named
	/// lifecycle has started.
	#[doc(hidden)]
	pub fn new(identifier: String) -> Self {
		let lifecycle = Self {
			identifier,
		};
		debug!("Started {}", lifecycle.identifier);
		lifecycle
	}
}
impl Drop for LoggingLifecycle {
	/// Logs that the named lifecycle has stopped when the guard is dropped.
	fn drop(&mut self) {
		debug!("Stopped {}", self.identifier);
	}
}

View file

@ -18,11 +18,9 @@ mod store;
mod variables; mod variables;
pub mod capabilities; pub mod capabilities;
pub mod lifecycle;
pub mod node; pub mod node;
pub use self::capabilities::Capabilities; pub use self::capabilities::Capabilities;
pub use self::lifecycle::*;
pub use self::notification::*; pub use self::notification::*;
pub use self::options::*; pub use self::options::*;
pub use self::response::*; pub use self::response::*;

53
core/src/kvs/cf.rs Normal file
View file

@ -0,0 +1,53 @@
use crate::err::Error;
use crate::kvs::Datastore;
use crate::kvs::{LockType::*, TransactionType::*};
use crate::vs::Versionstamp;
impl Datastore {
/// Saves the current timestamp for each database's current versionstamp.
///
/// Iterates over every namespace and database in the datastore and records
/// the mapping from the given timestamp `ts` to that database's current
/// versionstamp, all within a single writeable transaction.
#[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub(crate) async fn changefeed_versionstamp(
&self,
ts: u64,
) -> Result<Option<Versionstamp>, Error> {
// Store the latest versionstamp
let mut vs: Option<Versionstamp> = None;
// Create a new transaction
let txn = self.transaction(Write, Optimistic).await?;
// Fetch all namespaces
let nss = catch!(txn, txn.all_ns().await);
// Loop over all namespaces
for ns in nss.iter() {
// Get the namespace name
let ns = &ns.name;
// Fetch all databases in this namespace
let dbs = catch!(txn, txn.all_db(ns).await);
// Loop over all databases
for db in dbs.iter() {
// Get the database name
let db = &db.name;
// TODO(SUR-341): This is incorrect, it's a [ns,db] to vs pair
// It's safe for now, as it is unused but either the signature must change
// to include {(ns, db): (ts, vs)} mapping, or we don't return it
vs = Some(txn.lock().await.set_timestamp_for_versionstamp(ts, ns, db).await?);
}
}
// Commit the changes
catch!(txn, txn.commit().await);
// Return the last versionstamp which was set (see TODO above)
Ok(vs)
}
/// Deletes all change feed entries that are older than the timestamp.
///
/// Runs change feed garbage collection for every namespace and database,
/// removing entries that became stale at or before `ts`, within a single
/// writeable transaction.
#[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub(crate) async fn changefeed_cleanup(&self, ts: u64) -> Result<(), Error> {
// Create a new transaction
let txn = self.transaction(Write, Optimistic).await?;
// Perform the garbage collection
catch!(txn, crate::cf::gc_all_at(&txn, ts).await);
// Commit the changes
catch!(txn, txn.commit().await);
// Everything ok
Ok(())
}
}

View file

@ -22,7 +22,6 @@ use crate::kvs::index::IndexBuilder;
use crate::kvs::{LockType, LockType::*, TransactionType, TransactionType::*}; use crate::kvs::{LockType, LockType::*, TransactionType, TransactionType::*};
use crate::sql::{statements::DefineUserStatement, Base, Query, Value}; use crate::sql::{statements::DefineUserStatement, Base, Query, Value};
use crate::syn; use crate::syn;
use crate::vs::{conv, Versionstamp};
use channel::{Receiver, Sender}; use channel::{Receiver, Sender};
use futures::Future; use futures::Future;
use reblessive::TreeStack; use reblessive::TreeStack;
@ -41,7 +40,7 @@ use uuid::Uuid;
#[cfg(target_arch = "wasm32")] #[cfg(target_arch = "wasm32")]
use wasmtimer::std::{SystemTime, UNIX_EPOCH}; use wasmtimer::std::{SystemTime, UNIX_EPOCH};
const TARGET: &str = "surrealdb::core::kvs::tr"; const TARGET: &str = "surrealdb::core::kvs::ds";
// If there are an infinite number of heartbeats, then we want to go batch-by-batch spread over several checks // If there are an infinite number of heartbeats, then we want to go batch-by-batch spread over several checks
const LQ_CHANNEL_SIZE: usize = 100; const LQ_CHANNEL_SIZE: usize = 100;
@ -601,118 +600,82 @@ impl Datastore {
} }
} }
// Initialise the cluster and run bootstrap utilities /// Initialise the cluster and run bootstrap utilities
#[instrument(err, level = "trace", target = "surrealdb::core::kvs::ds", skip_all)] #[instrument(err, level = "trace", target = "surrealdb::core::kvs::ds", skip_all)]
pub async fn bootstrap(&self) -> Result<(), Error> { pub async fn bootstrap(&self) -> Result<(), Error> {
// Insert this node in the cluster // Insert this node in the cluster
self.insert_node(self.id).await?; self.insert_node(self.id).await?;
// Mark expired nodes as archived // Mark inactive nodes as archived
self.expire_nodes().await?; self.expire_nodes().await?;
// Remove archived nodes
self.remove_nodes().await?;
// Everything ok // Everything ok
Ok(()) Ok(())
} }
// tick is called periodically to perform maintenance tasks. /// Run the background task to update node registration information
// This is called every TICK_INTERVAL.
#[instrument(err, level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub async fn tick(&self) -> Result<(), Error> {
let now = SystemTime::now().duration_since(UNIX_EPOCH).map_err(|e| {
Error::Internal(format!("Clock may have gone backwards: {:?}", e.duration()))
})?;
let ts = now.as_secs();
self.tick_at(ts).await?;
Ok(())
}
// tick_at is the utility function that is called by tick.
// It is handy for testing, because it allows you to specify the timestamp,
// without depending on a system clock.
#[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))] #[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub async fn tick_at(&self, ts: u64) -> Result<(), Error> { pub async fn node_membership_update(&self) -> Result<(), Error> {
trace!(target: TARGET, "Ticking at timestamp {ts} ({:?})", conv::u64_to_versionstamp(ts)); // Output function invocation details to logs
let _vs = self.save_timestamp_for_versionstamp(ts).await?; trace!(target: TARGET, "Updating node registration information");
self.garbage_collect_stale_change_feeds(ts).await?;
// Update this node in the cluster // Update this node in the cluster
self.update_node(self.id).await?; self.update_node(self.id).await?;
// Mark expired nodes as archived
self.expire_nodes().await?;
// Cleanup expired nodes data
self.cleanup_nodes().await?;
// Garbage collect other data
self.garbage_collect().await?;
// Everything ok // Everything ok
Ok(()) Ok(())
} }
// save_timestamp_for_versionstamp saves the current timestamp for the each database's current versionstamp. /// Run the background task to process and archive inactive nodes
// Note: the returned VS is flawed, as there are multiple {ts: vs} mappings per (ns, db) #[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub(crate) async fn save_timestamp_for_versionstamp( pub async fn node_membership_expire(&self) -> Result<(), Error> {
&self, // Output function invocation details to logs
ts: u64, trace!(target: TARGET, "Processing and archiving inactive nodes");
) -> Result<Option<Versionstamp>, Error> { // Mark expired nodes as archived
let tx = self.transaction(Write, Optimistic).await?; self.expire_nodes().await?;
match self.save_timestamp_for_versionstamp_impl(ts, &tx).await { // Everything ok
Ok(vs) => Ok(vs),
Err(e) => {
match tx.cancel().await {
Ok(_) => {
Err(e)
}
Err(txe) => {
Err(Error::Tx(format!("Error saving timestamp for versionstamp: {:?} and error cancelling transaction: {:?}", e, txe)))
}
}
}
}
}
async fn save_timestamp_for_versionstamp_impl(
&self,
ts: u64,
tx: &Transaction,
) -> Result<Option<Versionstamp>, Error> {
let mut vs: Option<Versionstamp> = None;
let nses = tx.all_ns().await?;
let nses = nses.as_ref();
for ns in nses {
let ns = ns.name.as_str();
let dbs = tx.all_db(ns).await?;
let dbs = dbs.as_ref();
for db in dbs {
let db = db.name.as_str();
// TODO(SUR-341): This is incorrect, it's a [ns,db] to vs pair
// It's safe for now, as it is unused but either the signature must change
// to include {(ns, db): (ts, vs)} mapping, or we don't return it
vs = Some(tx.lock().await.set_timestamp_for_versionstamp(ts, ns, db).await?);
}
}
tx.commit().await?;
Ok(vs)
}
// garbage_collect_stale_change_feeds deletes all change feed entries that are older than the watermarks.
pub(crate) async fn garbage_collect_stale_change_feeds(&self, ts: u64) -> Result<(), Error> {
let tx = self.transaction(Write, Optimistic).await?;
if let Err(e) = self.garbage_collect_stale_change_feeds_impl(&tx, ts).await {
return match tx.cancel().await {
Ok(_) => {
Err(e)
}
Err(txe) => {
Err(Error::Tx(format!("Error garbage collecting stale change feeds: {:?} and error cancelling transaction: {:?}", e, txe)))
}
};
}
Ok(()) Ok(())
} }
async fn garbage_collect_stale_change_feeds_impl( /// Run the background task to process and cleanup archived nodes
&self, #[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
tx: &Transaction, pub async fn node_membership_remove(&self) -> Result<(), Error> {
ts: u64, // Output function invocation details to logs
) -> Result<(), Error> { trace!(target: TARGET, "Processing and cleaning archived nodes");
cf::gc_all_at(tx, ts).await?; // Cleanup expired nodes data
tx.commit().await?; self.remove_nodes().await?;
// Everything ok
Ok(())
}
/// Run the background task to perform changefeed garbage collection
#[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub async fn changefeed_process(&self) -> Result<(), Error> {
// Output function invocation details to logs
trace!(target: TARGET, "Running changefeed garbage collection");
// Calculate the current system time
let ts = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|e| {
Error::Internal(format!("Clock may have gone backwards: {:?}", e.duration()))
})?
.as_secs();
// Save timestamps for current versionstamps
self.changefeed_versionstamp(ts).await?;
// Garbage old changefeed data from all databases
self.changefeed_cleanup(ts).await?;
// Everything ok
Ok(())
}
/// Run the background task to perform changefeed garbage collection
#[instrument(level = "trace", target = "surrealdb::core::kvs::ds", skip(self))]
pub async fn changefeed_process_at(&self, ts: u64) -> Result<(), Error> {
// Output function invocation details to logs
trace!(target: TARGET, "Running changefeed garbage collection");
// Save timestamps for current versionstamps
self.changefeed_versionstamp(ts).await?;
// Garbage collect old changefeed data from all databases
self.changefeed_cleanup(ts).await?;
// Everything ok
Ok(()) Ok(())
} }

View file

@ -15,6 +15,7 @@
mod api; mod api;
mod batch; mod batch;
mod cache; mod cache;
mod cf;
mod clock; mod clock;
mod ds; mod ds;
mod export; mod export;

View file

@ -84,31 +84,46 @@ impl Datastore {
pub async fn expire_nodes(&self) -> Result<(), Error> { pub async fn expire_nodes(&self) -> Result<(), Error> {
// Log when this method is run // Log when this method is run
trace!(target: TARGET, "Archiving expired nodes in the cluster"); trace!(target: TARGET, "Archiving expired nodes in the cluster");
// Open transaction and fetch nodes // Fetch all of the inactive nodes
let txn = self.transaction(Write, Optimistic).await?; let inactive = {
let now = self.clock_now().await; let txn = self.transaction(Read, Optimistic).await?;
let nds = catch!(txn, txn.all_nodes().await); let nds = catch!(txn, txn.all_nodes().await);
for nd in nds.iter() { let now = self.clock_now().await;
// Check that the node is active catch!(txn, txn.cancel().await);
if nd.is_active() { // Filter the inactive nodes
// Check if the node has expired nds.iter()
if nd.hb < now - Duration::from_secs(30) { .filter_map(|n| {
// Log the live query scanning // Check that the node is active and has expired
trace!(target: TARGET, id = %nd.id, "Archiving node in the cluster"); match n.is_active() && n.hb < now - Duration::from_secs(30) {
// Mark the node as archived true => Some(n.to_owned()),
let val = nd.archive(); false => None,
// Get the key for the node entry }
let key = crate::key::root::nd::new(nd.id); })
// Update the node entry .collect::<Vec<_>>()
catch!(txn, txn.set(key, val, None).await); };
} // Check if there are inactive nodes
if !inactive.is_empty() {
// Open a writeable transaction
let txn = self.transaction(Write, Optimistic).await?;
// Archive the inactive nodes
for nd in inactive.iter() {
// Log the live query scanning
trace!(target: TARGET, id = %nd.id, "Archiving node in the cluster");
// Mark the node as archived
let val = nd.archive();
// Get the key for the node entry
let key = crate::key::root::nd::new(nd.id);
// Update the node entry
catch!(txn, txn.set(key, val, None).await);
} }
// Commit the changes
catch!(txn, txn.commit().await);
} }
// Commit the changes // Everything was successful
txn.commit().await Ok(())
} }
/// Cleans up nodes which are no longer in this cluster. /// Removes and cleans up nodes which are no longer in this cluster.
/// ///
/// This function should be run periodically at an interval. /// This function should be run periodically at an interval.
/// ///
@ -116,23 +131,25 @@ impl Datastore {
/// When a matching node is found, all node queries, and table queries are /// When a matching node is found, all node queries, and table queries are
/// garbage collected, before the node itself is completely deleted. /// garbage collected, before the node itself is completely deleted.
#[instrument(err, level = "trace", target = "surrealdb::core::kvs::node", skip(self))] #[instrument(err, level = "trace", target = "surrealdb::core::kvs::node", skip(self))]
pub async fn cleanup_nodes(&self) -> Result<(), Error> { pub async fn remove_nodes(&self) -> Result<(), Error> {
// Log when this method is run // Log when this method is run
trace!(target: TARGET, "Cleaning up archived nodes in the cluster"); trace!(target: TARGET, "Cleaning up archived nodes in the cluster");
// Fetch all of the expired nodes // Fetch all of the archived nodes
let expired = { let archived = {
let txn = self.transaction(Read, Optimistic).await?; let txn = self.transaction(Read, Optimistic).await?;
let nds = catch!(txn, txn.all_nodes().await); let nds = catch!(txn, txn.all_nodes().await);
catch!(txn, txn.cancel().await);
// Filter the archived nodes // Filter the archived nodes
nds.iter().filter_map(Node::archived).collect::<Vec<_>>() nds.iter().filter_map(Node::archived).collect::<Vec<_>>()
}; };
// Delete the live queries // Loop over the archived nodes
{ for id in archived.iter() {
for id in expired.iter() { // Open a writeable transaction
let txn = self.transaction(Write, Optimistic).await?;
{
// Log the live query scanning // Log the live query scanning
trace!(target: TARGET, id = %id, "Deleting live queries for node"); trace!(target: TARGET, id = %id, "Deleting live queries for node");
// Scan the live queries for this node // Scan the live queries for this node
let txn = self.transaction(Write, Optimistic).await?;
let beg = crate::key::node::lq::prefix(*id); let beg = crate::key::node::lq::prefix(*id);
let end = crate::key::node::lq::suffix(*id); let end = crate::key::node::lq::suffix(*id);
let mut next = Some(beg..end); let mut next = Some(beg..end);
@ -145,7 +162,7 @@ impl Datastore {
// Get the key for this node live query // Get the key for this node live query
let nlq = catch!(txn, crate::key::node::lq::Lq::decode(k)); let nlq = catch!(txn, crate::key::node::lq::Lq::decode(k));
// Check that the node for this query is archived // Check that the node for this query is archived
if expired.contains(&nlq.nd) { if archived.contains(&nlq.nd) {
// Get the key for this table live query // Get the key for this table live query
let tlq = crate::key::table::lq::new(&val.ns, &val.db, &val.tb, nlq.lq); let tlq = crate::key::table::lq::new(&val.ns, &val.db, &val.tb, nlq.lq);
// Delete the table live query // Delete the table live query
@ -155,15 +172,8 @@ impl Datastore {
} }
} }
} }
// Commit the changes
txn.commit().await?;
} }
} {
// Delete the expired nodes
{
let txn = self.transaction(Write, Optimistic).await?;
// Loop over the nodes and delete
for id in expired.iter() {
// Log the node deletion // Log the node deletion
trace!(target: TARGET, id = %id, "Deleting node from the cluster"); trace!(target: TARGET, id = %id, "Deleting node from the cluster");
// Get the key for the node entry // Get the key for the node entry
@ -172,7 +182,7 @@ impl Datastore {
catch!(txn, txn.del(key).await); catch!(txn, txn.del(key).await);
} }
// Commit the changes // Commit the changes
txn.commit().await?; catch!(txn, txn.commit().await);
} }
// Everything was successful // Everything was successful
Ok(()) Ok(())
@ -192,8 +202,8 @@ impl Datastore {
pub async fn garbage_collect(&self) -> Result<(), Error> { pub async fn garbage_collect(&self) -> Result<(), Error> {
// Log the node deletion // Log the node deletion
trace!(target: TARGET, "Garbage collecting all miscellaneous data"); trace!(target: TARGET, "Garbage collecting all miscellaneous data");
// Fetch expired nodes // Fetch archived nodes
let expired = { let archived = {
let txn = self.transaction(Read, Optimistic).await?; let txn = self.transaction(Read, Optimistic).await?;
let nds = catch!(txn, txn.all_nodes().await); let nds = catch!(txn, txn.all_nodes().await);
// Filter the archived nodes // Filter the archived nodes
@ -240,7 +250,7 @@ impl Datastore {
// Get the node id and the live query id // Get the node id and the live query id
let (nid, lid) = (stm.node.0, stm.id.0); let (nid, lid) = (stm.node.0, stm.id.0);
// Check that the node for this query is archived // Check that the node for this query is archived
if expired.contains(&stm.node) { if archived.contains(&stm.node) {
// Get the key for this node live query // Get the key for this node live query
let tlq = catch!(txn, crate::key::table::lq::Lq::decode(k)); let tlq = catch!(txn, crate::key::table::lq::Lq::decode(k));
// Get the key for this table live query // Get the key for this table live query

View file

@ -88,7 +88,7 @@ async fn writing_ts_again_results_in_following_ts() {
assert_eq!(scanned[1].0, crate::key::database::ts::new("myns", "mydb", 1).encode().unwrap()); assert_eq!(scanned[1].0, crate::key::database::ts::new("myns", "mydb", 1).encode().unwrap());
// Repeating tick // Repeating tick
ds.tick_at(1).await.unwrap(); ds.changefeed_process_at(1).await.unwrap();
// Validate // Validate
let mut tx = ds.transaction(Write, Optimistic).await.unwrap().inner(); let mut tx = ds.transaction(Write, Optimistic).await.unwrap().inner();

View file

@ -7,26 +7,38 @@ use std::time::Duration;
#[doc(hidden)] #[doc(hidden)]
#[non_exhaustive] #[non_exhaustive]
pub struct EngineOptions { pub struct EngineOptions {
/// The maximum number of live queries that can be created in a single transaction pub node_membership_refresh_interval: Duration,
pub new_live_queries_per_transaction: u32, pub node_membership_check_interval: Duration,
/// The size of batches being requested per update in order to catch up a live query pub node_membership_cleanup_interval: Duration,
pub live_query_catchup_size: u32, pub changefeed_gc_interval: Duration,
pub tick_interval: Duration,
} }
impl Default for EngineOptions { impl Default for EngineOptions {
fn default() -> Self { fn default() -> Self {
Self { Self {
new_live_queries_per_transaction: 100, node_membership_refresh_interval: Duration::from_secs(3),
live_query_catchup_size: 1000, node_membership_check_interval: Duration::from_secs(15),
tick_interval: Duration::from_secs(10), node_membership_cleanup_interval: Duration::from_secs(300),
changefeed_gc_interval: Duration::from_secs(10),
} }
} }
} }
impl EngineOptions { impl EngineOptions {
pub fn with_tick_interval(mut self, tick_interval: Duration) -> Self { pub fn with_node_membership_refresh_interval(mut self, interval: Duration) -> Self {
self.tick_interval = tick_interval; self.node_membership_refresh_interval = interval;
self
}
pub fn with_node_membership_check_interval(mut self, interval: Duration) -> Self {
self.node_membership_check_interval = interval;
self
}
pub fn with_node_membership_cleanup_interval(mut self, interval: Duration) -> Self {
self.node_membership_cleanup_interval = interval;
self
}
pub fn with_changefeed_gc_interval(mut self, interval: Duration) -> Self {
self.changefeed_gc_interval = interval;
self self
} }
} }

View file

@ -24,7 +24,7 @@ resolver = "2"
[features] [features]
# Public features # Public features
default = ["protocol-ws", "rustls"] default = ["protocol-ws", "rustls"]
protocol-http = ["dep:reqwest", "dep:tokio-util"] protocol-http = ["dep:reqwest"]
protocol-ws = ["dep:tokio-tungstenite", "dep:trice", "tokio/time"] protocol-ws = ["dep:tokio-tungstenite", "dep:trice", "tokio/time"]
kv-mem = ["surrealdb-core/kv-mem", "tokio/time"] kv-mem = ["surrealdb-core/kv-mem", "tokio/time"]
kv-indxdb = ["surrealdb-core/kv-indxdb"] kv-indxdb = ["surrealdb-core/kv-indxdb"]
@ -67,6 +67,7 @@ features = [
targets = [] targets = []
[dependencies] [dependencies]
arrayvec = "=0.7.4"
bincode = "1.3.3" bincode = "1.3.3"
channel = { version = "1.9.0", package = "async-channel" } channel = { version = "1.9.0", package = "async-channel" }
chrono = { version = "0.4.38", features = ["serde"] } chrono = { version = "0.4.38", features = ["serde"] }
@ -96,7 +97,6 @@ rustls = { version = "0.23.12", default-features = false, features = [
"std", "std",
"tls12", "tls12",
], optional = true } ], optional = true }
arrayvec = "=0.7.4"
reblessive = { version = "0.4.0", features = ["tree"] } reblessive = { version = "0.4.0", features = ["tree"] }
rustls-pki-types = { version = "1.7.0", features = ["web"] } rustls-pki-types = { version = "1.7.0", features = ["web"] }
semver = { version = "1.0.20", features = ["serde"] } semver = { version = "1.0.20", features = ["serde"] }
@ -105,7 +105,7 @@ serde_json = "1.0.127"
serde-content = "0.1.0" serde-content = "0.1.0"
surrealdb-core = { version = "=2.0.0", default-features = false, path = "../core", package = "surrealdb-core" } surrealdb-core = { version = "=2.0.0", default-features = false, path = "../core", package = "surrealdb-core" }
thiserror = "1.0.63" thiserror = "1.0.63"
tokio-util = { version = "0.7.11", optional = true, features = ["compat"] } tokio-util = { version = "0.7.11", features = ["compat"] }
tracing = "0.1.40" tracing = "0.1.40"
trice = { version = "0.4.0", optional = true } trice = { version = "0.4.0", optional = true }
url = "2.5.0" url = "2.5.0"

View file

@ -36,7 +36,6 @@ use std::{
marker::PhantomData, marker::PhantomData,
mem, mem,
sync::Arc, sync::Arc,
time::Duration,
}; };
use surrealdb_core::{ use surrealdb_core::{
dbs::{Response, Session}, dbs::{Response, Session},
@ -83,8 +82,6 @@ pub(crate) mod native;
#[cfg(target_arch = "wasm32")] #[cfg(target_arch = "wasm32")]
pub(crate) mod wasm; pub(crate) mod wasm;
const DEFAULT_TICK_INTERVAL: Duration = Duration::from_secs(10);
/// In-memory database /// In-memory database
/// ///
/// # Examples /// # Examples

View file

@ -6,7 +6,7 @@ use crate::{
opt::{Endpoint, EndpointKind}, opt::{Endpoint, EndpointKind},
ExtraFeatures, OnceLockExt, Result, Surreal, ExtraFeatures, OnceLockExt, Result, Surreal,
}, },
engine::tasks::start_tasks, engine::tasks,
opt::{auth::Root, WaitFor}, opt::{auth::Root, WaitFor},
value::Notification, value::Notification,
Action, Action,
@ -20,6 +20,7 @@ use std::{
}; };
use surrealdb_core::{dbs::Session, iam::Level, kvs::Datastore, options::EngineOptions}; use surrealdb_core::{dbs::Session, iam::Level, kvs::Datastore, options::EngineOptions};
use tokio::sync::watch; use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
impl crate::api::Connection for Db {} impl crate::api::Connection for Db {}
@ -116,15 +117,22 @@ pub(crate) async fn run_router(
let mut live_queries = HashMap::new(); let mut live_queries = HashMap::new();
let mut session = Session::default().with_rt(true); let mut session = Session::default().with_rt(true);
let opt = { let canceller = CancellationToken::new();
let mut engine_options = EngineOptions::default();
engine_options.tick_interval = address let mut opt = EngineOptions::default();
.config if let Some(interval) = address.config.node_membership_refresh_interval {
.tick_interval opt.node_membership_refresh_interval = interval;
.unwrap_or(crate::api::engine::local::DEFAULT_TICK_INTERVAL); }
engine_options if let Some(interval) = address.config.node_membership_check_interval {
}; opt.node_membership_check_interval = interval;
let (tasks, task_chans) = start_tasks(&opt, kvs.clone()); }
if let Some(interval) = address.config.node_membership_cleanup_interval {
opt.node_membership_cleanup_interval = interval;
}
if let Some(interval) = address.config.changefeed_gc_interval {
opt.changefeed_gc_interval = interval;
}
let tasks = tasks::init(kvs.clone(), canceller.clone(), &opt);
let mut notifications = kvs.notifications(); let mut notifications = kvs.notifications();
let mut notification_stream = poll_fn(move |cx| match &mut notifications { let mut notification_stream = poll_fn(move |cx| match &mut notifications {
@ -179,12 +187,10 @@ pub(crate) async fn run_router(
} }
} }
} }
// Shutdown and stop closed tasks
// Stop maintenance tasks canceller.cancel();
for chan in task_chans { // Wait for background tasks to finish
if chan.send(()).is_err() { let _ = tasks.resolve().await;
error!("Error sending shutdown signal to task"); // Delete this node from the cluster
} let _ = kvs.delete_node(kvs.id()).await;
}
tasks.resolve().await.unwrap();
} }

View file

@ -2,7 +2,6 @@ use crate::api::conn::Connection;
use crate::api::conn::Route; use crate::api::conn::Route;
use crate::api::conn::Router; use crate::api::conn::Router;
use crate::api::engine::local::Db; use crate::api::engine::local::Db;
use crate::api::engine::local::DEFAULT_TICK_INTERVAL;
use crate::api::method::BoxFuture; use crate::api::method::BoxFuture;
use crate::api::opt::Endpoint; use crate::api::opt::Endpoint;
use crate::api::ExtraFeatures; use crate::api::ExtraFeatures;
@ -10,7 +9,7 @@ use crate::api::OnceLockExt;
use crate::api::Result; use crate::api::Result;
use crate::api::Surreal; use crate::api::Surreal;
use crate::dbs::Session; use crate::dbs::Session;
use crate::engine::tasks::start_tasks; use crate::engine::tasks;
use crate::iam::Level; use crate::iam::Level;
use crate::kvs::Datastore; use crate::kvs::Datastore;
use crate::opt::auth::Root; use crate::opt::auth::Root;
@ -29,6 +28,7 @@ use std::sync::Arc;
use std::sync::OnceLock; use std::sync::OnceLock;
use std::task::Poll; use std::task::Poll;
use tokio::sync::watch; use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use wasm_bindgen_futures::spawn_local; use wasm_bindgen_futures::spawn_local;
impl crate::api::Connection for Db {} impl crate::api::Connection for Db {}
@ -113,9 +113,22 @@ pub(crate) async fn run_router(
let mut live_queries = HashMap::new(); let mut live_queries = HashMap::new();
let mut session = Session::default().with_rt(true); let mut session = Session::default().with_rt(true);
let canceller = CancellationToken::new();
let mut opt = EngineOptions::default(); let mut opt = EngineOptions::default();
opt.tick_interval = address.config.tick_interval.unwrap_or(DEFAULT_TICK_INTERVAL); if let Some(interval) = address.config.node_membership_refresh_interval {
let (_tasks, task_chans) = start_tasks(&opt, kvs.clone()); opt.node_membership_refresh_interval = interval;
}
if let Some(interval) = address.config.node_membership_check_interval {
opt.node_membership_check_interval = interval;
}
if let Some(interval) = address.config.node_membership_cleanup_interval {
opt.node_membership_cleanup_interval = interval;
}
if let Some(interval) = address.config.changefeed_gc_interval {
opt.changefeed_gc_interval = interval;
}
let tasks = tasks::init(kvs.clone(), canceller.clone(), &opt);
let mut notifications = kvs.notifications(); let mut notifications = kvs.notifications();
let mut notification_stream = poll_fn(move |cx| match &mut notifications { let mut notification_stream = poll_fn(move |cx| match &mut notifications {
@ -177,11 +190,10 @@ pub(crate) async fn run_router(
} }
} }
} }
// Shutdown and stop closed tasks
// Stop maintenance tasks canceller.cancel();
for chan in task_chans { // Wait for background tasks to finish
if chan.send(()).is_err() { let _ = tasks.resolve().await;
error!("Error sending shutdown signal to maintenance task"); // Delete this node from the cluster
} let _ = kvs.delete_node(kvs.id()).await;
}
} }

View file

@ -1,110 +1,180 @@
use crate::engine::IntervalStream; use crate::engine::IntervalStream;
use crate::err::Error;
use core::future::Future;
use futures::StreamExt; use futures::StreamExt;
use std::pin::Pin;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use surrealdb_core::{kvs::Datastore, options::EngineOptions}; use surrealdb_core::{kvs::Datastore, options::EngineOptions};
use tokio::sync::oneshot; use tokio_util::sync::CancellationToken;
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
use crate::Error as RootError; use tokio::spawn;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_futures::spawn_local as spawn;
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
use tokio::{spawn as spawn_future, task::JoinHandle}; type Task = Pin<Box<dyn Future<Output = Result<(), tokio::task::JoinError>> + Send + 'static>>;
#[cfg(target_arch = "wasm32")] #[cfg(target_arch = "wasm32")]
use std::sync::atomic::{AtomicBool, Ordering}; type Task = Pin<Box<()>>;
#[cfg(target_arch = "wasm32")]
use wasm_bindgen_futures::spawn_local as spawn_future;
/// This will be true if a task has completed pub struct Tasks(Vec<Task>);
#[cfg(not(target_arch = "wasm32"))]
type FutureTask = JoinHandle<()>;
/// This will be true if a task has completed
#[cfg(target_arch = "wasm32")]
type FutureTask = Arc<AtomicBool>;
pub struct Tasks {
pub nd: FutureTask,
}
impl Tasks { impl Tasks {
#[cfg(target_arch = "wasm32")]
pub async fn resolve(self) -> Result<(), Error> {
Ok(())
}
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
pub async fn resolve(self) -> Result<(), RootError> { pub async fn resolve(self) -> Result<(), Error> {
match self.nd.await { for task in self.0.into_iter() {
// cancelling this task is fine, and can happen when surrealdb exits. let _ = task.await;
Ok(_) => {}
Err(e) if e.is_cancelled() => {}
Err(e) => {
error!("Node agent task failed: {}", e);
let inner_err =
surrealdb_core::err::Error::NodeAgent("node task failed and has been logged");
return Err(RootError::Db(inner_err));
}
} }
Ok(()) Ok(())
} }
} }
/// Starts tasks that are required for the correct running of the engine
pub fn start_tasks(opt: &EngineOptions, dbs: Arc<Datastore>) -> (Tasks, [oneshot::Sender<()>; 1]) {
let nd = init(opt, dbs.clone());
let cancellation_channels = [nd.1];
(
Tasks {
nd: nd.0,
},
cancellation_channels,
)
}
// The init starts a long-running thread for periodically calling Datastore.tick. // The init starts a long-running thread for periodically calling Datastore.tick.
// Datastore.tick is responsible for running garbage collection and other // Datastore.tick is responsible for running garbage collection and other
// background tasks. // background tasks.
// //
// This function needs to be called before after the dbs::init and before the net::init functions. // This function needs to be called before after the dbs::init and before the net::init functions.
// It needs to be before net::init because the net::init function blocks until the web server stops. // It needs to be before net::init because the net::init function blocks until the web server stops.
fn init(opt: &EngineOptions, dbs: Arc<Datastore>) -> (FutureTask, oneshot::Sender<()>) { pub fn init(dbs: Arc<Datastore>, canceller: CancellationToken, opts: &EngineOptions) -> Tasks {
let _init = surrealdb_core::dbs::LoggingLifecycle::new("node agent initialisation".to_string()); let task1 = spawn_task_node_membership_refresh(dbs.clone(), canceller.clone(), opts);
let tick_interval = opt.tick_interval; let task2 = spawn_task_node_membership_check(dbs.clone(), canceller.clone(), opts);
let task3 = spawn_task_node_membership_cleanup(dbs.clone(), canceller.clone(), opts);
trace!("Ticker interval is {:?}", tick_interval); let task4 = spawn_task_changefeed_cleanup(dbs.clone(), canceller.clone(), opts);
#[cfg(target_arch = "wasm32")] Tasks(vec![task1, task2, task3, task4])
let completed_status = Arc::new(AtomicBool::new(false)); }
#[cfg(target_arch = "wasm32")]
let ret_status = completed_status.clone();
// We create a channel that can be streamed that will indicate termination
let (tx, mut rx) = oneshot::channel();
let _fut = spawn_future(async move {
let _lifecycle = surrealdb_core::dbs::LoggingLifecycle::new("heartbeat task".to_string());
let mut ticker = interval_ticker(tick_interval).await;
fn spawn_task_node_membership_refresh(
dbs: Arc<Datastore>,
canceller: CancellationToken,
opts: &EngineOptions,
) -> Task {
// Get the delay interval from the config
let delay = opts.node_membership_refresh_interval;
// Spawn a future
Box::pin(spawn(async move {
// Log the interval frequency
trace!("Updating node registration information every {delay:?}");
// Create a new time-based interval ticket
let mut ticker = interval_ticker(delay).await;
// Loop continuously until the task is cancelled
loop { loop {
tokio::select! { tokio::select! {
v = ticker.next() => { biased;
// ticker will never return None; // Check if this has shutdown
let i = v.unwrap(); _ = canceller.cancelled() => break,
trace!("Node agent tick: {:?}", i); // Receive a notification on the channel
if let Err(e) = dbs.tick().await { Some(_) = ticker.next() => {
error!("Error running node agent tick: {}", e); if let Err(e) = dbs.node_membership_update().await {
error!("Error running node agent tick: {e}");
break; break;
} }
} }
_ = &mut rx => { }
// termination requested }
break trace!("Background task exited: Updating node registration information");
}))
}
fn spawn_task_node_membership_check(
dbs: Arc<Datastore>,
canceller: CancellationToken,
opts: &EngineOptions,
) -> Task {
// Get the delay interval from the config
let delay = opts.node_membership_check_interval;
// Spawn a future
Box::pin(spawn(async move {
// Log the interval frequency
trace!("Processing and archiving inactive nodes every {delay:?}");
// Create a new time-based interval ticket
let mut ticker = interval_ticker(delay).await;
// Loop continuously until the task is cancelled
loop {
tokio::select! {
biased;
// Check if this has shutdown
_ = canceller.cancelled() => break,
// Receive a notification on the channel
Some(_) = ticker.next() => {
if let Err(e) = dbs.node_membership_expire().await {
error!("Error running node agent tick: {e}");
break;
}
} }
} }
} }
trace!("Background task exited: Processing and archiving inactive nodes");
}))
}
#[cfg(target_arch = "wasm32")] fn spawn_task_node_membership_cleanup(
completed_status.store(true, Ordering::Relaxed); dbs: Arc<Datastore>,
}); canceller: CancellationToken,
#[cfg(not(target_arch = "wasm32"))] opts: &EngineOptions,
return (_fut, tx); ) -> Task {
#[cfg(target_arch = "wasm32")] // Get the delay interval from the config
return (ret_status, tx); let delay = opts.node_membership_cleanup_interval;
// Spawn a future
Box::pin(spawn(async move {
// Log the interval frequency
trace!("Processing and cleaning archived nodes every {delay:?}");
// Create a new time-based interval ticket
let mut ticker = interval_ticker(delay).await;
// Loop continuously until the task is cancelled
loop {
tokio::select! {
biased;
// Check if this has shutdown
_ = canceller.cancelled() => break,
// Receive a notification on the channel
Some(_) = ticker.next() => {
if let Err(e) = dbs.node_membership_remove().await {
error!("Error running node agent tick: {e}");
break;
}
}
}
}
trace!("Background task exited: Processing and cleaning archived nodes");
}))
}
fn spawn_task_changefeed_cleanup(
dbs: Arc<Datastore>,
canceller: CancellationToken,
opts: &EngineOptions,
) -> Task {
// Get the delay interval from the config
let delay = opts.changefeed_gc_interval;
// Spawn a future
Box::pin(spawn(async move {
// Log the interval frequency
trace!("Running changefeed garbage collection every {delay:?}");
// Create a new time-based interval ticket
let mut ticker = interval_ticker(delay).await;
// Loop continuously until the task is cancelled
loop {
tokio::select! {
biased;
// Check if this has shutdown
_ = canceller.cancelled() => break,
// Receive a notification on the channel
Some(_) = ticker.next() => {
if let Err(e) = dbs.changefeed_process().await {
error!("Error running node agent tick: {e}");
break;
}
}
}
}
trace!("Background task exited: Running changefeed garbage collection");
}))
} }
async fn interval_ticker(interval: Duration) -> IntervalStream { async fn interval_ticker(interval: Duration) -> IntervalStream {
@ -112,7 +182,7 @@ async fn interval_ticker(interval: Duration) -> IntervalStream {
use tokio::{time, time::MissedTickBehavior}; use tokio::{time, time::MissedTickBehavior};
#[cfg(target_arch = "wasm32")] #[cfg(target_arch = "wasm32")]
use wasmtimer::{tokio as time, tokio::MissedTickBehavior}; use wasmtimer::{tokio as time, tokio::MissedTickBehavior};
// Create a new interval timer
let mut interval = time::interval(interval); let mut interval = time::interval(interval);
// Don't bombard the database if we miss some ticks // Don't bombard the database if we miss some ticks
interval.set_missed_tick_behavior(MissedTickBehavior::Delay); interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
@ -123,31 +193,30 @@ async fn interval_ticker(interval: Duration) -> IntervalStream {
#[cfg(test)] #[cfg(test)]
#[cfg(feature = "kv-mem")] #[cfg(feature = "kv-mem")]
mod test { mod test {
use crate::engine::tasks::start_tasks; use crate::engine::tasks;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use surrealdb_core::{kvs::Datastore, options::EngineOptions}; use surrealdb_core::{kvs::Datastore, options::EngineOptions};
use tokio_util::sync::CancellationToken;
#[test_log::test(tokio::test)] #[test_log::test(tokio::test)]
pub async fn tasks_complete() { pub async fn tasks_complete() {
let can = CancellationToken::new();
let opt = EngineOptions::default(); let opt = EngineOptions::default();
let dbs = Arc::new(Datastore::new("memory").await.unwrap()); let dbs = Arc::new(Datastore::new("memory").await.unwrap());
let (val, chans) = start_tasks(&opt, dbs.clone()); let tasks = tasks::init(dbs.clone(), can.clone(), &opt);
for chan in chans { can.cancel();
chan.send(()).unwrap(); tasks.resolve().await.unwrap();
}
val.resolve().await.unwrap();
} }
#[test_log::test(tokio::test)] #[test_log::test(tokio::test)]
pub async fn tasks_complete_channel_closed() { pub async fn tasks_complete_channel_closed() {
let can = CancellationToken::new();
let opt = EngineOptions::default(); let opt = EngineOptions::default();
let dbs = Arc::new(Datastore::new("memory").await.unwrap()); let dbs = Arc::new(Datastore::new("memory").await.unwrap());
let val = { let tasks = tasks::init(dbs.clone(), can.clone(), &opt);
let (val, _chans) = start_tasks(&opt, dbs.clone()); can.cancel();
val tokio::time::timeout(Duration::from_secs(10), tasks.resolve())
};
tokio::time::timeout(Duration::from_secs(10), val.resolve())
.await .await
.map_err(|e| format!("Timed out after {e}")) .map_err(|e| format!("Timed out after {e}"))
.unwrap() .unwrap()

View file

@ -18,10 +18,13 @@ pub struct Config {
pub(crate) auth: Level, pub(crate) auth: Level,
pub(crate) username: String, pub(crate) username: String,
pub(crate) password: String, pub(crate) password: String,
pub(crate) tick_interval: Option<Duration>,
pub(crate) capabilities: CoreCapabilities, pub(crate) capabilities: CoreCapabilities,
#[cfg(storage)] #[cfg(storage)]
pub(crate) temporary_directory: Option<PathBuf>, pub(crate) temporary_directory: Option<PathBuf>,
pub(crate) node_membership_refresh_interval: Option<Duration>,
pub(crate) node_membership_check_interval: Option<Duration>,
pub(crate) node_membership_cleanup_interval: Option<Duration>,
pub(crate) changefeed_gc_interval: Option<Duration>,
} }
impl Config { impl Config {
@ -98,12 +101,6 @@ impl Config {
self self
} }
/// Set the interval at which the database should run node maintenance tasks
pub fn tick_interval(mut self, interval: impl Into<Option<Duration>>) -> Self {
self.tick_interval = interval.into().filter(|x| !x.is_zero());
self
}
/// Set the capabilities for the database /// Set the capabilities for the database
pub fn capabilities(mut self, capabilities: Capabilities) -> Self { pub fn capabilities(mut self, capabilities: Capabilities) -> Self {
self.capabilities = capabilities.build(); self.capabilities = capabilities.build();
@ -115,4 +112,34 @@ impl Config {
self.temporary_directory = path; self.temporary_directory = path;
self self
} }
/// Set the interval at which the database should run node maintenance tasks
pub fn node_membership_refresh_interval(
mut self,
interval: impl Into<Option<Duration>>,
) -> Self {
self.node_membership_refresh_interval = interval.into().filter(|x| !x.is_zero());
self
}
/// Set the interval at which the database should run node maintenance tasks
pub fn node_membership_check_interval(mut self, interval: impl Into<Option<Duration>>) -> Self {
self.node_membership_check_interval = interval.into().filter(|x| !x.is_zero());
self
}
/// Set the interval at which the database should run node maintenance tasks
pub fn node_membership_cleanup_interval(
mut self,
interval: impl Into<Option<Duration>>,
) -> Self {
self.node_membership_cleanup_interval = interval.into().filter(|x| !x.is_zero());
self
}
/// Set the interval at which the database should run node maintenance tasks
pub fn changefeed_gc_interval(mut self, interval: impl Into<Option<Duration>>) -> Self {
self.changefeed_gc_interval = interval.into().filter(|x| !x.is_zero());
self
}
} }

View file

@ -38,7 +38,6 @@ mod api_integration {
const NS: &str = "test-ns"; const NS: &str = "test-ns";
const ROOT_USER: &str = "root"; const ROOT_USER: &str = "root";
const ROOT_PASS: &str = "root"; const ROOT_PASS: &str = "root";
const TICK_INTERVAL: Duration = Duration::from_secs(1);
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
struct Record { struct Record {
@ -187,10 +186,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
let db = Surreal::new::<Mem>(config).await.unwrap(); let db = Surreal::new::<Mem>(config).await.unwrap();
db.signin(root).await.unwrap(); db.signin(root).await.unwrap();
(permit, db) (permit, db)
@ -284,10 +280,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
#[allow(deprecated)] #[allow(deprecated)]
let db = Surreal::new::<File>((path, config)).await.unwrap(); let db = Surreal::new::<File>((path, config)).await.unwrap();
db.signin(root).await.unwrap(); db.signin(root).await.unwrap();
@ -320,10 +313,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
let db = Surreal::new::<RocksDb>((path, config)).await.unwrap(); let db = Surreal::new::<RocksDb>((path, config)).await.unwrap();
db.signin(root).await.unwrap(); db.signin(root).await.unwrap();
(permit, db) (permit, db)
@ -354,10 +344,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
let db = Surreal::new::<TiKv>(("127.0.0.1:2379", config)).await.unwrap(); let db = Surreal::new::<TiKv>(("127.0.0.1:2379", config)).await.unwrap();
db.signin(root).await.unwrap(); db.signin(root).await.unwrap();
(permit, db) (permit, db)
@ -387,10 +374,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
let path = "/etc/foundationdb/fdb.cluster"; let path = "/etc/foundationdb/fdb.cluster";
surrealdb::engine::any::connect((format!("fdb://{path}"), config.clone())) surrealdb::engine::any::connect((format!("fdb://{path}"), config.clone()))
.await .await
@ -418,10 +402,7 @@ mod api_integration {
username: ROOT_USER, username: ROOT_USER,
password: ROOT_PASS, password: ROOT_PASS,
}; };
let config = Config::new() let config = Config::new().user(root).capabilities(Capabilities::all());
.user(root)
.tick_interval(TICK_INTERVAL)
.capabilities(Capabilities::all());
let db = Surreal::new::<SurrealKV>((path, config)).await.unwrap(); let db = Surreal::new::<SurrealKV>((path, config)).await.unwrap();
db.signin(root).await.unwrap(); db.signin(root).await.unwrap();
(permit, db) (permit, db)

View file

@ -49,11 +49,11 @@ async fn database_change_feeds() -> Result<(), Error> {
let dbs = new_ds().await?; let dbs = new_ds().await?;
let ses = Session::owner().with_ns(ns.as_str()).with_db(db.as_str()); let ses = Session::owner().with_ns(ns.as_str()).with_db(db.as_str());
let mut current_time = 0u64; let mut current_time = 0u64;
dbs.tick_at(current_time).await?; dbs.changefeed_process_at(current_time).await?;
let res = &mut dbs.execute(sql.as_str(), &ses, None).await?; let res = &mut dbs.execute(sql.as_str(), &ses, None).await?;
// Increment by a second (sic) // Increment by a second (sic)
current_time += 1; current_time += 1;
dbs.tick_at(current_time).await?; dbs.changefeed_process_at(current_time).await?;
assert_eq!(res.len(), 3); assert_eq!(res.len(), 3);
// DEFINE DATABASE // DEFINE DATABASE
let tmp = res.remove(0).result; let tmp = res.remove(0).result;
@ -178,7 +178,7 @@ async fn database_change_feeds() -> Result<(), Error> {
"; ";
// This is neccessary to mark a point in time that can be GC'd // This is neccessary to mark a point in time that can be GC'd
current_time += 1; current_time += 1;
dbs.tick_at(current_time).await?; dbs.changefeed_process_at(current_time).await?;
let tx = dbs.transaction(Write, Optimistic).await?; let tx = dbs.transaction(Write, Optimistic).await?;
tx.cancel().await?; tx.cancel().await?;
@ -189,7 +189,7 @@ async fn database_change_feeds() -> Result<(), Error> {
let one_hour_in_secs = 3600; let one_hour_in_secs = 3600;
current_time += one_hour_in_secs; current_time += one_hour_in_secs;
current_time += 1; current_time += 1;
dbs.tick_at(current_time).await?; dbs.changefeed_process_at(current_time).await?;
let res = &mut dbs.execute(sql, &ses, None).await?; let res = &mut dbs.execute(sql, &ses, None).await?;
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse("[]"); let val = Value::parse("[]");
@ -229,9 +229,9 @@ async fn table_change_feeds() -> Result<(), Error> {
let ses = Session::owner().with_ns("test-tb-cf").with_db("test-tb-cf"); let ses = Session::owner().with_ns("test-tb-cf").with_db("test-tb-cf");
let start_ts = 0u64; let start_ts = 0u64;
let end_ts = start_ts + 1; let end_ts = start_ts + 1;
dbs.tick_at(start_ts).await?; dbs.changefeed_process_at(start_ts).await?;
let res = &mut dbs.execute(sql, &ses, None).await?; let res = &mut dbs.execute(sql, &ses, None).await?;
dbs.tick_at(end_ts).await?; dbs.changefeed_process_at(end_ts).await?;
assert_eq!(res.len(), 10); assert_eq!(res.len(), 10);
// DEFINE TABLE // DEFINE TABLE
let tmp = res.remove(0).result; let tmp = res.remove(0).result;
@ -388,7 +388,7 @@ async fn table_change_feeds() -> Result<(), Error> {
let sql = " let sql = "
SHOW CHANGES FOR TABLE person SINCE 0; SHOW CHANGES FOR TABLE person SINCE 0;
"; ";
dbs.tick_at(end_ts + 3599).await?; dbs.changefeed_process_at(end_ts + 3599).await?;
let res = &mut dbs.execute(sql, &ses, None).await?; let res = &mut dbs.execute(sql, &ses, None).await?;
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
assert!( assert!(
@ -402,7 +402,7 @@ async fn table_change_feeds() -> Result<(), Error> {
.unwrap() .unwrap()
); );
// GC after 1hs // GC after 1hs
dbs.tick_at(end_ts + 3600).await?; dbs.changefeed_process_at(end_ts + 3600).await?;
let res = &mut dbs.execute(sql, &ses, None).await?; let res = &mut dbs.execute(sql, &ses, None).await?;
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse("[]"); let val = Value::parse("[]");
@ -423,7 +423,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
// Save timestamp 1 // Save timestamp 1
let ts1_dt = "2023-08-01T00:00:00Z"; let ts1_dt = "2023-08-01T00:00:00Z";
let ts1 = DateTime::parse_from_rfc3339(ts1_dt).unwrap(); let ts1 = DateTime::parse_from_rfc3339(ts1_dt).unwrap();
db.tick_at(ts1.timestamp().try_into().unwrap()).await.unwrap(); db.changefeed_process_at(ts1.timestamp().try_into().unwrap()).await.unwrap();
// Create and update users // Create and update users
let sql = " let sql = "
CREATE user:amos SET name = 'Amos'; CREATE user:amos SET name = 'Amos';
@ -627,7 +627,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
// Save timestamp 2 // Save timestamp 2
let ts2_dt = "2023-08-01T00:00:05Z"; let ts2_dt = "2023-08-01T00:00:05Z";
let ts2 = DateTime::parse_from_rfc3339(ts2_dt).unwrap(); let ts2 = DateTime::parse_from_rfc3339(ts2_dt).unwrap();
db.tick_at(ts2.timestamp().try_into().unwrap()).await.unwrap(); db.changefeed_process_at(ts2.timestamp().try_into().unwrap()).await.unwrap();
// //
// Show changes using timestamp 1 // Show changes using timestamp 1
// //
@ -684,7 +684,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
// Save timestamp 3 // Save timestamp 3
let ts3_dt = "2023-08-01T00:00:10Z"; let ts3_dt = "2023-08-01T00:00:10Z";
let ts3 = DateTime::parse_from_rfc3339(ts3_dt).unwrap(); let ts3 = DateTime::parse_from_rfc3339(ts3_dt).unwrap();
db.tick_at(ts3.timestamp().try_into().unwrap()).await.unwrap(); db.changefeed_process_at(ts3.timestamp().try_into().unwrap()).await.unwrap();
// //
// Show changes using timestamp 3 // Show changes using timestamp 3
// //

View file

@ -1,11 +1,10 @@
use crate::net::client_ip::ClientIp; use crate::net::client_ip::ClientIp;
use std::sync::OnceLock; use std::sync::OnceLock;
use std::{net::SocketAddr, path::PathBuf}; use std::{net::SocketAddr, path::PathBuf};
use surrealdb::options::EngineOptions;
pub static CF: OnceLock<Config> = OnceLock::new(); pub static CF: OnceLock<Config> = OnceLock::new();
use surrealdb::options::EngineOptions;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Config { pub struct Config {
pub bind: SocketAddr, pub bind: SocketAddr,
@ -15,6 +14,6 @@ pub struct Config {
pub pass: Option<String>, pub pass: Option<String>,
pub crt: Option<PathBuf>, pub crt: Option<PathBuf>,
pub key: Option<PathBuf>, pub key: Option<PathBuf>,
pub engine: Option<EngineOptions>, pub engine: EngineOptions,
pub no_identification_headers: bool, pub no_identification_headers: bool,
} }

View file

@ -1,5 +1,4 @@
use super::config; use super::config::{Config, CF};
use super::config::Config;
use crate::cli::validator::parser::env_filter::CustomEnvFilter; use crate::cli::validator::parser::env_filter::CustomEnvFilter;
use crate::cli::validator::parser::env_filter::CustomEnvFilterParser; use crate::cli::validator::parser::env_filter::CustomEnvFilterParser;
use crate::cnf::LOGO; use crate::cnf::LOGO;
@ -14,7 +13,7 @@ use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use surrealdb::engine::any::IntoEndpoint; use surrealdb::engine::any::IntoEndpoint;
use surrealdb::engine::tasks::start_tasks; use surrealdb::engine::tasks;
use surrealdb::options::EngineOptions; use surrealdb::options::EngineOptions;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
@ -43,12 +42,33 @@ pub struct StartCommandArguments {
// Tasks // Tasks
// //
#[arg( #[arg(
help = "The interval at which to run node agent tick (including garbage collection)", help = "The interval at which to refresh node registration information",
help_heading = "Database" help_heading = "Database"
)] )]
#[arg(env = "SURREAL_TICK_INTERVAL", long = "tick-interval", value_parser = super::validator::duration)] #[arg(env = "SURREAL_NODE_MEMBERSHIP_REFRESH_INTERVAL", long = "node-membership-refresh-interval", value_parser = super::validator::duration)]
#[arg(default_value = "3s")]
node_membership_refresh_interval: Duration,
#[arg(
help = "The interval at which process and archive inactive nodes",
help_heading = "Database"
)]
#[arg(env = "SURREAL_NODE_MEMBERSHIP_CHECK_INTERVAL", long = "node-membership-check-interval", value_parser = super::validator::duration)]
#[arg(default_value = "15s")]
node_membership_check_interval: Duration,
#[arg(
help = "The interval at which to process and cleanup archived nodes",
help_heading = "Database"
)]
#[arg(env = "SURREAL_NODE_MEMBERSHIP_CLEANUP_INTERVAL", long = "node-membership-cleanup-interval", value_parser = super::validator::duration)]
#[arg(default_value = "300s")]
node_membership_cleanup_interval: Duration,
#[arg(
help = "The interval at which to perform changefeed garbage collection",
help_heading = "Database"
)]
#[arg(env = "SURREAL_CHANGEFEED_GC_INTERVAL", long = "changefeed-gc-interval", value_parser = super::validator::duration)]
#[arg(default_value = "10s")] #[arg(default_value = "10s")]
tick_interval: Duration, changefeed_gc_interval: Duration,
// //
// Authentication // Authentication
// //
@ -143,7 +163,10 @@ pub async fn init(
dbs, dbs,
web, web,
log, log,
tick_interval, node_membership_refresh_interval,
node_membership_check_interval,
node_membership_cleanup_interval,
changefeed_gc_interval,
no_banner, no_banner,
no_identification_headers, no_identification_headers,
.. ..
@ -168,38 +191,42 @@ pub async fn init(
} else { } else {
(None, None) (None, None)
}; };
// Setup the command-line options // Configure the engine
let _ = config::CF.set(Config { let engine = EngineOptions::default()
.with_node_membership_refresh_interval(node_membership_refresh_interval)
.with_node_membership_check_interval(node_membership_check_interval)
.with_node_membership_cleanup_interval(node_membership_cleanup_interval)
.with_changefeed_gc_interval(changefeed_gc_interval);
// Configure the config
let config = Config {
bind: listen_addresses.first().cloned().unwrap(), bind: listen_addresses.first().cloned().unwrap(),
client_ip, client_ip,
path, path,
user, user,
pass, pass,
no_identification_headers, no_identification_headers,
engine: Some(EngineOptions::default().with_tick_interval(tick_interval)), engine,
crt, crt,
key, key,
}); };
// This is the cancellation token propagated down to // Setup the command-line options
// all the async functions that needs to be stopped gracefully. let _ = CF.set(config);
let ct = CancellationToken::new();
// Initiate environment // Initiate environment
env::init().await?; env::init().await?;
// Create a token to cancel tasks
let canceller = CancellationToken::new();
// Start the datastore // Start the datastore
let ds = Arc::new(dbs::init(dbs).await?); let datastore = Arc::new(dbs::init(dbs).await?);
// Start the node agent // Start the node agent
let (tasks, task_chans) = let nodetasks = tasks::init(datastore.clone(), canceller.clone(), &CF.get().unwrap().engine);
start_tasks(&config::CF.get().unwrap().engine.unwrap_or_default(), ds.clone());
// Start the web server // Start the web server
net::init(ds, ct.clone()).await?; net::init(datastore.clone(), canceller.clone()).await?;
// Shutdown and stop closed tasks // Shutdown and stop closed tasks
task_chans.into_iter().for_each(|chan| { canceller.cancel();
if chan.send(()).is_err() { // Wait for background tasks to finish
error!("Failed to send shutdown signal to task"); nodetasks.resolve().await?;
} // Delete this node from the cluster
}); datastore.delete_node(datastore.id()).await?;
ct.cancel();
tasks.resolve().await?;
// All ok // All ok
Ok(()) Ok(())
} }

View file

@ -97,6 +97,8 @@ impl Builder {
} }
pub fn shutdown() -> Result<(), Error> { pub fn shutdown() -> Result<(), Error> {
// Output information to logs
trace!("Shutting down telemetry service");
// Flush all telemetry data and block until done // Flush all telemetry data and block until done
opentelemetry::global::shutdown_tracer_provider(); opentelemetry::global::shutdown_tracer_provider();
// Everything ok // Everything ok
@ -115,14 +117,17 @@ pub fn filter_from_value(v: &str) -> Result<EnvFilter, ParseError> {
// Otherwise, let's show info and above // Otherwise, let's show info and above
"info" => Ok(EnvFilter::default().add_directive(Level::INFO.into())), "info" => Ok(EnvFilter::default().add_directive(Level::INFO.into())),
// Otherwise, let's show debugs and above // Otherwise, let's show debugs and above
"debug" => EnvFilter::builder() "debug" => EnvFilter::builder().parse(
.parse("warn,surreal=debug,surrealdb=debug,surrealcs=warn,surrealdb::core::kvs=debug"), "warn,surreal=debug,surrealdb=debug,surrealcs=warn,surrealdb::core::kvs::tr=debug",
),
// Specify the log level for each code area // Specify the log level for each code area
"trace" => EnvFilter::builder() "trace" => EnvFilter::builder().parse(
.parse("warn,surreal=trace,surrealdb=trace,surrealcs=warn,surrealdb::core::kvs=debug"), "warn,surreal=trace,surrealdb=trace,surrealcs=warn,surrealdb::core::kvs::tr=debug",
),
// Check if we should show all surreal logs // Check if we should show all surreal logs
"full" => EnvFilter::builder() "full" => EnvFilter::builder().parse(
.parse("debug,surreal=trace,surrealdb=trace,surrealcs=debug,surrealdb::core=trace"), "debug,surreal=trace,surrealdb=trace,surrealcs=debug,surrealdb::core::kvs::tr=trace",
),
// Check if we should show all module logs // Check if we should show all module logs
"all" => Ok(EnvFilter::default().add_directive(Level::TRACE.into())), "all" => Ok(EnvFilter::default().add_directive(Level::TRACE.into())),
// Let's try to parse the custom log level // Let's try to parse the custom log level

View file

@ -22,10 +22,6 @@ mod cli_integration {
use super::common::{self, StartServerArguments, PASS, USER}; use super::common::{self, StartServerArguments, PASS, USER};
/// This depends on the interval configuration that we cannot yet inject
const ONE_PERIOD: Duration = Duration::new(10, 0);
const TWO_PERIODS: Duration = Duration::new(20, 0);
#[test] #[test]
fn version_command() { fn version_command() {
assert!(common::run("version").output().is_ok()); assert!(common::run("version").output().is_ok());
@ -878,7 +874,6 @@ mod cli_integration {
auth: false, auth: false,
tls: false, tls: false,
wait_is_ready: true, wait_is_ready: true,
tick_interval: ONE_PERIOD,
..Default::default() ..Default::default()
}) })
.await .await
@ -980,7 +975,7 @@ mod cli_integration {
} }
}; };
sleep(TWO_PERIODS).await; sleep(Duration::from_secs(20)).await;
info!("* Show changes after GC"); info!("* Show changes after GC");
{ {

View file

@ -159,7 +159,6 @@ pub struct StartServerArguments {
pub auth: bool, pub auth: bool,
pub tls: bool, pub tls: bool,
pub wait_is_ready: bool, pub wait_is_ready: bool,
pub tick_interval: time::Duration,
pub temporary_directory: Option<String>, pub temporary_directory: Option<String>,
pub args: String, pub args: String,
pub vars: Option<HashMap<String, String>>, pub vars: Option<HashMap<String, String>>,
@ -172,7 +171,6 @@ impl Default for StartServerArguments {
auth: true, auth: true,
tls: false, tls: false,
wait_is_ready: true, wait_is_ready: true,
tick_interval: time::Duration::new(1, 0),
temporary_directory: None, temporary_directory: None,
args: "".to_string(), args: "".to_string(),
vars: None, vars: None,
@ -247,7 +245,6 @@ pub async fn start_server(
auth, auth,
tls, tls,
wait_is_ready, wait_is_ready,
tick_interval,
temporary_directory, temporary_directory,
args, args,
vars, vars,
@ -274,11 +271,6 @@ pub async fn start_server(
extra_args.push_str(" --unauthenticated"); extra_args.push_str(" --unauthenticated");
} }
if !tick_interval.is_zero() {
let sec = tick_interval.as_secs();
extra_args.push_str(format!(" --tick-interval {sec}s").as_str());
}
if let Some(path) = temporary_directory { if let Some(path) = temporary_directory {
extra_args.push_str(format!(" --temporary-directory {path}").as_str()); extra_args.push_str(format!(" --temporary-directory {path}").as_str());
} }