Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ClusterState API #2459

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ hyper-rustls = { version = "0.27.2", default-features = false, features = [
"logging",
] }
hyper-util = { version = "0.1" }
indexmap = { version = "2" }
itertools = "0.13.0"
jsonschema = "0.26.0"
metrics = { version = "0.24" }
Expand Down
8 changes: 8 additions & 0 deletions crates/admin/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.protoc_arg("--experimental_allow_proto3_optional")
.extern_path(".restate.common", "::restate_types::protobuf::common")
.extern_path(".restate.cluster", "::restate_types::protobuf::cluster")
.extern_path(
".restate.deprecated_cluster",
"::restate_types::protobuf::deprecated_cluster",
)
.extern_path(
".restate.cluster_configuration",
"::restate_types::protobuf::cluster_configuration",
)
.compile_protos(
&["./protobuf/cluster_ctrl_svc.proto"],
&["protobuf", "../types/protobuf"],
Expand Down
11 changes: 7 additions & 4 deletions crates/admin/protobuf/cluster_ctrl_svc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
syntax = "proto3";

import "restate/common.proto";
import "restate/cluster.proto";
import "restate/cluster_configuration.proto";
import "restate/deprecated_cluster.proto";
import "google/protobuf/empty.proto";

package restate.cluster_ctrl;
Expand Down Expand Up @@ -43,17 +44,19 @@ service ClusterCtrlSvc {

message SetClusterConfigurationResponse {}
message SetClusterConfigurationRequest {
restate.cluster.ClusterConfiguration cluster_configuration = 1;
restate.cluster_configuration.ClusterConfiguration cluster_configuration = 1;
}

message GetClusterConfigurationRequest {}
message GetClusterConfigurationResponse {
restate.cluster.ClusterConfiguration cluster_configuration = 1;
restate.cluster_configuration.ClusterConfiguration cluster_configuration = 1;
}

message ClusterStateRequest {}

message ClusterStateResponse { restate.cluster.ClusterState cluster_state = 1; }
message ClusterStateResponse {
restate.deprecated_cluster.ClusterState cluster_state = 1;
}

message ListLogsRequest {}

Expand Down
15 changes: 9 additions & 6 deletions crates/admin/src/cluster_controller/cluster_state_refresher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ use restate_core::network::{
use restate_core::{
Metadata, ShutdownError, TaskCenter, TaskCenterFutureExt, TaskHandle, TaskKind,
};
use restate_types::cluster::cluster_state::{
use restate_types::deprecated_cluster::cluster_state::{
AliveNode, ClusterState, DeadNode, NodeState, SuspectNode,
};
use restate_types::net::node::GetNodeState;
use restate_types::net::node::GetPartitionsProcessorsState;
use restate_types::time::MillisSinceEpoch;
use restate_types::Version;

pub struct ClusterStateRefresher<T> {
network_sender: Networking<T>,
get_state_router: RpcRouter<GetNodeState>,
get_state_router: RpcRouter<GetPartitionsProcessorsState>,
in_flight_refresh: Option<TaskHandle<anyhow::Result<()>>>,
cluster_state_update_rx: watch::Receiver<Arc<ClusterState>>,
cluster_state_update_tx: Arc<watch::Sender<Arc<ClusterState>>>,
Expand Down Expand Up @@ -99,7 +99,7 @@ impl<T: TransportConnect> ClusterStateRefresher<T> {
}

fn start_refresh_task(
get_state_router: RpcRouter<GetNodeState>,
get_state_router: RpcRouter<GetPartitionsProcessorsState>,
network_sender: Networking<T>,
cluster_state_tx: Arc<watch::Sender<Arc<ClusterState>>>,
) -> Result<Option<TaskHandle<anyhow::Result<()>>>, ShutdownError> {
Expand Down Expand Up @@ -134,8 +134,11 @@ impl<T: TransportConnect> ClusterStateRefresher<T> {
async move {
match network_sender.node_connection(node_id).await {
Ok(connection) => {
let outgoing = Outgoing::new(node_id, GetNodeState::default())
.assign_connection(connection);
let outgoing = Outgoing::new(
node_id,
GetPartitionsProcessorsState::default(),
)
.assign_connection(connection);

(
node_id,
Expand Down
2 changes: 1 addition & 1 deletion crates/admin/src/cluster_controller/grpc_svc_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use std::num::NonZeroU16;
use std::time::Duration;

use bytes::{Bytes, BytesMut};
use restate_types::protobuf::cluster::ClusterConfiguration;
use restate_types::protobuf::cluster_configuration::ClusterConfiguration;
use tonic::{async_trait, Request, Response, Status};
use tracing::info;

Expand Down
8 changes: 5 additions & 3 deletions crates/admin/src/cluster_controller/observed_cluster_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ use std::collections::{HashMap, HashSet};

use xxhash_rust::xxh3::Xxh3Builder;

use restate_types::cluster::cluster_state::{ClusterState, NodeState, RunMode};
use restate_types::deprecated_cluster::cluster_state::{ClusterState, NodeState};
use restate_types::identifiers::PartitionId;
use restate_types::partition_processor::RunMode;
use restate_types::{GenerationalNodeId, NodeId, PlainNodeId};

/// Represents the scheduler's observed state of the cluster. The scheduler will use this
Expand Down Expand Up @@ -135,10 +136,11 @@ mod tests {
};
use googletest::prelude::{empty, eq};
use googletest::{assert_that, elements_are, unordered_elements_are};
use restate_types::cluster::cluster_state::{
AliveNode, ClusterState, DeadNode, NodeState, PartitionProcessorStatus, RunMode,
use restate_types::deprecated_cluster::cluster_state::{
AliveNode, ClusterState, DeadNode, NodeState,
};
use restate_types::identifiers::PartitionId;
use restate_types::partition_processor::{PartitionProcessorStatus, RunMode};
use restate_types::time::MillisSinceEpoch;
use restate_types::{GenerationalNodeId, PlainNodeId, Version};
use std::collections::{BTreeMap, HashMap};
Expand Down
7 changes: 4 additions & 3 deletions crates/admin/src/cluster_controller/scheduler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,13 @@ mod tests {
};
use restate_core::network::{ForwardingHandler, Incoming, MessageCollectorMockConnector};
use restate_core::{Metadata, TestCoreEnv, TestCoreEnvBuilder};
use restate_types::cluster::cluster_state::{
AliveNode, ClusterState, DeadNode, NodeState, PartitionProcessorStatus, RunMode,
};
use restate_types::cluster_controller::{
SchedulingPlan, SchedulingPlanBuilder, TargetPartitionState,
};
use restate_types::config::Configuration;
use restate_types::deprecated_cluster::cluster_state::{
AliveNode, ClusterState, DeadNode, NodeState,
};
use restate_types::identifiers::{PartitionId, PartitionKey};
use restate_types::metadata_store::keys::SCHEDULING_PLAN_KEY;
use restate_types::net::codec::WireDecode;
Expand All @@ -615,6 +615,7 @@ mod tests {
use restate_types::nodes_config::{
LogServerConfig, NodeConfig, NodesConfiguration, Role, StorageState,
};
use restate_types::partition_processor::{PartitionProcessorStatus, RunMode};
use restate_types::partition_table::{PartitionTable, ReplicationStrategy};
use restate_types::time::MillisSinceEpoch;
use restate_types::{GenerationalNodeId, PlainNodeId, Version};
Expand Down
12 changes: 7 additions & 5 deletions crates/admin/src/cluster_controller/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ use restate_core::{
cancellation_watcher, Metadata, MetadataWriter, ShutdownError, TargetVersion, TaskCenter,
TaskKind,
};
use restate_types::cluster::cluster_state::ClusterState;
use restate_types::config::{AdminOptions, Configuration};
use restate_types::deprecated_cluster::cluster_state::ClusterState;
use restate_types::health::HealthStatus;
use restate_types::identifiers::{PartitionId, SnapshotId};
use restate_types::live::Live;
Expand Down Expand Up @@ -842,16 +842,18 @@ mod tests {
};
use restate_core::test_env::NoOpMessageHandler;
use restate_core::{TaskCenter, TaskKind, TestCoreEnv, TestCoreEnvBuilder};
use restate_types::cluster::cluster_state::PartitionProcessorStatus;
use restate_types::config::{AdminOptions, Configuration};
use restate_types::health::HealthStatus;
use restate_types::identifiers::PartitionId;
use restate_types::live::Live;
use restate_types::logs::{LogId, Lsn, SequenceNumber};
use restate_types::net::node::{GetNodeState, NodeStateResponse};
use restate_types::net::node::{
GetPartitionsProcessorsState, PartitionsProcessorsStateResponse,
};
use restate_types::net::partition_processor_manager::ControlProcessors;
use restate_types::net::AdvertisedAddress;
use restate_types::nodes_config::{LogServerConfig, NodeConfig, NodesConfiguration, Role};
use restate_types::partition_processor::PartitionProcessorStatus;
use restate_types::{GenerationalNodeId, Version};

#[test(restate_core::test)]
Expand Down Expand Up @@ -902,7 +904,7 @@ mod tests {
}

impl MessageHandler for NodeStateHandler {
type MessageType = GetNodeState;
type MessageType = GetPartitionsProcessorsState;

async fn on_message(&self, msg: Incoming<Self::MessageType>) {
if self.block_list.contains(&msg.peer()) {
Expand All @@ -916,7 +918,7 @@ mod tests {
};

let state = [(PartitionId::MIN, partition_processor_status)].into();
let response = msg.to_rpc_response(NodeStateResponse {
let response = msg.to_rpc_response(PartitionsProcessorsStateResponse {
partition_processor_state: Some(state),
});

Expand Down
2 changes: 1 addition & 1 deletion crates/admin/src/cluster_controller/service/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use restate_bifrost::{Bifrost, BifrostAdmin};
use restate_core::metadata_store::MetadataStoreClient;
use restate_core::network::TransportConnect;
use restate_core::{my_node_id, Metadata, MetadataWriter};
use restate_types::cluster::cluster_state::{AliveNode, NodeState};
use restate_types::config::{AdminOptions, Configuration};
use restate_types::deprecated_cluster::cluster_state::{AliveNode, NodeState};
use restate_types::identifiers::PartitionId;
use restate_types::logs::{LogId, Lsn, SequenceNumber};
use restate_types::net::metadata::MetadataKind;
Expand Down
1 change: 1 addition & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ hostname = { workspace = true }
http = { workspace = true }
http-body = { workspace = true }
http-body-util = { workspace = true }
indexmap = { workspace = true }
humantime = { workspace = true }
hyper = { workspace = true }
hyper-util = { workspace = true, features = ["server-graceful", "server"] }
Expand Down
113 changes: 113 additions & 0 deletions crates/core/src/cluster_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright (c) 2023 - 2025 Restate Software, Inc., Restate GmbH.
// All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

use indexmap::IndexSet;
use restate_types::{GenerationalNodeId, NodeId, PlainNodeId};
use tokio::sync::watch;

use crate::{Metadata, TaskCenter};

/// Handle used to query which nodes of the cluster are considered alive or dead.
///
/// The handle wraps a [`watch::Sender`] carrying no payload (`()`); receivers for
/// it are handed out by [`ClusterState::watch`]. The handle derives `Clone`.
//
// TODO(azmy): `Default` is temporary, remove it when the implementation is done.
#[derive(Debug, Clone, Default)]
pub struct ClusterState {
// Payload-free watch channel; receivers are created via `subscribe()`.
watch: watch::Sender<()>,
}

impl ClusterState {
    /// Creates a new cluster state handle backed by a fresh watch channel.
    ///
    /// This is just a placeholder since cluster state should only be
    /// constructed via the failure detector.
    pub fn new() -> Self {
        Self {
            watch: watch::Sender::new(()),
        }
    }

    /// Returns the cluster state exposed by the current task-center handle,
    /// or `None` if none is available.
    pub fn try_current() -> Option<Self> {
        TaskCenter::with_current(|h| h.cluster_state())
    }

    /// Returns the cluster state exposed by the current task-center handle.
    ///
    /// # Panics
    ///
    /// Panics with "called outside task-center scope" if no cluster state
    /// is available.
    #[track_caller]
    pub fn current() -> Self {
        TaskCenter::with_current(|h| h.cluster_state()).expect("called outside task-center scope")
    }

    /// Subscribes to the underlying watch channel and returns the receiver.
    pub fn watch(&self) -> watch::Receiver<()> {
        self.watch.subscribe()
    }

    /// Gets an iterator over all alive nodes.
    ///
    /// Dummy implementation: assumes every node in the current nodes
    /// configuration is alive and yields its current generation.
    pub fn alive(&self) -> impl Iterator<Item = GenerationalNodeId> {
        let nodes_config = Metadata::with_current(|m| m.nodes_config_ref());
        // Collected eagerly so the returned iterator owns its items instead of
        // borrowing from the local `nodes_config`.
        nodes_config
            .iter()
            .map(|(_, n)| n.current_generation)
            .collect::<Vec<_>>()
            .into_iter()
    }

    /// Gets an iterator over all dead nodes.
    ///
    /// Dummy implementation: assumes all nodes are alive, so the iterator is
    /// always empty.
    pub fn dead(&self) -> impl Iterator<Item = PlainNodeId> {
        std::iter::empty()
    }

    /// Checks if a node is alive, returning its generational node id if it is.
    ///
    /// Dummy implementation: assumes all nodes are alive, so any node that can
    /// be found in the current nodes configuration is reported with its current
    /// generation. Returns `None` when the lookup fails.
    pub fn is_alive(&self, node_id: NodeId) -> Option<GenerationalNodeId> {
        let nodes_config = Metadata::with_current(|m| m.nodes_config_ref());
        nodes_config
            .find_node_by_id(node_id)
            .map(|n| n.current_generation)
            .ok()
    }

    /// Finds the first alive node in the given slice.
    ///
    /// Dummy implementation: assumes all nodes are alive, so the result is the
    /// first id in `nodes` that can be found in the current nodes configuration.
    /// Ids whose lookup fails are skipped (matching [`Self::intersect`]) instead
    /// of aborting the search at the first entry. Returns `None` when `nodes` is
    /// empty or none of the ids can be found.
    pub fn first_alive(&self, nodes: &[PlainNodeId]) -> Option<GenerationalNodeId> {
        // Keep the cheap early exit so an empty slice never touches metadata.
        if nodes.is_empty() {
            return None;
        }

        let nodes_config = Metadata::with_current(|m| m.nodes_config_ref());
        nodes.iter().find_map(|plain_id| {
            nodes_config
                .find_node_by_id(*plain_id)
                .map(|n| n.current_generation)
                .ok()
        })
    }

    /// Returns the subset of alive nodes from the given node set preserving their order.
    ///
    /// Dummy implementation: assumes all nodes in the set are alive and returns
    /// the currently known generational id for each of them. Ids whose lookup in
    /// the nodes configuration fails are dropped from the result.
    pub fn intersect(&self, nodes: &IndexSet<PlainNodeId>) -> IndexSet<GenerationalNodeId> {
        let nodes_config = Metadata::with_current(|m| m.nodes_config_ref());
        nodes
            .iter()
            .filter_map(|plain_id| {
                nodes_config
                    .find_node_by_id(*plain_id)
                    .map(|n| n.current_generation)
                    .ok()
            })
            .collect()
    }
}
1 change: 1 addition & 0 deletions crates/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

pub mod cluster_state;
mod error;
mod metadata;
pub mod metadata_store;
Expand Down
4 changes: 2 additions & 2 deletions crates/core/src/network/connection_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ mod tests {
use restate_test_util::{assert_eq, let_assert};
use restate_types::net::codec::WireDecode;
use restate_types::net::metadata::{GetMetadataRequest, MetadataMessage};
use restate_types::net::node::GetNodeState;
use restate_types::net::node::GetPartitionsProcessorsState;
use restate_types::net::{
AdvertisedAddress, ProtocolVersion, CURRENT_PROTOCOL_VERSION,
MIN_SUPPORTED_PROTOCOL_VERSION,
Expand Down Expand Up @@ -1013,7 +1013,7 @@ mod tests {
.await
.into_test_result()?;

let request = GetNodeState {};
let request = GetPartitionsProcessorsState {};
let partition_table_version = metadata.partition_table_version().next();
let header = Header::new(
metadata.nodes_config_version(),
Expand Down
Loading
Loading