Proposed extensions to the Context interface to abstract away a distributed runtime
interface Context {
StateMgr getStateMgr(); // State Manager for co ordination dedup and ordering semantics
UUID getConnectorId(); // Globally unique connector instance identifier
Integer getPartitionId(); // Globall unique connector instance identifier
Integer getInstanceCount(); // Expected total instances of this connector type operating
sendEvent(SendEvent event); // Notifying the external runtime of metrics and other related events
// Receving events from an external manager post initilization ie: dynamic reconfiguration
recvEvent(RecvEvent event);
}
/**
 * Entry point to connector state and to the specialised managers
 * (checkpoint, acknowledgement and offset) that operate on it.
 */
interface StateMgr {
    /**
     * Returns the state as a map keyed by string.
     * NOTE(review): key and value semantics are not specified in this proposal.
     */
    Map<String, Object> getState();

    /** Returns the checkpoint manager. */
    CheckpointMgr getCheckpointMgr();

    /** Returns the acknowledgement manager. */
    AckMgr getAckMgr();

    /** Returns the offset manager. */
    OffSetMgr getOffSetMgr();
}
// Acknowledgement manager, focused on the needs of Source connectors.
// No operations are defined yet in this proposal.
interface AckMgr {
}
// Offset manager, primarily for pub/sub systems such as Kafka and Pulsar.
// No operations are defined yet in this proposal.
interface OffSetMgr {
}
// Checkpoint manager, focused on Sink connectors where things like
// distributed checkpoints are needed.
// No operations are defined yet in this proposal.
interface CheckpointMgr{
}
// A SendEvent subtype; judging by its name it presumably carries metrics — TODO confirm.
// NOTE(review): no `class`/`interface` keyword is given, so the declaration kind is unspecified.
MetricEvent extends SendEvent {
}
// A SendEvent subtype; judging by its name it presumably reports status — TODO confirm.
// NOTE(review): no `class`/`interface` keyword is given, so the declaration kind is unspecified.
StatusEvent extends SendEvent {
}
// A RecvEvent subtype; judging by its name it presumably carries notifications — TODO confirm.
// NOTE(review): no `class`/`interface` keyword is given, so the declaration kind is unspecified.
NotificationEvent extends RecvEvent {
}
// A RecvEvent subtype; judging by its name it presumably carries configuration changes — TODO confirm.
// NOTE(review): no `class`/`interface` keyword is given, so the declaration kind is unspecified.
ConfigurationEvent extends RecvEvent {
}
Kafka Connect
Sample application.conf
# Sample Pulsar settings: one section for the sink and one for the source.
pulsar {
# Sink section: broker URL and topic.
sink {
broker.root.url: "pulsar://localhost:6650"
topic: "persistent://sample/standalone/ns1/my-topic"
}
# Source section: same broker URL and topic as the sink in this sample,
# plus a subscription name.
source {
broker.root.url: "pulsar://localhost:6650"
topic: "persistent://sample/standalone/ns1/my-topic"
subscription: "test-group"
}
}
Illustration
graph LR
%% The Executor node links to each source partition node; the edges are labelled "Context".
A[Executor] -- Context --> B(Source-Partition1)
A[Executor] -- Context --> C(Source-Partition2)
%% Both source partition nodes link to the same MetricSink node.
B --> D{MetricSink}
C --> D{MetricSink}