summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2023-04-20 16:13:58 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2023-04-20 16:28:07 +0200
commit2c7d2141ac86b0d482d3221447d1ad920c557108 (patch)
tree5f349d8bed4a0eab1dc13d7ebcaa98bf7e8cc86d
parent010df43519b12e83b0ff2cba9e344cba698586bb (diff)
downloadbird-2c7d2141ac86b0d482d3221447d1ad920c557108.tar.gz
BMP: Fix connection management
Replace broken TCP connection management with a simple state machine. Handle failed attempts properly with a timeout, detect and handle TCP connection close and try to reconnect after that. Remove useless 'station_connected' flag. Keep open messages saved even after the BMP session establishment, so they can be used after BMP session flaps. Use proper log messages for session events.
-rw-r--r--proto/bmp/bmp.c291
-rw-r--r--proto/bmp/bmp.h1
2 files changed, 158 insertions, 134 deletions
diff --git a/proto/bmp/bmp.c b/proto/bmp/bmp.c
index 0ef13cd4..36cc30cc 100644
--- a/proto/bmp/bmp.c
+++ b/proto/bmp/bmp.c
@@ -20,6 +20,12 @@
* - Support DE_CONFIGURED PEER DOWN REASON code in PEER DOWN NOTIFICATION message
* - If connection with BMP collector will lost then we don't establish connection again
* - Set Peer Type by its a global and local-scope IP address
+ *
+ * The BMP session is managed by a simple state machine with three states: Idle
+ * (!started, !sk), Connect (!started, sk active), and Established (started). It
+ * has three events: connect successfull (Connect -> Established), socket error
+ * (any -> Idle), and connect timeout (Idle/Connect -> Connect, resetting the
+ * TCP socket).
*/
#include "proto/bmp/bmp.h"
@@ -166,8 +172,11 @@ enum bmp_term_reason {
// Default chunk size request when memory allocation
#define DEFAULT_MEM_BLOCK_SIZE 4096
+// Initial delay for connection to the BMP collector
+#define CONNECT_INIT_TIME (200 MS)
+
// Timeout for connection to the BMP collector retry
-#define CONNECT_RETRY_SEC (10 S)
+#define CONNECT_RETRY_TIME (10 S)
#define IP4_MAX_TTL 255
@@ -188,20 +197,15 @@ enum bmp_term_reason {
} while (0)
-// Handle BIRD socket error event
-static void
-bmp_sock_err(sock *sk, int err);
+static void bmp_connected(struct birdsock *sk);
+static void bmp_sock_err(sock *sk, int err);
+static void bmp_close_socket(struct bmp_proto *p);
static void
bmp_send_peer_up_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
const byte* tx_data, const size_t tx_data_size,
const byte* rx_data, const size_t rx_data_size);
-static void
-bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif(
- const struct bmp_peer_map_key key, const byte *tx_msg,
- const size_t tx_msg_size, void *bmp_);
-
// Stores necessary any data in list
struct bmp_data_node {
node n;
@@ -250,7 +254,7 @@ bmp_init_msg_serialize(buffer *stream, const char *sys_descr, const char *sys_na
static void
bmp_schedule_tx_packet(struct bmp_proto *p, const byte *payload, const size_t size)
{
- ASSERT(p->station_connected);
+ ASSERT(p->started);
struct bmp_data_node *tx_data = mb_alloc(p->tx_mem_pool, sizeof (struct bmp_data_node));
tx_data->data = mb_alloc(p->tx_mem_pool, size);
@@ -265,23 +269,6 @@ bmp_schedule_tx_packet(struct bmp_proto *p, const byte *payload, const size_t si
}
}
-/**
- * bmp_startup - connect to the BMP collector.
- * NOTE: Send Initiation Message to the BMP collector.
- */
-static void
-bmp_startup(struct bmp_proto *p)
-{
- ASSERT(p->station_connected && !p->started);
-
- buffer payload = bmp_buffer_alloc(p->buffer_mpool, DEFAULT_MEM_BLOCK_SIZE);
- bmp_init_msg_serialize(&payload, p->sys_descr, p->sys_name);
- bmp_schedule_tx_packet(p, bmp_buffer_data(&payload), bmp_buffer_pos(&payload));
- bmp_buffer_free(&payload);
-
- p->started = true;
-}
-
static void
bmp_fire_tx(void *p_)
{
@@ -333,43 +320,13 @@ bmp_tx(struct birdsock *sk)
bmp_fire_tx(sk->data);
}
-static inline int
-bmp_open_socket(struct bmp_proto *p)
+/* We need RX hook just to accept socket close events */
+static int
+bmp_rx(struct birdsock *sk UNUSED, uint size UNUSED)
{
- sock *s = p->sk;
- s->daddr = p->station_ip;
- s->dport = p->station_port;
- s->err_hook = bmp_sock_err;
-
- int rc = sk_open(s);
-
- if (rc < 0)
- sk_log_error(s, p->p.name);
-
- return rc;
+ return 0;
}
-static void
-bmp_connection_retry(timer *t)
-{
- struct bmp_proto *p = t->data;
-
- if (bmp_open_socket(p) < 0)
- {
- log(L_DEBUG "Failed to connect to BMP station");
- return;
- }
-
- log(L_DEBUG "Connected to BMP station after connection retry");
- tm_stop(t);
-}
-
-void
-bmp_sock_err(sock *sk, int err)
-{
- struct bmp_proto *p = sk->data;
- log(L_WARN "[BMP:%s] Socket error: %M", p->p.name, err);
-}
static inline void
bmp_put_ipa(buffer *stream, const ip_addr addr)
@@ -489,36 +446,13 @@ bmp_peer_down_notif_msg_serialize(buffer *stream, const bool is_peer_global,
bmp_put_data(stream, data, data_size);
}
-/**
- * bmp_open - initialize internal resources of BMP implementation.
- * NOTE: It does not connect to BMP collector yet.
- */
-void
-bmp_open(const struct proto *P)
-{
- struct bmp_proto *p = (void *) P;
-
- if (bmp_open_socket(p) < 0)
- {
- log(L_DEBUG "Failed to connect to BMP station");
- p->connect_retry_timer = tm_new_init(P->pool, bmp_connection_retry, p,
- CONNECT_RETRY_SEC, 0 /* not randomized */);
- tm_start(p->connect_retry_timer, CONNECT_RETRY_SEC);
- p->station_connected = false;
- }
- else
- {
- log(L_DEBUG "Connected to BMP station");
- }
-}
-
-void
+static void
bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif(
const struct bmp_peer_map_key key, const byte *tx_msg,
const size_t tx_msg_size, void *bmp_)
{
struct bmp_proto *p = bmp_;
- ASSERT(p->station_connected);
+ ASSERT(p->started);
const struct bmp_peer_map_entry *map_rx_msg = bmp_peer_map_get(&p->peer_open_msg.rx_msg, key);
IF_PTR_IS_NULL_PRINT_ERR_MSG_AND_RETURN_OPT_VAL(
@@ -630,7 +564,7 @@ bmp_send_peer_up_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
const byte* tx_data, const size_t tx_data_size,
const byte* rx_data, const size_t rx_data_size)
{
- ASSERT(p->station_connected);
+ ASSERT(p->started);
const struct birdsock *sk = bmp_get_birdsock_ext(bgp);
IF_PTR_IS_NULL_PRINT_ERR_MSG_AND_RETURN_OPT_VAL(
@@ -661,24 +595,19 @@ bmp_put_sent_bgp_open_msg(const struct bgp_proto *bgp, const byte* pkt,
return;
}
- struct bmp_peer_map_key key = bmp_peer_map_key_create(bgp->remote_ip,
- bgp->remote_as);
- const struct bmp_peer_map_entry *map_entry
+ struct bmp_peer_map_key key
+ = bmp_peer_map_key_create(bgp->remote_ip, bgp->remote_as);
+ const struct bmp_peer_map_entry *rx_msg
= bmp_peer_map_get(&p->peer_open_msg.rx_msg, key);
- if (!map_entry || !p->started)
- {
- bmp_peer_map_insert(&p->peer_open_msg.tx_msg, key, pkt, pkt_size);
- if (!map_entry)
- {
- bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
- }
+ bmp_peer_map_insert(&p->peer_open_msg.tx_msg, key, pkt, pkt_size);
- return;
- }
+ if (!rx_msg)
+ bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
- bmp_send_peer_up_notif_msg(p, bgp, pkt, pkt_size, map_entry->data.buf,
- map_entry->data.buf_size);
+ if (rx_msg && p->started)
+ bmp_send_peer_up_notif_msg(p, bgp, pkt, pkt_size, rx_msg->data.buf,
+ rx_msg->data.buf_size);
}
void
@@ -694,22 +623,17 @@ bmp_put_recv_bgp_open_msg(const struct bgp_proto *bgp, const byte* pkt,
struct bmp_peer_map_key key
= bmp_peer_map_key_create(bgp->remote_ip, bgp->remote_as);
- const struct bmp_peer_map_entry *map_data
+ const struct bmp_peer_map_entry *tx_msg
= bmp_peer_map_get(&p->peer_open_msg.tx_msg, key);
- if (!map_data || !p->started)
- {
- bmp_peer_map_insert(&p->peer_open_msg.rx_msg, key, pkt, pkt_size);
- if (!map_data)
- {
- bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
- }
+ bmp_peer_map_insert(&p->peer_open_msg.rx_msg, key, pkt, pkt_size);
- return;
- }
+ if (!tx_msg)
+ bmp_peer_map_insert(&p->bgp_peers, key, (const byte *) &bgp, sizeof (bgp));
- bmp_send_peer_up_notif_msg(p, bgp, map_data->data.buf, map_data->data.buf_size,
- pkt, pkt_size);
+ if (tx_msg && p->started)
+ bmp_send_peer_up_notif_msg(p, bgp, tx_msg->data.buf, tx_msg->data.buf_size,
+ pkt, pkt_size);
}
void
@@ -933,7 +857,7 @@ static void
bmp_send_peer_down_notif_msg(struct bmp_proto *p, const struct bgp_proto *bgp,
const byte* data, const size_t data_size)
{
- ASSERT(p->station_connected);
+ ASSERT(p->started);
const struct bgp_caps *remote_caps = bmp_get_bgp_remote_caps_ext(bgp);
bool is_global_instance_peer = bmp_is_peer_global_instance(bgp);
@@ -1035,36 +959,136 @@ bmp_send_termination_msg(struct bmp_proto *p,
bmp_buffer_free(&stream);
}
+/**
+ * bmp_startup - enter established state
+ * @p: BMP instance
+ *
+ * The bgp_startup() function is called when the BMP session is established.
+ * It sends initiation and peer up messagages.
+ */
static void
-bmp_station_connected(struct birdsock *sk)
+bmp_startup(struct bmp_proto *p)
{
- struct bmp_proto *p = (void *) sk->data;
+ ASSERT(!p->started);
+ p->started = true;
- sk->tx_hook = bmp_tx;
- p->station_connected = true;
+ TRACE(D_EVENTS, "BMP session established");
- bmp_startup(p);
+ /* Send initiation message */
+ buffer payload = bmp_buffer_alloc(p->buffer_mpool, DEFAULT_MEM_BLOCK_SIZE);
+ bmp_init_msg_serialize(&payload, p->sys_descr, p->sys_name);
+ bmp_schedule_tx_packet(p, bmp_buffer_data(&payload), bmp_buffer_pos(&payload));
+ bmp_buffer_free(&payload);
+ /* Send Peer Up messages */
bmp_peer_map_walk(&p->peer_open_msg.tx_msg,
bmp_peer_map_walk_tx_open_msg_and_send_peer_up_notif, p);
- bmp_peer_map_flush(&p->peer_open_msg.tx_msg);
- bmp_peer_map_flush(&p->peer_open_msg.rx_msg);
+
+ proto_notify_state(&p->p, PS_UP);
}
-static inline void
-bmp_setup_socket(struct bmp_proto *p)
+/**
+ * bmp_down - leave established state
+ * @p: BMP instance
+ *
+ * The bgp_down() function is called when the BMP session fails.
+ */
+static void
+bmp_down(struct bmp_proto *p)
{
- sock *sk = sk_new(p->tx_mem_pool);
+ ASSERT(p->started);
+ p->started = false;
+
+ TRACE(D_EVENTS, "BMP session closed");
+
+ proto_notify_state(&p->p, PS_START);
+}
+
+/**
+ * bmp_connect - initiate an outgoing connection
+ * @p: BMP instance
+ *
+ * The bmp_connect() function creates the socket and initiates an outgoing TCP
+ * connection to the monitoring station. It is called to enter Connect state.
+ */
+static void
+bmp_connect(struct bmp_proto *p)
+{
+ ASSERT(!p->started);
+
+ sock *sk = sk_new(p->p.pool);
sk->type = SK_TCP_ACTIVE;
+ sk->daddr = p->station_ip;
+ sk->dport = p->station_port;
sk->ttl = IP4_MAX_TTL;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
- sk->tx_hook = bmp_station_connected;
+ sk->tx_hook = bmp_connected;
+ sk->err_hook = bmp_sock_err;
p->sk = sk;
sk->data = p;
+
+ int rc = sk_open(sk);
+
+ if (rc < 0)
+ sk_log_error(sk, p->p.name);
+
+ tm_start(p->connect_retry_timer, CONNECT_RETRY_TIME);
}
+/* BMP connect successfull event - switch from Connect to Established state */
+static void
+bmp_connected(struct birdsock *sk)
+{
+ struct bmp_proto *p = (void *) sk->data;
+
+ sk->rx_hook = bmp_rx;
+ sk->tx_hook = bmp_tx;
+ tm_stop(p->connect_retry_timer);
+
+ bmp_startup(p);
+}
+
+/* BMP socket error event - switch from any state to Idle state */
+static void
+bmp_sock_err(sock *sk, int err)
+{
+ struct bmp_proto *p = sk->data;
+
+ if (err)
+ TRACE(D_EVENTS, "Connection lost (%M)", err);
+ else
+ TRACE(D_EVENTS, "Connection closed");
+
+ if (p->started)
+ bmp_down(p);
+
+ bmp_close_socket(p);
+ tm_start(p->connect_retry_timer, CONNECT_RETRY_TIME);
+}
+
+/* BMP connect timeout event - switch from Idle/Connect state to Connect state */
+static void
+bmp_connection_retry(timer *t)
+{
+ struct bmp_proto *p = t->data;
+
+ if (p->started)
+ return;
+
+ bmp_close_socket(p);
+ bmp_connect(p);
+}
+
+static void
+bmp_close_socket(struct bmp_proto *p)
+{
+ rfree(p->sk);
+ p->sk = NULL;
+}
+
+
/** Configuration handle section **/
static struct proto *
bmp_init(struct proto_config *CF)
@@ -1097,6 +1121,8 @@ bmp_start(struct proto *P)
p->tx_mem_pool = rp_new(P->pool, "BMP Tx");
p->update_msg_mem_pool = rp_new(P->pool, "BMP Update");
p->tx_ev = ev_new_init(p->tx_mem_pool, bmp_fire_tx, p);
+ p->connect_retry_timer = tm_new_init(p->p.pool, bmp_connection_retry, p, 0, 0);
+ p->sk = NULL;
bmp_peer_map_init(&p->peer_open_msg.tx_msg, p->map_mem_pool);
bmp_peer_map_init(&p->peer_open_msg.rx_msg, p->map_mem_pool);
@@ -1104,26 +1130,25 @@ bmp_start(struct proto *P)
init_list(&p->tx_queue);
init_list(&p->rt_table_in_pre_policy.update_msg_queue);
- p->station_connected = false;
p->started = false;
- p->connect_retry_timer = NULL;
- bmp_setup_socket(p);
- bmp_open(P);
+ tm_start(p->connect_retry_timer, CONNECT_INIT_TIME);
g_bmp = p;
- return PS_UP;
+ return PS_START;
}
static int
bmp_shutdown(struct proto *P)
{
struct bmp_proto *p = (void *) P;
- bmp_send_termination_msg(p, BMP_TERM_REASON_ADM);
- p->station_connected = false;
- p->started = false;
+ if (p->started)
+ {
+ bmp_send_termination_msg(p, BMP_TERM_REASON_ADM);
+ p->started = false;
+ }
g_bmp = NULL;
diff --git a/proto/bmp/bmp.h b/proto/bmp/bmp.h
index 22ee79c3..19623e33 100644
--- a/proto/bmp/bmp.h
+++ b/proto/bmp/bmp.h
@@ -81,7 +81,6 @@ struct bmp_proto {
list tx_queue; // Stores queued packets going to be sent
timer *connect_retry_timer; // Timer for retrying connection to the BMP collector
struct rt_table_info rt_table_in_pre_policy; // Pre-policy route import table
- bool station_connected; // Flag that stores connection status with BMP station
bool started; // Flag that stores running status of BMP instance
};