Browse Source

bluetooth: fix bug when destroying tx queue buffers on disconnect

Channel tx_queue purging on disconnect was inconsistently handled
by the different channels: iso, l2cap, l2cap_br.

iso channels handled purging in the tx_data_pull hook.

l2cap and l2cap_br did the purging in channel delete functions
and did not expect tx_data_pull to be called for a disconnected
channel. Their data_pull functions could return a ptr to a
net_buf that was still on the tx_queue, which is problematic
when the conn tx_processor unrefs the returned buffer resulting
in multiple calls to the buf destroy function.

To make things consistent and correct, remove the code that tries
to purge tx_queues in the tx_processor and only do purging in
the channels themselves when they are deleted/disconnected.

Also refactor and clarify referencing of the net_buf returned
by tx_data_pull. It was confusing who had a reference and
when, which could vary depending on the length of the original
buffer. There are three cases: the buffer length is less
than the tx.mps, greater than the mps but less than the mtu so
requiring segmentation but not fragmentation, or greater than
both mps and mtu so requiring both segmentation and fragmentation.
The conn layer would increase the refcnt if the length was
greater than the mtu, but not have any awareness of whether
the net_buf was still on the tx_queue or not.

Now it is the tx_data_pull callback's responsibility to increment
the reference count if it is returning a pointer to a net_buf
that it is still keeping on the tx_queue for segmentation purposes.
The conn layer will now always transfer that reference into a
fragment view and not make it conditional on the length relative to
the mtu, and always decrement the reference to the parent when
the fragment is destroyed.

So there is no risk of decrementing a reference to a net buf
that might still be on a tx_queue, which simplifies error
handling in particular.

Also add error handling paths for when asserts are not enabled.

Signed-off-by: Mike J. Chen <mjchen@google.com>
pull/85254/merge
Mike J. Chen 4 months ago committed by Daniel DeGrasse
parent
commit
a392c33c60
  1. 16
      subsys/bluetooth/host/classic/l2cap_br.c
  2. 111
      subsys/bluetooth/host/conn.c
  3. 14
      subsys/bluetooth/host/conn_internal.h
  4. 31
      subsys/bluetooth/host/iso.c
  5. 30
      subsys/bluetooth/host/l2cap.c

16
subsys/bluetooth/host/classic/l2cap_br.c

@ -1592,6 +1592,8 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t * @@ -1592,6 +1592,8 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t *
return NULL;
}
__ASSERT_NO_MSG(conn->state == BT_CONN_CONNECTED);
struct bt_l2cap_br_chan *br_chan;
br_chan = CONTAINER_OF(pdu_ready, struct bt_l2cap_br_chan, _pdu_ready);
@ -1609,13 +1611,15 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t * @@ -1609,13 +1611,15 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t *
__ASSERT(tx_pdu, "signaled ready but no PDUs in the TX queue");
struct net_buf *pdu = CONTAINER_OF(tx_pdu, struct net_buf, node);
struct net_buf *q_pdu = CONTAINER_OF(tx_pdu, struct net_buf, node);
if (bt_buf_has_view(pdu)) {
LOG_ERR("already have view on %p", pdu);
if (bt_buf_has_view(q_pdu)) {
LOG_ERR("already have view on %p", q_pdu);
return NULL;
}
struct net_buf *pdu = net_buf_ref(q_pdu);
/* We can't interleave ACL fragments from different channels for the
* same ACL conn -> we have to wait until a full L2 PDU is transferred
* before switching channels.
@ -1623,13 +1627,15 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t * @@ -1623,13 +1627,15 @@ struct net_buf *l2cap_br_data_pull(struct bt_conn *conn, size_t amount, size_t *
bool last_frag = amount >= pdu->len;
if (last_frag) {
LOG_DBG("last frag, removing %p", pdu);
LOG_DBG("last frag, removing %p", q_pdu);
__maybe_unused bool found;
found = sys_slist_find_and_remove(&br_chan->_pdu_tx_queue, &pdu->node);
found = sys_slist_find_and_remove(&br_chan->_pdu_tx_queue, &q_pdu->node);
__ASSERT_NO_MSG(found);
net_buf_unref(q_pdu);
LOG_DBG("chan %p done", br_chan);
lower_data_ready(br_chan);

111
subsys/bluetooth/host/conn.c

@ -649,7 +649,7 @@ static bool is_acl_conn(struct bt_conn *conn) @@ -649,7 +649,7 @@ static bool is_acl_conn(struct bt_conn *conn)
}
static int send_buf(struct bt_conn *conn, struct net_buf *buf,
size_t len, void *cb, void *ud)
size_t len, bt_conn_tx_cb_t cb, void *ud)
{
struct net_buf *frag = NULL;
struct bt_conn_tx *tx = NULL;
@ -659,13 +659,15 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -659,13 +659,15 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
if (buf->len == 0) {
__ASSERT_NO_MSG(0);
return -EMSGSIZE;
err = -EMSGSIZE;
goto error_return;
}
if (bt_buf_has_view(buf)) {
__ASSERT_NO_MSG(0);
return -EIO;
err = -EIO;
goto error_return;
}
LOG_DBG("conn %p buf %p len %zu buf->len %u cb %p ud %p",
@ -680,7 +682,8 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -680,7 +682,8 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
*/
__ASSERT(0, "No controller bufs");
return -ENOMEM;
err = -ENOMEM;
goto error_return;
}
/* Allocate and set the TX context */
@ -689,8 +692,9 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -689,8 +692,9 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
/* See big comment above */
if (!tx) {
__ASSERT(0, "No TX context");
return -ENOMEM;
k_sem_give(bt_conn_get_pkts(conn));
err = -ENOMEM;
goto error_return;
}
tx->cb = cb;
@ -698,18 +702,17 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -698,18 +702,17 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
uint16_t frag_len = MIN(conn_mtu(conn), len);
__ASSERT_NO_MSG(buf->ref == 1);
/* Check that buf->ref is 1 or 2. It would be 1 if this
* was the only reference (e.g. buf was removed
* from the conn tx_queue). It would be 2 if the
* tx_data_pull kept it on the tx_queue for segmentation.
*/
__ASSERT_NO_MSG((buf->ref == 1) || (buf->ref == 2));
if (buf->len > frag_len) {
LOG_DBG("keep %p around", buf);
frag = get_data_frag(net_buf_ref(buf), frag_len);
} else {
LOG_DBG("move %p ref in", buf);
/* Move the ref into `frag` for the last TX. That way `buf` will
* get destroyed when `frag` is destroyed.
*/
frag = get_data_frag(buf, frag_len);
}
/* The reference is always transferred to the frag, so when
* the frag is destroyed, the parent reference is decremented.
*/
frag = get_data_frag(buf, frag_len);
/* Caller is supposed to check we have all resources to send */
__ASSERT_NO_MSG(frag != NULL);
@ -723,7 +726,7 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -723,7 +726,7 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
conn->next_is_frag = false;
}
LOG_DBG("send frag: buf %p len %d", buf, frag_len);
LOG_DBG("send frag: buf %p len %d", frag, frag_len);
/* At this point, the buffer is either a fragment or a full HCI packet.
* The flags are also valid.
@ -766,15 +769,26 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf, @@ -766,15 +769,26 @@ static int send_buf(struct bt_conn *conn, struct net_buf *buf,
*/
net_buf_unref(frag);
/* `buf` might not get destroyed right away, and its `tx`
* pointer will still be reachable. Make sure that we don't try
* to use the destroyed context later.
/* `buf` might not get destroyed right away because it may
* still be on a conn tx_queue, and its `tx` pointer will still
* be reachable. Make sure that we don't try to use the
* destroyed context later.
*/
conn_tx_destroy(conn, tx);
k_sem_give(bt_conn_get_pkts(conn));
/* Merge HCI driver errors */
return -EIO;
error_return:
/* Runtime handling of fatal errors when ASSERTS are disabled.
* Unref the buf and invoke callback with the error.
*/
net_buf_unref(buf);
if (cb) {
cb(conn, ud, err);
}
return err;
}
static struct k_poll_signal conn_change =
@ -956,8 +970,8 @@ struct bt_conn *get_conn_ready(void) @@ -956,8 +970,8 @@ struct bt_conn *get_conn_ready(void)
sys_slist_remove(&bt_dev.le.conn_ready, prev, &conn->_conn_ready);
(void)atomic_set(&conn->_conn_ready_lock, 0);
/* Append connection to list if it still has data */
if (conn->has_data(conn)) {
/* Append connection to list if it is connected and still has data */
if (conn->has_data(conn) && (conn->state == BT_CONN_CONNECTED)) {
LOG_DBG("appending %p to back of TX queue", conn);
bt_conn_data_ready(conn);
}
@ -985,30 +999,6 @@ static void acl_get_and_clear_cb(struct bt_conn *conn, struct net_buf *buf, @@ -985,30 +999,6 @@ static void acl_get_and_clear_cb(struct bt_conn *conn, struct net_buf *buf,
}
#endif /* defined(CONFIG_BT_CONN) */
/* Acts as a "null-routed" bt_send(). This fn will decrease the refcount of
* `buf` and call the user callback with an error code.
*/
static void destroy_and_callback(struct bt_conn *conn,
struct net_buf *buf,
bt_conn_tx_cb_t cb,
void *ud)
{
if (!cb) {
conn->get_and_clear_cb(conn, buf, &cb, &ud);
}
LOG_DBG("pop: cb %p userdata %p", cb, ud);
/* bt_send() would've done an unref. Do it here also, so the buffer is
* hopefully destroyed and the user callback can allocate a new one.
*/
net_buf_unref(buf);
if (cb) {
cb(conn, ud, -ESHUTDOWN);
}
}
static volatile bool _suspend_tx;
#if defined(CONFIG_BT_TESTING)
@ -1051,17 +1041,7 @@ void bt_conn_tx_processor(void) @@ -1051,17 +1041,7 @@ void bt_conn_tx_processor(void)
if (conn->state != BT_CONN_CONNECTED) {
LOG_WRN("conn %p: not connected", conn);
/* Call the user callbacks & destroy (final-unref) the buffers
* we were supposed to send.
*/
buf = conn->tx_data_pull(conn, SIZE_MAX, &buf_len);
while (buf) {
destroy_and_callback(conn, buf, cb, ud);
buf = conn->tx_data_pull(conn, SIZE_MAX, &buf_len);
}
goto exit;
goto raise_and_exit;
}
/* now that we are guaranteed resources, we can pull data from the upper
@ -1095,25 +1075,12 @@ void bt_conn_tx_processor(void) @@ -1095,25 +1075,12 @@ void bt_conn_tx_processor(void)
int err = send_buf(conn, buf, buf_len, cb, ud);
if (err) {
/* -EIO means `unrecoverable error`. It can be an assertion that
* failed or an error from the HCI driver.
*
* -ENOMEM means we thought we had all the resources to send the
* buf (ie. TX context + controller buffer) but one of them was
* not available. This is likely due to a failure of
* assumption, likely that we have been pre-empted somehow and
* that `tx_processor()` has been re-entered.
*
* In both cases, we destroy the buffer and mark the connection
* as dead.
*/
LOG_ERR("Fatal error (%d). Disconnecting %p", err, conn);
destroy_and_callback(conn, buf, cb, ud);
bt_conn_disconnect(conn, BT_HCI_ERR_REMOTE_USER_TERM_CONN);
goto exit;
}
raise_and_exit:
/* Always kick the TX work. It will self-suspend if it doesn't get
* resources or there is nothing left to send.
*/

14
subsys/bluetooth/host/conn_internal.h

@ -288,10 +288,22 @@ struct bt_conn { @@ -288,10 +288,22 @@ struct bt_conn {
#endif
/* Callback into the higher-layers (L2CAP / ISO) to return a buffer for
* sending `amount` of bytes to HCI.
* sending `amount` of bytes to HCI. Will only be called when
* the state is connected. The higher-layer is responsible for purging
* the remaining buffers on disconnect.
*
* Scheduling from which channel to pull (e.g. for L2CAP) is done at the
* upper layer's discretion.
*
* Details about the returned net_buf when it is not NULL:
* - If the net_buf->len <= *length, then the net_buf has been removed
* from the tx_queue of the connection and the caller is now the
* owner of the only reference to the net_buf.
* - Otherwise, the net_buf is still on the tx_queue of the connection,
* and the callback has incremented the reference count to account
* for it having a reference still.
* - The caller must consume *length bytes from the net_buf before
* calling this function again.
*/
struct net_buf * (*tx_data_pull)(struct bt_conn *conn,
size_t amount,

31
subsys/bluetooth/host/iso.c

@ -454,10 +454,18 @@ void bt_iso_connected(struct bt_conn *iso) @@ -454,10 +454,18 @@ void bt_iso_connected(struct bt_conn *iso)
static void bt_iso_chan_disconnected(struct bt_iso_chan *chan, uint8_t reason)
{
const uint8_t conn_type = chan->iso->iso.info.type;
struct net_buf *buf;
LOG_DBG("%p, reason 0x%02x", chan, reason);
__ASSERT(chan->iso != NULL, "NULL conn for iso chan %p", chan);
/* release buffers from tx_queue */
while ((buf = k_fifo_get(&chan->iso->iso.txq, K_NO_WAIT))) {
__ASSERT_NO_MSG(!bt_buf_has_view(buf));
net_buf_unref(buf);
}
bt_iso_chan_set_state(chan, BT_ISO_STATE_DISCONNECTED);
bt_conn_set_state(chan->iso, BT_CONN_DISCONNECT_COMPLETE);
@ -775,7 +783,8 @@ void bt_iso_recv(struct bt_conn *iso, struct net_buf *buf, uint8_t flags) @@ -775,7 +783,8 @@ void bt_iso_recv(struct bt_conn *iso, struct net_buf *buf, uint8_t flags)
static bool iso_has_data(struct bt_conn *conn)
{
#if defined(CONFIG_BT_ISO_TX)
return !k_fifo_is_empty(&conn->iso.txq);
return ((conn->iso.chan->state == BT_ISO_STATE_CONNECTED) &&
!k_fifo_is_empty(&conn->iso.txq));
#else /* !CONFIG_BT_ISO_TX */
return false;
#endif /* CONFIG_BT_ISO_TX */
@ -789,9 +798,9 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t @@ -789,9 +798,9 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t
/* Leave the PDU buffer in the queue until we have sent all its
* fragments.
*/
struct net_buf *frag = k_fifo_peek_head(&conn->iso.txq);
struct net_buf *q_frag = k_fifo_peek_head(&conn->iso.txq);
if (!frag) {
if (!q_frag) {
BT_ISO_DATA_DBG("signaled ready but no frag available");
/* Service other connections */
bt_tx_irq_raise();
@ -799,13 +808,10 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t @@ -799,13 +808,10 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t
return NULL;
}
if (conn->iso.chan->state != BT_ISO_STATE_CONNECTED) {
__maybe_unused struct net_buf *b = k_fifo_get(&conn->iso.txq, K_NO_WAIT);
__ASSERT_NO_MSG(conn->state == BT_CONN_CONNECTED);
if (conn->iso.chan->state != BT_ISO_STATE_CONNECTED) {
LOG_DBG("channel has been disconnected");
__ASSERT_NO_MSG(b == frag);
net_buf_unref(b);
/* Service other connections */
bt_tx_irq_raise();
@ -813,7 +819,7 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t @@ -813,7 +819,7 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t
return NULL;
}
if (bt_buf_has_view(frag)) {
if (bt_buf_has_view(q_frag)) {
/* This should not happen. conn.c should wait until the view is
* destroyed before requesting more data.
*/
@ -821,13 +827,16 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t @@ -821,13 +827,16 @@ static struct net_buf *iso_data_pull(struct bt_conn *conn, size_t amount, size_t
return NULL;
}
struct net_buf *frag = net_buf_ref(q_frag);
bool last_frag = amount >= frag->len;
if (last_frag) {
__maybe_unused struct net_buf *b = k_fifo_get(&conn->iso.txq, K_NO_WAIT);
q_frag = k_fifo_get(&conn->iso.txq, K_NO_WAIT);
BT_ISO_DATA_DBG("last frag, pop buf");
__ASSERT_NO_MSG(b == frag);
__ASSERT_NO_MSG(q_frag == frag);
net_buf_unref(q_frag);
}
*length = frag->len;

30
subsys/bluetooth/host/l2cap.c

@ -83,11 +83,6 @@ NET_BUF_POOL_FIXED_DEFINE(disc_pool, 1, @@ -83,11 +83,6 @@ NET_BUF_POOL_FIXED_DEFINE(disc_pool, 1,
#define l2cap_remove_ident(conn, ident) __l2cap_lookup_ident(conn, ident, true)
static sys_slist_t servers = SYS_SLIST_STATIC_INIT(&servers);
static void l2cap_tx_buf_destroy(struct bt_conn *conn, struct net_buf *buf, int err)
{
net_buf_unref(buf);
}
#endif /* CONFIG_BT_L2CAP_DYNAMIC_CHANNEL */
/* L2CAP signalling channel specific context */
@ -257,6 +252,7 @@ void bt_l2cap_chan_del(struct bt_l2cap_chan *chan) @@ -257,6 +252,7 @@ void bt_l2cap_chan_del(struct bt_l2cap_chan *chan)
{
const struct bt_l2cap_chan_ops *ops = chan->ops;
struct bt_l2cap_le_chan *le_chan = BT_L2CAP_LE_CHAN(chan);
struct net_buf *buf;
LOG_DBG("conn %p chan %p", chan->conn, chan);
@ -269,9 +265,7 @@ void bt_l2cap_chan_del(struct bt_l2cap_chan *chan) @@ -269,9 +265,7 @@ void bt_l2cap_chan_del(struct bt_l2cap_chan *chan)
/* Remove buffers on the PDU TX queue. We can't do that in
* `l2cap_chan_destroy()` as it is not called for fixed channels.
*/
while (chan_has_data(le_chan)) {
struct net_buf *buf = k_fifo_get(&le_chan->tx_queue, K_NO_WAIT);
while ((buf = k_fifo_get(&le_chan->tx_queue, K_NO_WAIT))) {
net_buf_unref(buf);
}
@ -919,20 +913,22 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn, @@ -919,20 +913,22 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn,
* For SDUs we do the same, we keep it in the queue until all the
* segments have been sent, adding the PDU headers just-in-time.
*/
struct net_buf *pdu = k_fifo_peek_head(&lechan->tx_queue);
struct net_buf *fifo_pdu = k_fifo_peek_head(&lechan->tx_queue);
/* We don't have anything to send for the current channel. We could
* however have something to send on another channel that is attached to
* the same ACL connection. Re-trigger the TX processor: it will call us
* again and this time we will select another channel to pull data from.
*/
if (!pdu) {
if (!fifo_pdu) {
bt_tx_irq_raise();
return NULL;
}
if (bt_buf_has_view(pdu)) {
LOG_ERR("already have view on %p", pdu);
__ASSERT_NO_MSG(conn->state == BT_CONN_CONNECTED);
if (bt_buf_has_view(fifo_pdu)) {
LOG_ERR("already have view on %p", fifo_pdu);
return NULL;
}
@ -946,6 +942,8 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn, @@ -946,6 +942,8 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn,
return NULL;
}
struct net_buf *pdu = net_buf_ref(fifo_pdu);
/* Add PDU header */
if (lechan->_pdu_remaining == 0) {
struct bt_l2cap_hdr *hdr;
@ -975,9 +973,11 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn, @@ -975,9 +973,11 @@ struct net_buf *l2cap_data_pull(struct bt_conn *conn,
if (last_frag && last_seg) {
LOG_DBG("last frag of last seg, dequeuing %p", pdu);
__maybe_unused struct net_buf *b = k_fifo_get(&lechan->tx_queue, K_NO_WAIT);
fifo_pdu = k_fifo_get(&lechan->tx_queue, K_NO_WAIT);
__ASSERT_NO_MSG(b == pdu);
__ASSERT_NO_MSG(fifo_pdu == pdu);
net_buf_unref(fifo_pdu);
}
if (last_frag && L2CAP_LE_CID_IS_DYN(lechan->tx.cid)) {
@ -2290,7 +2290,7 @@ static void l2cap_chan_shutdown(struct bt_l2cap_chan *chan) @@ -2290,7 +2290,7 @@ static void l2cap_chan_shutdown(struct bt_l2cap_chan *chan)
/* Remove buffers on the TX queue */
while ((buf = k_fifo_get(&le_chan->tx_queue, K_NO_WAIT))) {
l2cap_tx_buf_destroy(chan->conn, buf, -ESHUTDOWN);
net_buf_unref(buf);
}
/* Remove buffers on the RX queue */

Loading…
Cancel
Save