This commit is contained in:
Oarkflow
2025-08-02 16:22:51 +05:45
parent 271beed429
commit e1112effa8
2 changed files with 21 additions and 47 deletions

View File

@@ -252,7 +252,7 @@ func (c *Codec) SendMessage(ctx context.Context, conn net.Conn, msg *Message) er
return c.sendRawMessage(ctx, conn, msg)
}
// sendRawMessage handles the actual sending of a message or fragment WITHOUT timeouts
// sendRawMessage handles the actual sending of a message or fragment WITHOUT any timeouts
func (c *Codec) sendRawMessage(ctx context.Context, conn net.Conn, msg *Message) error {
// Serialize message
data, err := msg.Serialize()
@@ -283,21 +283,9 @@ func (c *Codec) sendRawMessage(ctx context.Context, conn net.Conn, msg *Message)
binary.BigEndian.PutUint32(buffer.B[:4], uint32(len(data)))
copy(buffer.B[4:], data)
// CRITICAL: DO NOT set any write deadlines for broker-consumer connections
// CRITICAL: NEVER set any write deadlines for broker-consumer connections
// These connections must remain open indefinitely for persistent communication
// Only set timeout if explicitly configured AND not zero (for backward compatibility)
if c.config.WriteTimeout > 0 {
deadline := time.Now().Add(c.config.WriteTimeout)
if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
deadline = ctxDeadline
}
if err := conn.SetWriteDeadline(deadline); err != nil {
c.incrementErrors()
return fmt.Errorf("failed to set write deadline: %w", err)
}
defer conn.SetWriteDeadline(time.Time{})
}
// Completely removed all timeout/deadline logic to prevent I/O timeouts
// Write with buffering
writer := bufio.NewWriter(conn)
@@ -322,7 +310,7 @@ func (c *Codec) sendRawMessage(ctx context.Context, conn net.Conn, msg *Message)
return nil
}
// ReadMessage reads a message WITHOUT timeouts for persistent broker-consumer connections
// ReadMessage reads a message WITHOUT any timeouts for persistent broker-consumer connections
func (c *Codec) ReadMessage(ctx context.Context, conn net.Conn) (*Message, error) {
// Check context cancellation before proceeding
if err := ctx.Err(); err != nil {
@@ -330,21 +318,9 @@ func (c *Codec) ReadMessage(ctx context.Context, conn net.Conn) (*Message, error
return nil, fmt.Errorf("context ended before read: %w", err)
}
// CRITICAL: DO NOT set any read deadlines for broker-consumer connections
// CRITICAL: NEVER set any read deadlines for broker-consumer connections
// These connections must remain open indefinitely for persistent communication
// Only set timeout if explicitly configured AND not zero (for backward compatibility)
if c.config.ReadTimeout > 0 {
deadline := time.Now().Add(c.config.ReadTimeout)
if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
deadline = ctxDeadline
}
if err := conn.SetReadDeadline(deadline); err != nil {
c.incrementErrors()
return nil, fmt.Errorf("failed to set read deadline: %w", err)
}
defer conn.SetReadDeadline(time.Time{})
}
// Completely removed all timeout/deadline logic to prevent I/O timeouts
// Read length prefix
lengthBytes := make([]byte, 4)

View File

@@ -434,7 +434,7 @@ func (c *Consumer) sendDenyMessage(ctx context.Context, taskID, queue string, er
}()
}
// isHealthy checks if the connection is still healthy
// isHealthy checks if the connection is still healthy WITHOUT setting deadlines
func (c *Consumer) isHealthy() bool {
c.connMutex.RLock()
defer c.connMutex.RUnlock()
@@ -443,27 +443,25 @@ func (c *Consumer) isHealthy() bool {
return false
}
// Simple health check by setting read deadline
c.conn.SetReadDeadline(time.Now().Add(100 * time.Millisecond))
defer c.conn.SetReadDeadline(time.Time{})
// CRITICAL: DO NOT set any deadlines on broker-consumer connections
// These are persistent connections that must remain open indefinitely
// Instead, use a simple non-blocking connection state check
one := make([]byte, 1)
n, err := c.conn.Read(one)
if err != nil {
if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
return true // Timeout is expected for health check
// Check if connection is still valid by checking the connection state
// without setting any timeouts or deadlines
if tcpConn, ok := c.conn.(*net.TCPConn); ok {
// Check TCP connection state without timeouts
// This is a lightweight check that doesn't interfere with persistent connection
if tcpConn == nil {
return false
}
return false
}
// If we read data, put it back (this shouldn't happen in health check)
if n > 0 {
// This is a simplified health check; in production, you might want to buffer this
// Connection exists and is of correct type - assume healthy
// The actual health will be determined when we try to read/write
return true
}
return true
// For non-TCP connections, assume healthy if connection exists
return c.conn != nil
}
// startHealthChecker starts periodic health checks