Files
mq/handlers/grouping_handler.go
2025-08-06 08:25:43 +05:45

339 lines
8.1 KiB
Go

package handlers
import (
"context"
"fmt"
"reflect"
"sort"
"strconv"
"strings"
"github.com/oarkflow/json"
"github.com/oarkflow/mq"
"github.com/oarkflow/mq/dag"
)
// GroupingHandler groups data by specified fields and applies aggregations
type GroupingHandler struct {
dag.Operation
GroupByFields []string `json:"group_by_fields"` // fields to group by
Aggregations []AggregationConfig `json:"aggregations"` // aggregation configurations
SourceField string `json:"source_field"` // field containing array to group
TargetField string `json:"target_field"` // field to store grouped result
Options GroupingOptions `json:"options"` // additional options
}
type AggregationConfig struct {
Field string `json:"field"` // field to aggregate
Operation string `json:"operation"` // sum, count, avg, min, max, concat, first, last
Alias string `json:"alias"` // optional alias for result field
}
type GroupingOptions struct {
SortBy string `json:"sort_by"` // field to sort groups by
SortDirection string `json:"sort_direction"` // asc or desc
IncludeCount bool `json:"include_count"` // include count of items in each group
CountAlias string `json:"count_alias"` // alias for count field (default: "count")
}
func (g *GroupingHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
var data map[string]any
if err := json.Unmarshal(task.Payload, &data); err != nil {
return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
}
// Get source data
sourceData, exists := data[g.SourceField]
if !exists {
return mq.Result{Error: fmt.Errorf("source field '%s' not found", g.SourceField), Ctx: ctx}
}
// Convert to slice of maps
items, err := g.convertToSliceOfMaps(sourceData)
if err != nil {
return mq.Result{Error: err, Ctx: ctx}
}
// Group the data
groups := g.groupData(items)
// Apply aggregations
result := g.applyAggregations(groups)
// Sort if requested
if g.Options.SortBy != "" {
result = g.sortGroups(result)
}
// Set target field
targetField := g.TargetField
if targetField == "" {
targetField = "grouped_data"
}
data[targetField] = result
bt, _ := json.Marshal(data)
return mq.Result{Payload: bt, Ctx: ctx}
}
func (g *GroupingHandler) convertToSliceOfMaps(data any) ([]map[string]any, error) {
rv := reflect.ValueOf(data)
if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array {
return nil, fmt.Errorf("source data must be an array or slice")
}
var items []map[string]any
for i := 0; i < rv.Len(); i++ {
item := rv.Index(i).Interface()
// Convert item to map[string]any
itemMap := make(map[string]any)
itemBytes, err := json.Marshal(item)
if err != nil {
return nil, fmt.Errorf("failed to marshal item at index %d: %v", i, err)
}
if err := json.Unmarshal(itemBytes, &itemMap); err != nil {
return nil, fmt.Errorf("failed to unmarshal item at index %d: %v", i, err)
}
items = append(items, itemMap)
}
return items, nil
}
func (g *GroupingHandler) groupData(items []map[string]any) map[string][]map[string]any {
groups := make(map[string][]map[string]any)
for _, item := range items {
// Create group key
var keyParts []string
for _, field := range g.GroupByFields {
value := fmt.Sprintf("%v", item[field])
keyParts = append(keyParts, value)
}
groupKey := strings.Join(keyParts, "|")
// Add item to group
groups[groupKey] = append(groups[groupKey], item)
}
return groups
}
func (g *GroupingHandler) applyAggregations(groups map[string][]map[string]any) []map[string]any {
var result []map[string]any
for groupKey, items := range groups {
groupResult := make(map[string]any)
// Add group key fields
keyParts := strings.Split(groupKey, "|")
for i, field := range g.GroupByFields {
if i < len(keyParts) {
groupResult[field] = keyParts[i]
}
}
// Add count if requested
if g.Options.IncludeCount {
countAlias := g.Options.CountAlias
if countAlias == "" {
countAlias = "count"
}
groupResult[countAlias] = len(items)
}
// Apply aggregations
for _, agg := range g.Aggregations {
fieldAlias := agg.Alias
if fieldAlias == "" {
fieldAlias = agg.Field + "_" + agg.Operation
}
aggregatedValue := g.performAggregation(items, agg)
groupResult[fieldAlias] = aggregatedValue
}
result = append(result, groupResult)
}
return result
}
func (g *GroupingHandler) performAggregation(items []map[string]any, agg AggregationConfig) any {
switch agg.Operation {
case "count":
return len(items)
case "sum":
return g.sumValues(items, agg.Field)
case "avg":
sum := g.sumValues(items, agg.Field)
if count := len(items); count > 0 {
return sum / float64(count)
}
return 0
case "min":
return g.minValue(items, agg.Field)
case "max":
return g.maxValue(items, agg.Field)
case "first":
if len(items) > 0 {
return items[0][agg.Field]
}
return nil
case "last":
if len(items) > 0 {
return items[len(items)-1][agg.Field]
}
return nil
case "concat":
return g.concatValues(items, agg.Field)
case "unique":
return g.uniqueValues(items, agg.Field)
default:
return nil
}
}
func (g *GroupingHandler) sumValues(items []map[string]any, field string) float64 {
var sum float64
for _, item := range items {
if value, exists := item[field]; exists {
if num := g.toFloat64(value); num != 0 {
sum += num
}
}
}
return sum
}
func (g *GroupingHandler) minValue(items []map[string]any, field string) any {
var min any
for _, item := range items {
if value, exists := item[field]; exists {
if min == nil {
min = value
} else {
if g.compareValues(value, min) < 0 {
min = value
}
}
}
}
return min
}
func (g *GroupingHandler) maxValue(items []map[string]any, field string) any {
var max any
for _, item := range items {
if value, exists := item[field]; exists {
if max == nil {
max = value
} else {
if g.compareValues(value, max) > 0 {
max = value
}
}
}
}
return max
}
func (g *GroupingHandler) concatValues(items []map[string]any, field string) string {
var values []string
for _, item := range items {
if value, exists := item[field]; exists {
values = append(values, fmt.Sprintf("%v", value))
}
}
return strings.Join(values, ", ")
}
func (g *GroupingHandler) uniqueValues(items []map[string]any, field string) []any {
seen := make(map[string]bool)
var unique []any
for _, item := range items {
if value, exists := item[field]; exists {
key := fmt.Sprintf("%v", value)
if !seen[key] {
seen[key] = true
unique = append(unique, value)
}
}
}
return unique
}
func (g *GroupingHandler) toFloat64(value any) float64 {
switch v := value.(type) {
case int:
return float64(v)
case int32:
return float64(v)
case int64:
return float64(v)
case float32:
return float64(v)
case float64:
return v
case string:
if num, err := strconv.ParseFloat(v, 64); err == nil {
return num
}
}
return 0
}
func (g *GroupingHandler) compareValues(a, b any) int {
aFloat := g.toFloat64(a)
bFloat := g.toFloat64(b)
if aFloat < bFloat {
return -1
} else if aFloat > bFloat {
return 1
}
// If numeric comparison doesn't work, compare as strings
aStr := fmt.Sprintf("%v", a)
bStr := fmt.Sprintf("%v", b)
return strings.Compare(aStr, bStr)
}
func (g *GroupingHandler) sortGroups(groups []map[string]any) []map[string]any {
sort.Slice(groups, func(i, j int) bool {
valueI := groups[i][g.Options.SortBy]
valueJ := groups[j][g.Options.SortBy]
comparison := g.compareValues(valueI, valueJ)
if g.Options.SortDirection == "desc" {
return comparison > 0
}
return comparison < 0
})
return groups
}
// Factory function
func NewGroupingHandler(id, sourceField, targetField string, groupByFields []string, aggregations []AggregationConfig, options GroupingOptions) *GroupingHandler {
return &GroupingHandler{
Operation: dag.Operation{
ID: id,
Key: "group_data",
Type: dag.Function,
Tags: []string{"data", "grouping", "aggregation"},
},
GroupByFields: groupByFields,
Aggregations: aggregations,
SourceField: sourceField,
TargetField: targetField,
Options: options,
}
}