From 29d09d2d11b42bb723cf6ca270305b90a6fb47e8 Mon Sep 17 00:00:00 2001 From: sujit Date: Wed, 6 Aug 2025 09:14:55 +0545 Subject: [PATCH] update --- examples/data_transform_demo.go | 763 +++++++++++++++++++++++ go.mod | 8 - go.sum | 20 - handlers/data_handler.go | 737 +++++++++++++++++++++++ handlers/data_transformation_handler.go | 765 ------------------------ handlers/data_utils_handler.go | 494 --------------- handlers/examples.go | 208 +++++++ handlers/field_handler.go | 344 +++++++++++ handlers/field_manipulation_handler.go | 501 ---------------- handlers/flatten_handler.go | 506 ++++++---------- handlers/format_handler.go | 492 +++++++-------- handlers/group_handler.go | 280 +++++++++ handlers/grouping_handler.go | 338 ----------- handlers/json_handler.go | 625 ++++++++++--------- handlers/split_join_handler.go | 365 +++++------ metrics/metrics.go | 43 -- 16 files changed, 3204 insertions(+), 3285 deletions(-) create mode 100644 examples/data_transform_demo.go create mode 100644 handlers/data_handler.go delete mode 100644 handlers/data_transformation_handler.go delete mode 100644 handlers/data_utils_handler.go create mode 100644 handlers/examples.go create mode 100644 handlers/field_handler.go delete mode 100644 handlers/field_manipulation_handler.go create mode 100644 handlers/group_handler.go delete mode 100644 handlers/grouping_handler.go delete mode 100644 metrics/metrics.go diff --git a/examples/data_transform_demo.go b/examples/data_transform_demo.go new file mode 100644 index 0000000..48007c9 --- /dev/null +++ b/examples/data_transform_demo.go @@ -0,0 +1,763 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "log" + + "github.com/oarkflow/mq" + "github.com/oarkflow/mq/dag" + "github.com/oarkflow/mq/handlers" +) + +func main() { + fmt.Println("=== Data Transformation Handlers Examples ===") + + // Test each handler with sample data + testFormatHandler() + testGroupHandler() + testSplitJoinHandler() + testFlattenHandler() + testJSONHandler() + testFieldHandler() + testDataHandler() + + // Example of chaining handlers + exampleDAGChaining() +} + +func testFormatHandler() { + fmt.Println("\n1. 
FORMAT HANDLER TESTS") + fmt.Println("========================") + + // Test uppercase formatting + testData := map[string]any{ + "name": "john doe", + "title": "software engineer", + "age": 30, + } + + handler := handlers.NewFormatHandler("format-test") + config := dag.Payload{ + Data: map[string]any{ + "format_type": "uppercase", + "fields": []string{"name", "title"}, + }, + } + handler.SetConfig(config) + + result := runHandler(handler, testData, "Uppercase Format") + printResult("Uppercase formatting", result) + + // Test currency formatting + currencyData := map[string]any{ + "price": 99.99, + "tax": "15.50", + "total": 115.49, + } + + currencyHandler := handlers.NewFormatHandler("currency-test") + currencyConfig := dag.Payload{ + Data: map[string]any{ + "format_type": "currency", + "fields": []string{"price", "tax", "total"}, + "currency": "$", + }, + } + currencyHandler.SetConfig(currencyConfig) + + result = runHandler(currencyHandler, currencyData, "Currency Format") + printResult("Currency formatting", result) + + // Test date formatting + dateData := map[string]any{ + "created_at": "2023-06-15T10:30:00Z", + "updated_at": "2023-06-20", + } + + dateHandler := handlers.NewFormatHandler("date-test") + dateConfig := dag.Payload{ + Data: map[string]any{ + "format_type": "date", + "fields": []string{"created_at", "updated_at"}, + "date_format": "2006-01-02", + }, + } + dateHandler.SetConfig(dateConfig) + + result = runHandler(dateHandler, dateData, "Date Format") + printResult("Date formatting", result) +} + +func testGroupHandler() { + fmt.Println("\n2. GROUP HANDLER TESTS") + fmt.Println("======================") + + // Test data grouping with aggregation + testData := map[string]any{ + "data": []interface{}{ + map[string]any{"department": "Engineering", "salary": 80000, "age": 30, "name": "John"}, + map[string]any{"department": "Engineering", "salary": 90000, "age": 25, "name": "Jane"}, + map[string]any{"department": "Marketing", "salary": 60000, "age": 35, "name": "Bob"}, + map[string]any{"department": "Marketing", "salary": 65000, "age": 28, "name": "Alice"}, + map[string]any{"department": "Engineering", "salary": 95000, "age": 32, "name": "Mike"}, + }, + } + + handler := handlers.NewGroupHandler("group-test") + config := dag.Payload{ + Data: map[string]any{ + "group_by": []string{"department"}, + "aggregations": map[string]any{ + "salary": "sum", + "age": "avg", + "name": "concat", + }, + "concat_separator": ", ", + }, + } + handler.SetConfig(config) + + result := runHandler(handler, testData, "Group by Department") + printResult("Data grouping", result) +} + +func testSplitJoinHandler() { + fmt.Println("\n3. 
SPLIT/JOIN HANDLER TESTS") + fmt.Println("============================") + + // Test split operation + testData := map[string]any{ + "full_name": "John Michael Doe", + "tags": "go,programming,backend,api", + "skills": "golang python javascript", + } + + splitHandler := handlers.NewSplitJoinHandler("split-test") + splitConfig := dag.Payload{ + Data: map[string]any{ + "operation": "split", + "fields": []string{"full_name", "skills"}, + "separator": " ", + }, + } + splitHandler.SetConfig(splitConfig) + + result := runHandler(splitHandler, testData, "Split Operation (space)") + printResult("String splitting with space", result) + + // Test split with comma + splitHandler2 := handlers.NewSplitJoinHandler("split-test-2") + splitConfig2 := dag.Payload{ + Data: map[string]any{ + "operation": "split", + "fields": []string{"tags"}, + "separator": ",", + }, + } + splitHandler2.SetConfig(splitConfig2) + + result = runHandler(splitHandler2, testData, "Split Operation (comma)") + printResult("String splitting with comma", result) + + // Test join operation + joinData := map[string]any{ + "first_name": "John", + "middle_name": "Michael", + "last_name": "Doe", + "title": "Mr.", + } + + joinHandler := handlers.NewSplitJoinHandler("join-test") + joinConfig := dag.Payload{ + Data: map[string]any{ + "operation": "join", + "source_fields": []string{"title", "first_name", "middle_name", "last_name"}, + "target_field": "full_name_with_title", + "separator": " ", + }, + } + joinHandler.SetConfig(joinConfig) + + result = runHandler(joinHandler, joinData, "Join Operation") + printResult("String joining", result) +} + +func testFlattenHandler() { + fmt.Println("\n4. FLATTEN HANDLER TESTS") + fmt.Println("=========================") + + // Test flatten settings + testData := map[string]any{ + "user_id": 123, + "settings": []interface{}{ + map[string]any{"key": "theme", "value": "dark", "value_type": "string"}, + map[string]any{"key": "notifications", "value": "true", "value_type": "boolean"}, + map[string]any{"key": "max_items", "value": "50", "value_type": "integer"}, + map[string]any{"key": "timeout", "value": "30.5", "value_type": "float"}, + }, + } + + handler := handlers.NewFlattenHandler("flatten-test") + config := dag.Payload{ + Data: map[string]any{ + "operation": "flatten_settings", + "source_field": "settings", + "target_field": "user_config", + }, + } + handler.SetConfig(config) + + result := runHandler(handler, testData, "Flatten Settings") + printResult("Settings flattening", result) + + // Test flatten key-value pairs + kvData := map[string]any{ + "user_id": 456, + "properties": []interface{}{ + map[string]any{"name": "color", "val": "blue"}, + map[string]any{"name": "size", "val": "large"}, + map[string]any{"name": "weight", "val": "heavy"}, + }, + } + + kvHandler := handlers.NewFlattenHandler("kv-test") + kvConfig := dag.Payload{ + Data: map[string]any{ + "operation": "flatten_key_value", + "source_field": "properties", + "key_field": "name", + "value_field": "val", + "target_field": "flattened_props", + }, + } + kvHandler.SetConfig(kvConfig) + + result = runHandler(kvHandler, kvData, "Flatten Key-Value") + printResult("Key-value flattening", result) + + // Test flatten nested objects + nestedData := map[string]any{ + "user": map[string]any{ + "id": 123, + "profile": map[string]any{ + "name": "John Doe", + "email": "john@example.com", + "address": map[string]any{ + "street": "123 Main St", + "city": "New York", + "country": "USA", + }, + "preferences": map[string]any{ + "theme": "dark", + "language": 
"en", + }, + }, + }, + } + + nestedHandler := handlers.NewFlattenHandler("nested-test") + nestedConfig := dag.Payload{ + Data: map[string]any{ + "operation": "flatten_nested_objects", + "separator": "_", + }, + } + nestedHandler.SetConfig(nestedConfig) + + result = runHandler(nestedHandler, nestedData, "Flatten Nested Objects") + printResult("Nested object flattening", result) +} + +func testJSONHandler() { + fmt.Println("\n5. JSON HANDLER TESTS") + fmt.Println("=====================") + + // Test JSON parsing + testData := map[string]any{ + "config": `{"theme": "dark", "language": "en", "notifications": true, "max_items": 100}`, + "metadata": `["tag1", "tag2", "tag3"]`, + "user": `{"id": 123, "name": "John Doe", "active": true}`, + } + + parseHandler := handlers.NewJSONHandler("json-parse-test") + parseConfig := dag.Payload{ + Data: map[string]any{ + "operation": "parse", + "fields": []string{"config", "metadata", "user"}, + }, + } + parseHandler.SetConfig(parseConfig) + + result := runHandler(parseHandler, testData, "JSON Parsing") + printResult("JSON parsing", result) + + // Test JSON stringifying + objData := map[string]any{ + "user": map[string]any{ + "id": 123, + "name": "John Doe", + "active": true, + "roles": []string{"admin", "user"}, + }, + "preferences": map[string]any{ + "theme": "dark", + "notifications": true, + "language": "en", + }, + } + + stringifyHandler := handlers.NewJSONHandler("json-stringify-test") + stringifyConfig := dag.Payload{ + Data: map[string]any{ + "operation": "stringify", + "fields": []string{"user", "preferences"}, + "indent": true, + }, + } + stringifyHandler.SetConfig(stringifyConfig) + + result = runHandler(stringifyHandler, objData, "JSON Stringifying") + printResult("JSON stringifying", result) + + // Test JSON validation + validationData := map[string]any{ + "valid_json": `{"key": "value"}`, + "invalid_json": `{"key": value}`, // Missing quotes around value + "valid_array": `[1, 2, 3]`, + } + + validateHandler := handlers.NewJSONHandler("json-validate-test") + validateConfig := dag.Payload{ + Data: map[string]any{ + "operation": "validate", + "fields": []string{"valid_json", "invalid_json", "valid_array"}, + }, + } + validateHandler.SetConfig(validateConfig) + + result = runHandler(validateHandler, validationData, "JSON Validation") + printResult("JSON validation", result) +} + +func testFieldHandler() { + fmt.Println("\n6. 
FIELD HANDLER TESTS") + fmt.Println("======================") + + testData := map[string]any{ + "id": 123, + "first_name": "John", + "last_name": "Doe", + "email_addr": "john@example.com", + "phone_number": "555-1234", + "internal_id": "INT-123", + "created_at": "2023-01-15", + "updated_at": "2023-06-20", + "is_active": true, + "salary": 75000.50, + } + + // Test field filtering/selection + filterHandler := handlers.NewFieldHandler("filter-test") + filterConfig := dag.Payload{ + Data: map[string]any{ + "operation": "filter", + "fields": []string{"id", "first_name", "last_name", "email_addr", "is_active"}, + }, + } + filterHandler.SetConfig(filterConfig) + + result := runHandler(filterHandler, testData, "Filter/Select Fields") + printResult("Field filtering", result) + + // Test field exclusion/removal + excludeHandler := handlers.NewFieldHandler("exclude-test") + excludeConfig := dag.Payload{ + Data: map[string]any{ + "operation": "exclude", + "fields": []string{"internal_id", "created_at", "updated_at"}, + }, + } + excludeHandler.SetConfig(excludeConfig) + + result = runHandler(excludeHandler, testData, "Exclude Fields") + printResult("Field exclusion", result) + + // Test field renaming + renameHandler := handlers.NewFieldHandler("rename-test") + renameConfig := dag.Payload{ + Data: map[string]any{ + "operation": "rename", + "mapping": map[string]any{ + "first_name": "firstName", + "last_name": "lastName", + "email_addr": "email", + "phone_number": "phone", + "created_at": "createdAt", + "updated_at": "updatedAt", + "is_active": "active", + }, + }, + } + renameHandler.SetConfig(renameConfig) + + result = runHandler(renameHandler, testData, "Rename Fields") + printResult("Field renaming", result) + + // Test adding new fields + addHandler := handlers.NewFieldHandler("add-test") + addConfig := dag.Payload{ + Data: map[string]any{ + "operation": "add", + "new_fields": map[string]any{ + "status": "active", + "version": "1.0", + "is_verified": true, + "last_login": "2023-06-20T10:30:00Z", + "department": "Engineering", + "access_level": 3, + }, + }, + } + addHandler.SetConfig(addConfig) + + result = runHandler(addHandler, testData, "Add Fields") + printResult("Adding fields", result) + + // Test field copying + copyHandler := handlers.NewFieldHandler("copy-test") + copyConfig := dag.Payload{ + Data: map[string]any{ + "operation": "copy", + "mapping": map[string]any{ + "first_name": "display_name", + "email_addr": "contact_email", + "id": "user_id", + }, + }, + } + copyHandler.SetConfig(copyConfig) + + result = runHandler(copyHandler, testData, "Copy Fields") + printResult("Field copying", result) + + // Test key transformation + transformHandler := handlers.NewFieldHandler("transform-test") + transformConfig := dag.Payload{ + Data: map[string]any{ + "operation": "transform_keys", + "transformation": "snake_case", + }, + } + transformHandler.SetConfig(transformConfig) + + result = runHandler(transformHandler, testData, "Transform Keys") + printResult("Key transformation", result) +} + +func testDataHandler() { + fmt.Println("\n7. 
DATA HANDLER TESTS") + fmt.Println("=====================") + + // Test data sorting + testData := map[string]any{ + "data": []interface{}{ + map[string]any{"name": "John", "age": 30, "salary": 80000, "department": "Engineering"}, + map[string]any{"name": "Jane", "age": 25, "salary": 90000, "department": "Engineering"}, + map[string]any{"name": "Bob", "age": 35, "salary": 75000, "department": "Marketing"}, + map[string]any{"name": "Alice", "age": 28, "salary": 85000, "department": "Marketing"}, + }, + } + + sortHandler := handlers.NewDataHandler("sort-test") + sortConfig := dag.Payload{ + Data: map[string]any{ + "operation": "sort", + "sort_field": "salary", + "sort_order": "desc", + }, + } + sortHandler.SetConfig(sortConfig) + + result := runHandler(sortHandler, testData, "Sort Data by Salary (Desc)") + printResult("Data sorting", result) + + // Test field calculations + calcData := map[string]any{ + "base_price": 100.0, + "tax_rate": 0.15, + "shipping_cost": 10.0, + "discount": 5.0, + "quantity": 2, + } + + calcHandler := handlers.NewDataHandler("calc-test") + calcConfig := dag.Payload{ + Data: map[string]any{ + "operation": "calculate", + "calculations": map[string]any{ + "tax_amount": map[string]any{ + "operation": "multiply", + "fields": []string{"base_price", "tax_rate"}, + }, + "subtotal": map[string]any{ + "operation": "sum", + "fields": []string{"base_price", "tax_amount", "shipping_cost"}, + }, + "total": map[string]any{ + "operation": "subtract", + "fields": []string{"subtotal", "discount"}, + }, + "grand_total": map[string]any{ + "operation": "multiply", + "fields": []string{"total", "quantity"}, + }, + }, + }, + } + calcHandler.SetConfig(calcConfig) + + result = runHandler(calcHandler, calcData, "Field Calculations") + printResult("Field calculations", result) + + // Test data deduplication + dupData := map[string]any{ + "data": []interface{}{ + map[string]any{"email": "john@example.com", "name": "John Doe", "id": 1}, + map[string]any{"email": "jane@example.com", "name": "Jane Smith", "id": 2}, + map[string]any{"email": "john@example.com", "name": "John D.", "id": 3}, // duplicate email + map[string]any{"email": "bob@example.com", "name": "Bob Jones", "id": 4}, + map[string]any{"email": "jane@example.com", "name": "Jane S.", "id": 5}, // duplicate email + }, + } + + dedupHandler := handlers.NewDataHandler("dedup-test") + dedupConfig := dag.Payload{ + Data: map[string]any{ + "operation": "deduplicate", + "dedupe_fields": []string{"email"}, + }, + } + dedupHandler.SetConfig(dedupConfig) + + result = runHandler(dedupHandler, dupData, "Data Deduplication") + printResult("Data deduplication", result) + + // Test type casting + castData := map[string]any{ + "user_id": "123", + "age": "30", + "salary": "75000.50", + "is_active": "true", + "score": "95.5", + "name": 123, + "is_verified": "false", + } + + castHandler := handlers.NewDataHandler("cast-test") + castConfig := dag.Payload{ + Data: map[string]any{ + "operation": "type_cast", + "cast": map[string]any{ + "user_id": "int", + "age": "int", + "salary": "float", + "is_active": "bool", + "score": "float", + "name": "string", + "is_verified": "bool", + }, + }, + } + castHandler.SetConfig(castConfig) + + result = runHandler(castHandler, castData, "Type Casting") + printResult("Type casting", result) + + // Test conditional field setting + condData := map[string]any{ + "age": 25, + "salary": 60000, + "years_experience": 3, + } + + condHandler := handlers.NewDataHandler("conditional-test") + condConfig := dag.Payload{ + Data: 
map[string]any{ + "operation": "conditional_set", + "conditions": map[string]any{ + "salary_level": map[string]any{ + "condition": "salary > 70000", + "if_true": "high", + "if_false": "standard", + }, + "experience_level": map[string]any{ + "condition": "years_experience >= 5", + "if_true": "senior", + "if_false": "junior", + }, + }, + }, + } + condHandler.SetConfig(condConfig) + + result = runHandler(condHandler, condData, "Conditional Field Setting") + printResult("Conditional setting", result) +} + +// Helper functions +func runHandler(handler dag.Processor, data map[string]any, description string) map[string]any { + fmt.Printf("\n--- Testing: %s ---\n", description) + + // Convert data to JSON payload + payload, err := json.Marshal(data) + if err != nil { + log.Printf("Error marshaling test data: %v", err) + return nil + } + + // Create a task + task := &mq.Task{ + ID: mq.NewID(), + Payload: payload, + } + + // Process the task + ctx := context.Background() + result := handler.ProcessTask(ctx, task) + + if result.Error != nil { + log.Printf("Handler error: %v", result.Error) + return nil + } + + // Parse result payload + var resultData map[string]any + if err := json.Unmarshal(result.Payload, &resultData); err != nil { + log.Printf("Error unmarshaling result: %v", err) + return nil + } + + return resultData +} + +func printResult(operation string, result map[string]any) { + if result == nil { + fmt.Printf("❌ %s failed\n", operation) + return + } + + fmt.Printf("✅ %s succeeded\n", operation) + + // Pretty print the result (truncated for readability) + resultJSON, err := json.MarshalIndent(result, "", " ") + if err != nil { + fmt.Printf("Error formatting result: %v\n", err) + return + } + + // Truncate very long results + resultStr := string(resultJSON) + if len(resultStr) > 1000 { + resultStr = resultStr[:997] + "..." 
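+		// 997 characters plus the three-character "..." keeps the printed result at exactly 1000.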
+ } + + fmt.Printf("Result:\n%s\n", resultStr) +} + +// Example of chaining handlers in a DAG workflow +func exampleDAGChaining() { + fmt.Println("\n=== CHAINING HANDLERS EXAMPLE ===") + fmt.Println("==================================") + + // Sample input data with nested JSON and various formatting needs + inputData := map[string]any{ + "user_data": `{"firstName": "john", "lastName": "doe", "age": "30", "salary": "75000.50", "isActive": "true"}`, + "metadata": `{"department": "engineering", "level": "senior", "skills": ["go", "python", "javascript"]}`, + } + + fmt.Println("🔗 Chaining multiple handlers to transform data...") + fmt.Printf("Input data: %+v\n", inputData) + + // Step 1: Parse JSON strings + jsonHandler := handlers.NewJSONHandler("json-step") + jsonConfig := dag.Payload{ + Data: map[string]any{ + "operation": "parse", + "fields": []string{"user_data", "metadata"}, + }, + } + jsonHandler.SetConfig(jsonConfig) + + step1Result := runHandler(jsonHandler, inputData, "Step 1: Parse JSON strings") + + if step1Result != nil { + // Step 2: Flatten the parsed nested data + flattenHandler := handlers.NewFlattenHandler("flatten-step") + flattenConfig := dag.Payload{ + Data: map[string]any{ + "operation": "flatten_nested_objects", + "separator": "_", + }, + } + flattenHandler.SetConfig(flattenConfig) + + step2Result := runHandler(flattenHandler, step1Result, "Step 2: Flatten nested objects") + + if step2Result != nil { + // Step 3: Format name fields to proper case + formatHandler := handlers.NewFormatHandler("format-step") + formatConfig := dag.Payload{ + Data: map[string]any{ + "format_type": "capitalize", + "fields": []string{"user_data_parsed_firstName", "user_data_parsed_lastName"}, + }, + } + formatHandler.SetConfig(formatConfig) + + step3Result := runHandler(formatHandler, step2Result, "Step 3: Format names to proper case") + + if step3Result != nil { + // Step 4: Rename fields to standard naming + fieldHandler := handlers.NewFieldHandler("rename-step") + renameConfig := dag.Payload{ + Data: map[string]any{ + "operation": "rename", + "mapping": map[string]any{ + "user_data_parsed_firstName": "first_name", + "user_data_parsed_lastName": "last_name", + "user_data_parsed_age": "age", + "user_data_parsed_salary": "salary", + "user_data_parsed_isActive": "is_active", + "metadata_parsed_department": "department", + "metadata_parsed_level": "level", + }, + }, + } + fieldHandler.SetConfig(renameConfig) + + step4Result := runHandler(fieldHandler, step3Result, "Step 4: Rename fields") + + if step4Result != nil { + // Step 5: Cast data types + dataHandler := handlers.NewDataHandler("cast-step") + castConfig := dag.Payload{ + Data: map[string]any{ + "operation": "type_cast", + "cast": map[string]any{ + "age": "int", + "salary": "float", + "is_active": "bool", + }, + }, + } + dataHandler.SetConfig(castConfig) + + finalResult := runHandler(dataHandler, step4Result, "Step 5: Cast data types") + printResult("🎉 Final chained transformation result", finalResult) + } + } + } + } +} diff --git a/go.mod b/go.mod index faae29c..3b0ea69 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,6 @@ require ( github.com/oarkflow/json v0.0.21 github.com/oarkflow/log v1.0.79 github.com/oarkflow/xid v1.2.8 - github.com/prometheus/client_golang v1.21.1 golang.org/x/crypto v0.33.0 golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 golang.org/x/time v0.11.0 @@ -31,22 +30,15 @@ require ( require ( github.com/andybalholm/brotli v1.1.1 // indirect - github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 
v2.3.0 // indirect github.com/goccy/go-reflect v1.2.0 // indirect github.com/google/uuid v1.6.0 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/oarkflow/jsonschema v0.0.4 - github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.63.0 // indirect - github.com/prometheus/procfs v0.16.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasthttp v1.59.0 // indirect golang.org/x/sys v0.31.0 // indirect - google.golang.org/protobuf v1.36.6 // indirect ) diff --git a/go.sum b/go.sum index 1419666..10396a8 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,5 @@ github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= -github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= -github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= @@ -14,8 +10,6 @@ github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= github.com/gofiber/fiber/v2 v2.52.6 h1:Rfp+ILPiYSvvVuIPvxrBns+HJp8qGLDnLJawAu27XVI= github.com/gofiber/fiber/v2 v2.52.6/go.mod h1:YEcBbO/FB+5M1IZNBP9FO3J9281zgPAreiI1oqg8nDw= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= @@ -28,16 +22,12 @@ github.com/kaptinlin/go-i18n v0.1.4 h1:wCiwAn1LOcvymvWIVAM4m5dUAMiHunTdEubLDk4hT github.com/kaptinlin/go-i18n v0.1.4/go.mod h1:g1fn1GvTgT4CiLE8/fFE1hboHWJ6erivrDpiDtCcFKg= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= -github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 
-github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= -github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/oarkflow/date v0.0.4 h1:EwY/wiS3CqZNBx7b2x+3kkJwVNuGk+G0dls76kL/fhU= github.com/oarkflow/date v0.0.4/go.mod h1:xQTFc6p6O5VX6J75ZrPJbelIFGca1ASmhpgirFqL8vM= github.com/oarkflow/dipper v0.0.6 h1:E+ak9i4R1lxx0B04CjfG5DTLTmwuWA1nrdS6KIHdUxQ= @@ -62,14 +52,6 @@ github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0 github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk= -github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k= -github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= -github.com/prometheus/procfs v0.16.0 h1:xh6oHhKwnOJKMYiYBDWmkHqQPyiY40sny36Cmx2bbsM= -github.com/prometheus/procfs v0.16.0/go.mod h1:8veyXUu3nGP7oaCxhX6yeaM5u4stL2FeMXnCqhDthZg= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= @@ -92,7 +74,5 @@ golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/handlers/data_handler.go b/handlers/data_handler.go new file mode 100644 index 0000000..6554d15 --- /dev/null +++ b/handlers/data_handler.go @@ -0,0 +1,737 @@ +package handlers + +import ( + "context" + "fmt" + "math" + "reflect" + "sort" + "strconv" + "strings" + + "github.com/oarkflow/json" + "github.com/oarkflow/mq" + "github.com/oarkflow/mq/dag" +) + +// DataHandler handles miscellaneous data operations +type DataHandler struct { + dag.Operation +} + +func (h *DataHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { + var data map[string]any + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} + } + + operation, ok := h.Payload.Data["operation"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("operation not specified")} + } + + var result map[string]any + switch operation { + case "sort": + result = h.sortData(data) + case "deduplicate": + result = h.deduplicateData(data) + case "calculate": + result = 
h.calculateFields(data) + case "conditional_set": + result = h.conditionalSet(data) + case "type_cast": + result = h.typeCast(data) + case "validate_fields": + result = h.validateFields(data) + case "normalize": + result = h.normalizeData(data) + case "pivot": + result = h.pivotData(data) + case "unpivot": + result = h.unpivotData(data) + default: + return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)} + } + + resultPayload, err := json.Marshal(result) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} + } + + return mq.Result{Payload: resultPayload, Ctx: ctx} +} + +func (h *DataHandler) sortData(data map[string]any) map[string]any { + result := make(map[string]any) + + // Copy non-array data + for key, value := range data { + if key != "data" { + result[key] = value + } + } + + if dataArray, ok := data["data"].([]interface{}); ok { + sortField := h.getSortField() + sortOrder := h.getSortOrder() // "asc" or "desc" + + // Convert to slice of maps for sorting + var records []map[string]interface{} + for _, item := range dataArray { + if record, ok := item.(map[string]interface{}); ok { + records = append(records, record) + } + } + + // Sort the records + sort.Slice(records, func(i, j int) bool { + vi := records[i][sortField] + vj := records[j][sortField] + + comparison := h.compareValues(vi, vj) + if sortOrder == "desc" { + return comparison > 0 + } + return comparison < 0 + }) + + // Convert back to []interface{} + var sortedData []interface{} + for _, record := range records { + sortedData = append(sortedData, record) + } + + result["data"] = sortedData + } + + return result +} + +func (h *DataHandler) deduplicateData(data map[string]any) map[string]any { + result := make(map[string]any) + + // Copy non-array data + for key, value := range data { + if key != "data" { + result[key] = value + } + } + + if dataArray, ok := data["data"].([]interface{}); ok { + dedupeFields := h.getDedupeFields() + seen := make(map[string]bool) + var uniqueData []interface{} + + for _, item := range dataArray { + if record, ok := item.(map[string]interface{}); ok { + key := h.createDedupeKey(record, dedupeFields) + if !seen[key] { + seen[key] = true + uniqueData = append(uniqueData, item) + } + } + } + + result["data"] = uniqueData + result["original_count"] = len(dataArray) + result["deduplicated_count"] = len(uniqueData) + result["duplicates_removed"] = len(dataArray) - len(uniqueData) + } + + return result +} + +func (h *DataHandler) calculateFields(data map[string]any) map[string]any { + result := make(map[string]any) + calculations := h.getCalculations() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for targetField, calc := range calculations { + operation := calc["operation"].(string) + sourceFields := calc["fields"].([]string) + + switch operation { + case "sum": + result[targetField] = h.sumFields(data, sourceFields) + case "subtract": + result[targetField] = h.subtractFields(data, sourceFields) + case "multiply": + result[targetField] = h.multiplyFields(data, sourceFields) + case "divide": + result[targetField] = h.divideFields(data, sourceFields) + case "average": + result[targetField] = h.averageFields(data, sourceFields) + case "min": + result[targetField] = h.minFields(data, sourceFields) + case "max": + result[targetField] = h.maxFields(data, sourceFields) + } + } + + return result +} + +func (h *DataHandler) conditionalSet(data map[string]any) map[string]any { + result := 
make(map[string]any) + conditions := h.getConditions() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for targetField, condConfig := range conditions { + condition := condConfig["condition"].(string) + ifTrue := condConfig["if_true"] + ifFalse := condConfig["if_false"] + + if h.evaluateCondition(data, condition) { + result[targetField] = ifTrue + } else { + result[targetField] = ifFalse + } + } + + return result +} + +func (h *DataHandler) typeCast(data map[string]any) map[string]any { + result := make(map[string]any) + castConfig := h.getCastConfig() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for field, targetType := range castConfig { + if val, ok := data[field]; ok { + result[field] = h.castValue(val, targetType) + } + } + + return result +} + +func (h *DataHandler) validateFields(data map[string]any) map[string]any { + result := make(map[string]any) + validationRules := h.getValidationRules() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + validationResults := make(map[string]interface{}) + allValid := true + + for field, rules := range validationRules { + if val, ok := data[field]; ok { + fieldResult := h.validateField(val, rules) + validationResults[field] = fieldResult + if !fieldResult["valid"].(bool) { + allValid = false + } + } + } + + result["validation_results"] = validationResults + result["all_valid"] = allValid + + return result +} + +func (h *DataHandler) normalizeData(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + normalizationType := h.getNormalizationType() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for _, field := range fields { + if val, ok := data[field]; ok { + result[field] = h.normalizeValue(val, normalizationType) + } + } + + return result +} + +func (h *DataHandler) pivotData(data map[string]any) map[string]any { + // Simplified pivot implementation + result := make(map[string]any) + + if dataArray, ok := data["data"].([]interface{}); ok { + pivotField := h.getPivotField() + valueField := h.getValueField() + + pivoted := make(map[string]interface{}) + + for _, item := range dataArray { + if record, ok := item.(map[string]interface{}); ok { + if pivotVal, ok := record[pivotField]; ok { + if val, ok := record[valueField]; ok { + key := fmt.Sprintf("%v", pivotVal) + pivoted[key] = val + } + } + } + } + + result["pivoted_data"] = pivoted + } + + return result +} + +func (h *DataHandler) unpivotData(data map[string]any) map[string]any { + // Simplified unpivot implementation + result := make(map[string]any) + unpivotFields := h.getUnpivotFields() + + var unpivotedData []interface{} + + for _, field := range unpivotFields { + if val, ok := data[field]; ok { + record := map[string]interface{}{ + "field": field, + "value": val, + } + unpivotedData = append(unpivotedData, record) + } + } + + result["data"] = unpivotedData + result["unpivoted"] = true + + return result +} + +// Helper functions +func (h *DataHandler) compareValues(a, b interface{}) int { + if a == nil && b == nil { + return 0 + } + if a == nil { + return -1 + } + if b == nil { + return 1 + } + + // Try numeric comparison first + if aNum, aOk := toFloat64(a); aOk { + if bNum, bOk := toFloat64(b); bOk { + if aNum < bNum { + return -1 + } else if aNum > bNum { + return 1 + } + return 0 + } + } + + // Fall back to string comparison + aStr := fmt.Sprintf("%v", a) + 
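+	// Reached when at least one side is non-numeric: such pairs are
+	// compared lexicographically via their fmt.Sprintf("%v", ...) form.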
bStr := fmt.Sprintf("%v", b) + if aStr < bStr { + return -1 + } else if aStr > bStr { + return 1 + } + return 0 +} + +func (h *DataHandler) createDedupeKey(record map[string]interface{}, fields []string) string { + var keyParts []string + for _, field := range fields { + keyParts = append(keyParts, fmt.Sprintf("%v", record[field])) + } + return strings.Join(keyParts, "|") +} + +func (h *DataHandler) sumFields(data map[string]any, fields []string) float64 { + var sum float64 + for _, field := range fields { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok { + sum += num + } + } + } + return sum +} + +func (h *DataHandler) subtractFields(data map[string]any, fields []string) float64 { + if len(fields) < 2 { + return 0 + } + + var result float64 + if val, ok := data[fields[0]]; ok { + if num, ok := toFloat64(val); ok { + result = num + } + } + + for _, field := range fields[1:] { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok { + result -= num + } + } + } + return result +} + +func (h *DataHandler) multiplyFields(data map[string]any, fields []string) float64 { + result := 1.0 + for _, field := range fields { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok { + result *= num + } + } + } + return result +} + +func (h *DataHandler) divideFields(data map[string]any, fields []string) float64 { + if len(fields) < 2 { + return 0 + } + + var result float64 + if val, ok := data[fields[0]]; ok { + if num, ok := toFloat64(val); ok { + result = num + } + } + + for _, field := range fields[1:] { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok && num != 0 { + result /= num + } + } + } + return result +} + +func (h *DataHandler) averageFields(data map[string]any, fields []string) float64 { + sum := h.sumFields(data, fields) + return sum / float64(len(fields)) +} + +func (h *DataHandler) minFields(data map[string]any, fields []string) float64 { + min := math.Inf(1) + for _, field := range fields { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok { + if num < min { + min = num + } + } + } + } + return min +} + +func (h *DataHandler) maxFields(data map[string]any, fields []string) float64 { + max := math.Inf(-1) + for _, field := range fields { + if val, ok := data[field]; ok { + if num, ok := toFloat64(val); ok { + if num > max { + max = num + } + } + } + } + return max +} + +func (h *DataHandler) evaluateCondition(data map[string]any, condition string) bool { + // Simple condition evaluation - can be extended + parts := strings.Fields(condition) + if len(parts) >= 3 { + field := parts[0] + operator := parts[1] + value := parts[2] + + if fieldVal, ok := data[field]; ok { + switch operator { + case "==", "=": + return fmt.Sprintf("%v", fieldVal) == value + case "!=": + return fmt.Sprintf("%v", fieldVal) != value + case ">": + if fieldNum, ok := toFloat64(fieldVal); ok { + if valueNum, ok := toFloat64(value); ok { + return fieldNum > valueNum + } + } + case "<": + if fieldNum, ok := toFloat64(fieldVal); ok { + if valueNum, ok := toFloat64(value); ok { + return fieldNum < valueNum + } + } + } + } + } + return false +} + +func (h *DataHandler) castValue(val interface{}, targetType string) interface{} { + switch targetType { + case "string": + return fmt.Sprintf("%v", val) + case "int": + if num, ok := toFloat64(val); ok { + return int(num) + } + return val + case "float": + if num, ok := toFloat64(val); ok { + return num + } + return val + case "bool": + if str, ok := val.(string); ok { + return str == "true" || 
str == "1" + } + return val + default: + return val + } +} + +func (h *DataHandler) validateField(val interface{}, rules map[string]interface{}) map[string]interface{} { + result := map[string]interface{}{ + "valid": true, + "errors": []string{}, + } + + var errors []string + + // Required validation + if required, ok := rules["required"].(bool); ok && required { + if val == nil || val == "" { + errors = append(errors, "field is required") + } + } + + // Type validation + if expectedType, ok := rules["type"].(string); ok { + if !h.validateType(val, expectedType) { + errors = append(errors, fmt.Sprintf("expected type %s", expectedType)) + } + } + + // Range validation for numbers + if minVal, ok := rules["min"]; ok { + if num, numOk := toFloat64(val); numOk { + if minNum, minOk := toFloat64(minVal); minOk { + if num < minNum { + errors = append(errors, fmt.Sprintf("value must be >= %v", minVal)) + } + } + } + } + + if len(errors) > 0 { + result["valid"] = false + result["errors"] = errors + } + + return result +} + +func (h *DataHandler) validateType(val interface{}, expectedType string) bool { + actualType := reflect.TypeOf(val).String() + switch expectedType { + case "string": + return actualType == "string" + case "int", "integer": + return actualType == "int" || actualType == "float64" + case "float", "number": + return actualType == "float64" || actualType == "int" + case "bool", "boolean": + return actualType == "bool" + default: + return true + } +} + +func (h *DataHandler) normalizeValue(val interface{}, normType string) interface{} { + switch normType { + case "lowercase": + if str, ok := val.(string); ok { + return strings.ToLower(str) + } + case "uppercase": + if str, ok := val.(string); ok { + return strings.ToUpper(str) + } + case "trim": + if str, ok := val.(string); ok { + return strings.TrimSpace(str) + } + } + return val +} + +func toFloat64(val interface{}) (float64, bool) { + switch v := val.(type) { + case float64: + return v, true + case int: + return float64(v), true + case int64: + return float64(v), true + case string: + if f, err := strconv.ParseFloat(v, 64); err == nil { + return f, true + } + } + return 0, false +} + +// Configuration getters +func (h *DataHandler) getSortField() string { + if field, ok := h.Payload.Data["sort_field"].(string); ok { + return field + } + return "" +} + +func (h *DataHandler) getSortOrder() string { + if order, ok := h.Payload.Data["sort_order"].(string); ok { + return order + } + return "asc" +} + +func (h *DataHandler) getDedupeFields() []string { + if fields, ok := h.Payload.Data["dedupe_fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *DataHandler) getCalculations() map[string]map[string]interface{} { + result := make(map[string]map[string]interface{}) + if calc, ok := h.Payload.Data["calculations"].(map[string]interface{}); ok { + for key, value := range calc { + if calcMap, ok := value.(map[string]interface{}); ok { + result[key] = calcMap + } + } + } + return result +} + +func (h *DataHandler) getConditions() map[string]map[string]interface{} { + result := make(map[string]map[string]interface{}) + if cond, ok := h.Payload.Data["conditions"].(map[string]interface{}); ok { + for key, value := range cond { + if condMap, ok := value.(map[string]interface{}); ok { + result[key] = condMap + } + } + } + return result +} + +func (h *DataHandler) getCastConfig() 
map[string]string { + result := make(map[string]string) + if cast, ok := h.Payload.Data["cast"].(map[string]interface{}); ok { + for key, value := range cast { + if str, ok := value.(string); ok { + result[key] = str + } + } + } + return result +} + +func (h *DataHandler) getValidationRules() map[string]map[string]interface{} { + result := make(map[string]map[string]interface{}) + if rules, ok := h.Payload.Data["validation_rules"].(map[string]interface{}); ok { + for key, value := range rules { + if ruleMap, ok := value.(map[string]interface{}); ok { + result[key] = ruleMap + } + } + } + return result +} + +func (h *DataHandler) getTargetFields() []string { + if fields, ok := h.Payload.Data["fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *DataHandler) getNormalizationType() string { + if normType, ok := h.Payload.Data["normalize_type"].(string); ok { + return normType + } + return "trim" +} + +func (h *DataHandler) getPivotField() string { + if field, ok := h.Payload.Data["pivot_field"].(string); ok { + return field + } + return "" +} + +func (h *DataHandler) getValueField() string { + if field, ok := h.Payload.Data["value_field"].(string); ok { + return field + } + return "" +} + +func (h *DataHandler) getUnpivotFields() []string { + if fields, ok := h.Payload.Data["unpivot_fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func NewDataHandler(id string) *DataHandler { + return &DataHandler{ + Operation: dag.Operation{ID: id, Key: "data", Type: dag.Function, Tags: []string{"data", "transformation", "misc"}}, + } +} diff --git a/handlers/data_transformation_handler.go b/handlers/data_transformation_handler.go deleted file mode 100644 index cbfc130..0000000 --- a/handlers/data_transformation_handler.go +++ /dev/null @@ -1,765 +0,0 @@ -package handlers - -import ( - "context" - "fmt" - "math" - "reflect" - "sort" - "strconv" - "strings" - - "github.com/oarkflow/json" - "github.com/oarkflow/mq" - "github.com/oarkflow/mq/dag" -) - -// DataTransformationHandler provides comprehensive data transformation capabilities -type DataTransformationHandler struct { - dag.Operation - Transformations []DataTransformation `json:"transformations"` // list of transformations to apply -} - -type DataTransformation struct { - Name string `json:"name"` // transformation name/identifier - Type string `json:"type"` // transformation type - SourceField string `json:"source_field"` // source field (can be empty for data-wide operations) - TargetField string `json:"target_field"` // target field (can be empty to overwrite source) - Config map[string]any `json:"config"` // transformation configuration - Condition *TransformCondition `json:"condition"` // optional condition for when to apply -} - -type TransformCondition struct { - Field string `json:"field"` // field to check - Operator string `json:"operator"` // eq, ne, gt, lt, ge, le, contains, regex - Value any `json:"value"` // value to compare against -} - -func (d *DataTransformationHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - // Apply transformations 
in sequence - for i, transformation := range d.Transformations { - // Check condition if specified - if transformation.Condition != nil { - if !d.evaluateCondition(data, transformation.Condition) { - continue // skip this transformation - } - } - - var err error - data, err = d.applyTransformation(data, transformation) - if err != nil { - return mq.Result{Error: fmt.Errorf("transformation %d (%s) failed: %v", i+1, transformation.Name, err), Ctx: ctx} - } - } - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (d *DataTransformationHandler) evaluateCondition(data map[string]any, condition *TransformCondition) bool { - fieldValue, exists := data[condition.Field] - if !exists { - return false - } - - switch condition.Operator { - case "eq": - return fmt.Sprintf("%v", fieldValue) == fmt.Sprintf("%v", condition.Value) - case "ne": - return fmt.Sprintf("%v", fieldValue) != fmt.Sprintf("%v", condition.Value) - case "gt": - return d.compareNumeric(fieldValue, condition.Value) > 0 - case "lt": - return d.compareNumeric(fieldValue, condition.Value) < 0 - case "ge": - return d.compareNumeric(fieldValue, condition.Value) >= 0 - case "le": - return d.compareNumeric(fieldValue, condition.Value) <= 0 - case "contains": - return strings.Contains(fmt.Sprintf("%v", fieldValue), fmt.Sprintf("%v", condition.Value)) - case "regex": - // Basic regex support - in production, use proper regex library - return strings.Contains(fmt.Sprintf("%v", fieldValue), fmt.Sprintf("%v", condition.Value)) - default: - return false - } -} - -func (d *DataTransformationHandler) compareNumeric(a, b any) int { - aFloat := d.toFloat64(a) - bFloat := d.toFloat64(b) - - if aFloat < bFloat { - return -1 - } else if aFloat > bFloat { - return 1 - } - return 0 -} - -func (d *DataTransformationHandler) applyTransformation(data map[string]any, transformation DataTransformation) (map[string]any, error) { - switch transformation.Type { - case "normalize": - return d.normalizeData(data, transformation) - case "aggregate": - return d.aggregateData(data, transformation) - case "pivot": - return d.pivotData(data, transformation) - case "unpivot": - return d.unpivotData(data, transformation) - case "calculate": - return d.calculateField(data, transformation) - case "lookup": - return d.lookupTransform(data, transformation) - case "bucket": - return d.bucketize(data, transformation) - case "rank": - return d.rankData(data, transformation) - case "window": - return d.windowFunction(data, transformation) - case "encode": - return d.encodeData(data, transformation) - case "decode": - return d.decodeData(data, transformation) - case "validate": - return d.validateData(data, transformation) - default: - return nil, fmt.Errorf("unsupported transformation type: %s", transformation.Type) - } -} - -func (d *DataTransformationHandler) normalizeData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - sourceValue := data[transformation.SourceField] - normalizeType, _ := transformation.Config["type"].(string) - - var normalized any - var err error - - switch normalizeType { - case "min_max": - normalized, err = d.minMaxNormalize(sourceValue, transformation.Config) - case "z_score": - normalized, err = d.zScoreNormalize(sourceValue, transformation.Config) - case "unit_vector": - normalized, err = d.unitVectorNormalize(sourceValue, transformation.Config) - default: - return nil, fmt.Errorf("unsupported normalization type: %s", normalizeType) - } - - if err != nil { - return nil, err - } - - 
targetField := transformation.TargetField - if targetField == "" { - targetField = transformation.SourceField - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - result[targetField] = normalized - - return result, nil -} - -func (d *DataTransformationHandler) minMaxNormalize(value any, config map[string]any) (float64, error) { - num := d.toFloat64(value) - min, _ := config["min"].(float64) - max, _ := config["max"].(float64) - - if max == min { - return 0, nil - } - - return (num - min) / (max - min), nil -} - -func (d *DataTransformationHandler) zScoreNormalize(value any, config map[string]any) (float64, error) { - num := d.toFloat64(value) - mean, _ := config["mean"].(float64) - stdDev, _ := config["std_dev"].(float64) - - if stdDev == 0 { - return 0, nil - } - - return (num - mean) / stdDev, nil -} - -func (d *DataTransformationHandler) unitVectorNormalize(value any, config map[string]any) (float64, error) { - num := d.toFloat64(value) - magnitude, _ := config["magnitude"].(float64) - - if magnitude == 0 { - return 0, nil - } - - return num / magnitude, nil -} - -func (d *DataTransformationHandler) calculateField(data map[string]any, transformation DataTransformation) (map[string]any, error) { - expression, _ := transformation.Config["expression"].(string) - - // Simple expression evaluator - in production, use a proper expression library - result, err := d.evaluateExpression(expression, data) - if err != nil { - return nil, err - } - - targetField := transformation.TargetField - if targetField == "" { - return nil, fmt.Errorf("target field is required for calculate transformation") - } - - resultData := make(map[string]any) - for k, v := range data { - resultData[k] = v - } - resultData[targetField] = result - - return resultData, nil -} - -func (d *DataTransformationHandler) evaluateExpression(expression string, data map[string]any) (any, error) { - // Basic expression evaluation - replace with proper expression evaluator - // This is a simplified implementation for common cases - - expression = strings.TrimSpace(expression) - - // Handle simple field references - if value, exists := data[expression]; exists { - return value, nil - } - - // Handle simple arithmetic operations - if strings.Contains(expression, "+") { - parts := strings.Split(expression, "+") - if len(parts) == 2 { - left := strings.TrimSpace(parts[0]) - right := strings.TrimSpace(parts[1]) - - leftVal := d.getValueOrNumber(left, data) - rightVal := d.getValueOrNumber(right, data) - - return d.toFloat64(leftVal) + d.toFloat64(rightVal), nil - } - } - - if strings.Contains(expression, "-") { - parts := strings.Split(expression, "-") - if len(parts) == 2 { - left := strings.TrimSpace(parts[0]) - right := strings.TrimSpace(parts[1]) - - leftVal := d.getValueOrNumber(left, data) - rightVal := d.getValueOrNumber(right, data) - - return d.toFloat64(leftVal) - d.toFloat64(rightVal), nil - } - } - - if strings.Contains(expression, "*") { - parts := strings.Split(expression, "*") - if len(parts) == 2 { - left := strings.TrimSpace(parts[0]) - right := strings.TrimSpace(parts[1]) - - leftVal := d.getValueOrNumber(left, data) - rightVal := d.getValueOrNumber(right, data) - - return d.toFloat64(leftVal) * d.toFloat64(rightVal), nil - } - } - - if strings.Contains(expression, "/") { - parts := strings.Split(expression, "/") - if len(parts) == 2 { - left := strings.TrimSpace(parts[0]) - right := strings.TrimSpace(parts[1]) - - leftVal := d.getValueOrNumber(left, data) - rightVal := 
d.toFloat64(d.getValueOrNumber(right, data)) - - if rightVal == 0 { - return nil, fmt.Errorf("division by zero") - } - - return d.toFloat64(leftVal) / rightVal, nil - } - } - - return nil, fmt.Errorf("unable to evaluate expression: %s", expression) -} - -func (d *DataTransformationHandler) getValueOrNumber(str string, data map[string]any) any { - // Check if it's a field reference - if value, exists := data[str]; exists { - return value - } - - // Try to parse as number - if num, err := strconv.ParseFloat(str, 64); err == nil { - return num - } - - // Return as string - return str -} - -func (d *DataTransformationHandler) bucketize(data map[string]any, transformation DataTransformation) (map[string]any, error) { - sourceValue := data[transformation.SourceField] - buckets, _ := transformation.Config["buckets"].([]any) - labels, _ := transformation.Config["labels"].([]any) - - num := d.toFloat64(sourceValue) - - // Find the appropriate bucket - var bucketIndex int = -1 - for i, bucket := range buckets { - if bucketVal := d.toFloat64(bucket); num <= bucketVal { - bucketIndex = i - break - } - } - - var result any - if bucketIndex >= 0 && bucketIndex < len(labels) { - result = labels[bucketIndex] - } else { - result = "out_of_range" - } - - targetField := transformation.TargetField - if targetField == "" { - targetField = transformation.SourceField - } - - resultData := make(map[string]any) - for k, v := range data { - resultData[k] = v - } - resultData[targetField] = result - - return resultData, nil -} - -func (d *DataTransformationHandler) encodeData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - sourceValue := data[transformation.SourceField] - encodingType, _ := transformation.Config["type"].(string) - - var encoded any - var err error - - switch encodingType { - case "one_hot": - encoded, err = d.oneHotEncode(sourceValue, transformation.Config) - case "label": - encoded, err = d.labelEncode(sourceValue, transformation.Config) - case "ordinal": - encoded, err = d.ordinalEncode(sourceValue, transformation.Config) - default: - return nil, fmt.Errorf("unsupported encoding type: %s", encodingType) - } - - if err != nil { - return nil, err - } - - targetField := transformation.TargetField - if targetField == "" { - targetField = transformation.SourceField - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - result[targetField] = encoded - - return result, nil -} - -func (d *DataTransformationHandler) oneHotEncode(value any, config map[string]any) (map[string]any, error) { - categories, _ := config["categories"].([]any) - valueStr := fmt.Sprintf("%v", value) - - result := make(map[string]any) - for _, category := range categories { - categoryStr := fmt.Sprintf("%v", category) - if valueStr == categoryStr { - result[categoryStr] = 1 - } else { - result[categoryStr] = 0 - } - } - - return result, nil -} - -func (d *DataTransformationHandler) labelEncode(value any, config map[string]any) (int, error) { - mapping, _ := config["mapping"].(map[string]any) - valueStr := fmt.Sprintf("%v", value) - - if encoded, exists := mapping[valueStr]; exists { - return int(d.toFloat64(encoded)), nil - } - - return -1, fmt.Errorf("value '%s' not found in encoding mapping", valueStr) -} - -func (d *DataTransformationHandler) ordinalEncode(value any, config map[string]any) (int, error) { - order, _ := config["order"].([]any) - valueStr := fmt.Sprintf("%v", value) - - for i, item := range order { - if fmt.Sprintf("%v", item) == valueStr { - return 
i, nil - } - } - - return -1, fmt.Errorf("value '%s' not found in ordinal order", valueStr) -} - -func (d *DataTransformationHandler) aggregateData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // This is a simplified version - for complex aggregations, use GroupingHandler - aggregationType, _ := transformation.Config["type"].(string) - sourceField := transformation.SourceField - - // Assume source field contains an array of values - sourceValue, exists := data[sourceField] - if !exists { - return nil, fmt.Errorf("source field '%s' not found", sourceField) - } - - values := d.extractNumbers(sourceValue) - if len(values) == 0 { - return nil, fmt.Errorf("no numeric values found in source field") - } - - var result float64 - - switch aggregationType { - case "sum": - for _, v := range values { - result += v - } - case "avg", "mean": - for _, v := range values { - result += v - } - result /= float64(len(values)) - case "min": - result = values[0] - for _, v := range values { - if v < result { - result = v - } - } - case "max": - result = values[0] - for _, v := range values { - if v > result { - result = v - } - } - case "std": - // Calculate standard deviation - mean := 0.0 - for _, v := range values { - mean += v - } - mean /= float64(len(values)) - - variance := 0.0 - for _, v := range values { - variance += math.Pow(v-mean, 2) - } - variance /= float64(len(values)) - result = math.Sqrt(variance) - default: - return nil, fmt.Errorf("unsupported aggregation type: %s", aggregationType) - } - - targetField := transformation.TargetField - if targetField == "" { - targetField = sourceField - } - - resultData := make(map[string]any) - for k, v := range data { - resultData[k] = v - } - resultData[targetField] = result - - return resultData, nil -} - -func (d *DataTransformationHandler) extractNumbers(value any) []float64 { - var numbers []float64 - - rv := reflect.ValueOf(value) - if rv.Kind() == reflect.Slice || rv.Kind() == reflect.Array { - for i := 0; i < rv.Len(); i++ { - if num := d.toFloat64(rv.Index(i).Interface()); num != 0 { - numbers = append(numbers, num) - } - } - } else { - if num := d.toFloat64(value); num != 0 { - numbers = append(numbers, num) - } - } - - return numbers -} - -func (d *DataTransformationHandler) rankData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // For ranking, we need the data to contain an array of items - arrayField, _ := transformation.Config["array_field"].(string) - rankField := transformation.SourceField - - arrayData, exists := data[arrayField] - if !exists { - return nil, fmt.Errorf("array field '%s' not found", arrayField) - } - - // Convert to slice and extract values for ranking - rv := reflect.ValueOf(arrayData) - if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array { - return nil, fmt.Errorf("array field must contain an array") - } - - type rankItem struct { - index int - value float64 - } - - var items []rankItem - for i := 0; i < rv.Len(); i++ { - item := rv.Index(i).Interface() - if itemMap, ok := item.(map[string]any); ok { - if val, exists := itemMap[rankField]; exists { - items = append(items, rankItem{ - index: i, - value: d.toFloat64(val), - }) - } - } - } - - // Sort by value - sort.Slice(items, func(i, j int) bool { - return items[i].value > items[j].value // descending order - }) - - // Assign ranks - ranks := make(map[int]int) - for rank, item := range items { - ranks[item.index] = rank + 1 - } - - // Update the original data with ranks - targetField := 
transformation.TargetField - if targetField == "" { - targetField = rankField + "_rank" - } - - for i := 0; i < rv.Len(); i++ { - item := rv.Index(i).Interface() - if itemMap, ok := item.(map[string]any); ok { - itemMap[targetField] = ranks[i] - } - } - - return data, nil -} - -func (d *DataTransformationHandler) pivotData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Pivot transformation implementation - pivotField, _ := transformation.Config["pivot_field"].(string) - valueField, _ := transformation.Config["value_field"].(string) - - if pivotField == "" || valueField == "" { - return nil, fmt.Errorf("pivot_field and value_field are required") - } - - result := make(map[string]any) - for key, value := range data { - if key == pivotField { - result[fmt.Sprintf("%v", value)] = data[valueField] - } - } - - return result, nil -} - -func (d *DataTransformationHandler) unpivotData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Unpivot transformation implementation - unpivotFields, _ := transformation.Config["fields"].([]string) - if len(unpivotFields) == 0 { - return nil, fmt.Errorf("fields for unpivoting are required") - } - - result := make(map[string]any) - for _, field := range unpivotFields { - if value, exists := data[field]; exists { - result[field] = value - } - } - - return result, nil -} - -func (d *DataTransformationHandler) lookupTransform(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Lookup transformation implementation - lookupTable, _ := transformation.Config["lookup_table"].(map[string]any) - lookupKey, _ := transformation.Config["lookup_key"].(string) - - if lookupTable == nil || lookupKey == "" { - return nil, fmt.Errorf("lookup_table and lookup_key are required") - } - - lookupValue := data[lookupKey] - if result, exists := lookupTable[fmt.Sprintf("%v", lookupValue)]; exists { - return map[string]any{lookupKey: result}, nil - } - - return nil, fmt.Errorf("lookup value not found") -} - -func (d *DataTransformationHandler) windowFunction(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Window function transformation implementation - windowField, _ := transformation.Config["window_field"].(string) - operation, _ := transformation.Config["operation"].(string) - - if windowField == "" || operation == "" { - return nil, fmt.Errorf("window_field and operation are required") - } - - values := d.extractNumbers(data[windowField]) - if len(values) == 0 { - return nil, fmt.Errorf("no numeric values found in window_field") - } - - var result float64 - switch operation { - case "sum": - for _, v := range values { - result += v - } - case "avg": - for _, v := range values { - result += v - } - result /= float64(len(values)) - default: - return nil, fmt.Errorf("unsupported window operation: %s", operation) - } - - return map[string]any{windowField: result}, nil -} - -func (d *DataTransformationHandler) decodeData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Data decoding implementation - encodingType, _ := transformation.Config["type"].(string) - - if encodingType == "" { - return nil, fmt.Errorf("encoding type is required") - } - - sourceValue := data[transformation.SourceField] - var decoded any - var err error - - switch encodingType { - case "base64": - decoded, err = d.decodeBase64(fmt.Sprintf("%v", sourceValue)) - case "hex": - decoded, err = d.decodeHex(fmt.Sprintf("%v", sourceValue)) - 
default: - return nil, fmt.Errorf("unsupported decoding type: %s", encodingType) - } - - if err != nil { - return nil, err - } - - return map[string]any{transformation.TargetField: decoded}, nil -} - -func (d *DataTransformationHandler) decodeBase64(value string) (string, error) { - decoded, err := strconv.ParseFloat(value, 64) - if err != nil { - return "", err - } - return fmt.Sprintf("%v", decoded), nil -} - -func (d *DataTransformationHandler) decodeHex(value string) (string, error) { - decoded, err := strconv.ParseFloat(value, 64) - if err != nil { - return "", err - } - return fmt.Sprintf("%v", decoded), nil -} - -func (d *DataTransformationHandler) validateData(data map[string]any, transformation DataTransformation) (map[string]any, error) { - // Data validation implementation - validationRules, _ := transformation.Config["rules"].([]map[string]any) - - if len(validationRules) == 0 { - return nil, fmt.Errorf("validation rules are required") - } - - for _, rule := range validationRules { - field, _ := rule["field"].(string) - operator, _ := rule["operator"].(string) - value := rule["value"] - - if !d.evaluateCondition(data, &TransformCondition{Field: field, Operator: operator, Value: value}) { - return nil, fmt.Errorf("validation failed for field: %s", field) - } - } - - return data, nil -} - -func (d *DataTransformationHandler) toFloat64(value any) float64 { - switch v := value.(type) { - case int: - return float64(v) - case int32: - return float64(v) - case int64: - return float64(v) - case float32: - return float64(v) - case float64: - return v - case string: - if num, err := strconv.ParseFloat(v, 64); err == nil { - return num - } - } - return 0 -} - -// Factory function -func NewDataTransformationHandler(id string, transformations []DataTransformation) *DataTransformationHandler { - return &DataTransformationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "data_transformation", - Type: dag.Function, - Tags: []string{"data", "transformation", "advanced"}, - }, - Transformations: transformations, - } -} diff --git a/handlers/data_utils_handler.go b/handlers/data_utils_handler.go deleted file mode 100644 index 7ca9033..0000000 --- a/handlers/data_utils_handler.go +++ /dev/null @@ -1,494 +0,0 @@ -package handlers - -import ( - "context" - "fmt" - - "github.com/oarkflow/json" - "github.com/oarkflow/mq" - "github.com/oarkflow/mq/dag" -) - -// DataUtilsHandler provides utility functions for common data operations -type DataUtilsHandler struct { - dag.Operation - UtilityType string `json:"utility_type"` // type of utility operation - Config map[string]any `json:"config"` // operation configuration -} - -// Utility operation types: -// - "deduplicate": Remove duplicate entries from arrays or objects -// - "merge": Merge multiple objects or arrays -// - "diff": Compare two data structures and return differences -// - "sort": Sort arrays or object keys -// - "reverse": Reverse arrays or strings -// - "sample": Take a sample of data -// - "validate_schema": Validate data against a schema -// - "convert_types": Convert data types in bulk - -func (d *DataUtilsHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - var result map[string]any - var err error - - switch d.UtilityType { - case "deduplicate": - result, err = d.deduplicate(data) - case "merge": - result, err = d.merge(data) - case 
"diff": - result, err = d.diff(data) - case "sort": - result, err = d.sort(data) - case "reverse": - result, err = d.reverse(data) - case "sample": - result, err = d.sample(data) - case "validate_schema": - result, err = d.validateSchema(data) - case "convert_types": - result, err = d.convertTypes(data) - default: - return mq.Result{Error: fmt.Errorf("unsupported utility type: %s", d.UtilityType), Ctx: ctx} - } - - if err != nil { - return mq.Result{Error: err, Ctx: ctx} - } - - bt, _ := json.Marshal(result) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (d *DataUtilsHandler) deduplicate(data map[string]any) (map[string]any, error) { - sourceField, _ := d.Config["source_field"].(string) - targetField, _ := d.Config["target_field"].(string) - dedupeBy, _ := d.Config["dedupe_by"].(string) // field to dedupe by, or empty for exact match - - if targetField == "" { - targetField = sourceField - } - - sourceData, exists := data[sourceField] - if !exists { - return nil, fmt.Errorf("source field '%s' not found", sourceField) - } - - // Implementation depends on data type - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // Basic deduplication logic - can be extended - if arrayData, ok := sourceData.([]any); ok { - seen := make(map[string]bool) - var dedupedArray []any - - for _, item := range arrayData { - var key string - if dedupeBy != "" { - // Dedupe by specific field - if itemMap, ok := item.(map[string]any); ok { - key = fmt.Sprintf("%v", itemMap[dedupeBy]) - } - } else { - // Dedupe by entire item - key = fmt.Sprintf("%v", item) - } - - if !seen[key] { - seen[key] = true - dedupedArray = append(dedupedArray, item) - } - } - - result[targetField] = dedupedArray - } - - return result, nil -} - -func (d *DataUtilsHandler) merge(data map[string]any) (map[string]any, error) { - sourceFields, _ := d.Config["source_fields"].([]any) - targetField, _ := d.Config["target_field"].(string) - mergeStrategy, _ := d.Config["strategy"].(string) // "overwrite", "append", "combine" - - if len(sourceFields) < 2 { - return nil, fmt.Errorf("at least 2 source fields required for merge") - } - - var mergedResult any - - switch mergeStrategy { - case "overwrite": - // Merge objects by overwriting keys - merged := make(map[string]any) - for _, fieldName := range sourceFields { - if field, ok := fieldName.(string); ok { - if fieldData, exists := data[field]; exists { - if fieldMap, ok := fieldData.(map[string]any); ok { - for k, v := range fieldMap { - merged[k] = v - } - } - } - } - } - mergedResult = merged - - case "append": - // Merge arrays by appending - var merged []any - for _, fieldName := range sourceFields { - if field, ok := fieldName.(string); ok { - if fieldData, exists := data[field]; exists { - if fieldArray, ok := fieldData.([]any); ok { - merged = append(merged, fieldArray...) 
- } - } - } - } - mergedResult = merged - - default: - return nil, fmt.Errorf("unsupported merge strategy: %s", mergeStrategy) - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - result[targetField] = mergedResult - - return result, nil -} - -func (d *DataUtilsHandler) diff(data map[string]any) (map[string]any, error) { - field1, _ := d.Config["first_field"].(string) - field2, _ := d.Config["second_field"].(string) - targetField, _ := d.Config["target_field"].(string) - - data1, exists1 := data[field1] - data2, exists2 := data[field2] - - if !exists1 || !exists2 { - return nil, fmt.Errorf("both comparison fields must exist") - } - - // Basic diff implementation - diffResult := map[string]any{ - "equal": fmt.Sprintf("%v", data1) == fmt.Sprintf("%v", data2), - "first_only": d.findUniqueElements(data1, data2), - "second_only": d.findUniqueElements(data2, data1), - "common": d.findCommonElements(data1, data2), - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - result[targetField] = diffResult - - return result, nil -} - -func (d *DataUtilsHandler) findUniqueElements(data1, data2 any) []any { - // Simplified implementation for arrays - if array1, ok := data1.([]any); ok { - if array2, ok := data2.([]any); ok { - set2 := make(map[string]bool) - for _, item := range array2 { - set2[fmt.Sprintf("%v", item)] = true - } - - var unique []any - for _, item := range array1 { - if !set2[fmt.Sprintf("%v", item)] { - unique = append(unique, item) - } - } - return unique - } - } - return nil -} - -func (d *DataUtilsHandler) findCommonElements(data1, data2 any) []any { - // Simplified implementation for arrays - if array1, ok := data1.([]any); ok { - if array2, ok := data2.([]any); ok { - set2 := make(map[string]bool) - for _, item := range array2 { - set2[fmt.Sprintf("%v", item)] = true - } - - var common []any - seen := make(map[string]bool) - for _, item := range array1 { - key := fmt.Sprintf("%v", item) - if set2[key] && !seen[key] { - common = append(common, item) - seen[key] = true - } - } - return common - } - } - return nil -} - -func (d *DataUtilsHandler) sort(data map[string]any) (map[string]any, error) { - sourceField, _ := d.Config["source_field"].(string) - targetField, _ := d.Config["target_field"].(string) - // sortBy, _ := d.Config["sort_by"].(string) - // direction, _ := d.Config["direction"].(string) // "asc" or "desc" - - if targetField == "" { - targetField = sourceField - } - - sourceData, exists := data[sourceField] - if !exists { - return nil, fmt.Errorf("source field '%s' not found", sourceField) - } - - // Basic sorting implementation - // For production, use more sophisticated sorting - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // This is a placeholder - implement proper sorting based on data type - result[targetField] = sourceData - - return result, nil -} - -func (d *DataUtilsHandler) reverse(data map[string]any) (map[string]any, error) { - sourceField, _ := d.Config["source_field"].(string) - targetField, _ := d.Config["target_field"].(string) - - if targetField == "" { - targetField = sourceField - } - - sourceData, exists := data[sourceField] - if !exists { - return nil, fmt.Errorf("source field '%s' not found", sourceField) - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // Reverse arrays - if arrayData, ok := sourceData.([]any); ok { - reversed := make([]any, len(arrayData)) - for i, item := range arrayData { - 
reversed[len(arrayData)-1-i] = item - } - result[targetField] = reversed - } else if strData, ok := sourceData.(string); ok { - // Reverse strings - runes := []rune(strData) - for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { - runes[i], runes[j] = runes[j], runes[i] - } - result[targetField] = string(runes) - } else { - result[targetField] = sourceData - } - - return result, nil -} - -func (d *DataUtilsHandler) sample(data map[string]any) (map[string]any, error) { - sourceField, _ := d.Config["source_field"].(string) - targetField, _ := d.Config["target_field"].(string) - sampleSize, _ := d.Config["sample_size"].(float64) - - if targetField == "" { - targetField = sourceField - } - - sourceData, exists := data[sourceField] - if !exists { - return nil, fmt.Errorf("source field '%s' not found", sourceField) - } - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // Basic sampling for arrays - if arrayData, ok := sourceData.([]any); ok { - size := int(sampleSize) - if size > len(arrayData) { - size = len(arrayData) - } - - if size <= 0 { - result[targetField] = []any{} - } else if size >= len(arrayData) { - result[targetField] = arrayData - } else { - // Simple sampling - take first N elements - // For production, implement proper random sampling - sample := make([]any, size) - copy(sample, arrayData[:size]) - result[targetField] = sample - } - } else { - result[targetField] = sourceData - } - - return result, nil -} - -func (d *DataUtilsHandler) validateSchema(data map[string]any) (map[string]any, error) { - // Basic schema validation placeholder - // For production, implement proper JSON schema validation - sourceField, _ := d.Config["source_field"].(string) - schema, _ := d.Config["schema"].(map[string]any) - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // Placeholder validation result - result["validation_result"] = map[string]any{ - "valid": true, - "errors": []string{}, - "schema": schema, - "data": data[sourceField], - } - - return result, nil -} - -func (d *DataUtilsHandler) convertTypes(data map[string]any) (map[string]any, error) { - conversions, _ := d.Config["conversions"].(map[string]any) - - result := make(map[string]any) - for k, v := range data { - result[k] = v - } - - // Apply type conversions - for field, targetType := range conversions { - if value, exists := result[field]; exists { - converted, err := d.convertType(value, fmt.Sprintf("%v", targetType)) - if err == nil { - result[field] = converted - } - } - } - - return result, nil -} - -func (d *DataUtilsHandler) convertType(value any, targetType string) (any, error) { - switch targetType { - case "string": - return fmt.Sprintf("%v", value), nil - case "int": - if num := d.toFloat64(value); num != 0 { - return int(num), nil - } - return 0, nil - case "float": - return d.toFloat64(value), nil - case "bool": - str := fmt.Sprintf("%v", value) - return str == "true" || str == "1" || str == "yes", nil - default: - return value, fmt.Errorf("unsupported target type: %s", targetType) - } -} - -func (d *DataUtilsHandler) toFloat64(value any) float64 { - switch v := value.(type) { - case int: - return float64(v) - case int32: - return float64(v) - case int64: - return float64(v) - case float32: - return float64(v) - case float64: - return v - case string: - var result float64 - if n, err := fmt.Sscanf(v, "%f", &result); err == nil && n == 1 { - return result - } - } - return 0 -} - -// Factory functions for common utilities -func 
NewDeduplicateHandler(id, sourceField, targetField, dedupeBy string) *DataUtilsHandler { - return &DataUtilsHandler{ - Operation: dag.Operation{ - ID: id, - Key: "deduplicate_data", - Type: dag.Function, - Tags: []string{"data", "utils", "deduplicate"}, - }, - UtilityType: "deduplicate", - Config: map[string]any{ - "source_field": sourceField, - "target_field": targetField, - "dedupe_by": dedupeBy, - }, - } -} - -func NewMergeHandler(id string, sourceFields []string, targetField, strategy string) *DataUtilsHandler { - var anyFields []any - for _, field := range sourceFields { - anyFields = append(anyFields, field) - } - - return &DataUtilsHandler{ - Operation: dag.Operation{ - ID: id, - Key: "merge_data", - Type: dag.Function, - Tags: []string{"data", "utils", "merge"}, - }, - UtilityType: "merge", - Config: map[string]any{ - "source_fields": anyFields, - "target_field": targetField, - "strategy": strategy, - }, - } -} - -func NewDataDiffHandler(id, field1, field2, targetField string) *DataUtilsHandler { - return &DataUtilsHandler{ - Operation: dag.Operation{ - ID: id, - Key: "diff_data", - Type: dag.Function, - Tags: []string{"data", "utils", "diff"}, - }, - UtilityType: "diff", - Config: map[string]any{ - "first_field": field1, - "second_field": field2, - "target_field": targetField, - }, - } -} diff --git a/handlers/examples.go b/handlers/examples.go new file mode 100644 index 0000000..55065a6 --- /dev/null +++ b/handlers/examples.go @@ -0,0 +1,208 @@ +package handlers + +/* +Data Transformation Handlers Usage Examples + +This file contains examples of how to configure and use the various data transformation handlers. +All configurations are done through the dag.Operation.Payload.Data map - no handler-specific configurations. + +1. FORMAT HANDLER +================= +Supports: string, number, date, currency, uppercase, lowercase, capitalize, trim + +Example configuration: +{ + "format_type": "uppercase", + "fields": ["name", "title"], + "currency": "$", + "date_format": "2006-01-02" +} + +2. GROUP HANDLER +================ +Groups data with aggregation functions + +Example configuration: +{ + "group_by": ["department", "status"], + "aggregations": { + "salary": "sum", + "age": "avg", + "count": "count", + "name": "concat" + }, + "concat_separator": ", " +} + +3. SPLIT/JOIN HANDLER +==================== +Handles string operations + +Split example: +{ + "operation": "split", + "fields": ["full_name"], + "separator": " " +} + +Join example: +{ + "operation": "join", + "source_fields": ["first_name", "last_name"], + "target_field": "full_name", + "separator": " " +} + +4. FLATTEN HANDLER +================== +Flattens nested data structures + +Flatten settings example (key-value pairs): +{ + "operation": "flatten_settings", + "source_field": "settings", + "target_field": "config" +} + +Input: {"settings": [{"key": "theme", "value": "dark", "value_type": "string"}]} +Output: {"config": {"theme": "dark"}} + +5. JSON HANDLER +=============== +JSON parsing and manipulation + +Parse JSON string: +{ + "operation": "parse", + "fields": ["json_data"] +} + +Stringify object: +{ + "operation": "stringify", + "fields": ["object_data"], + "indent": true +} + +6. 
FIELD HANDLER +================ +Field manipulation operations + +Filter fields: +{ + "operation": "filter", + "fields": ["name", "email", "age"] +} + +Rename fields: +{ + "operation": "rename", + "mapping": { + "old_name": "new_name", + "email_addr": "email" + } +} + +Add fields: +{ + "operation": "add", + "new_fields": { + "created_at": "2023-01-01", + "status": "active" + } +} + +Transform keys: +{ + "operation": "transform_keys", + "transformation": "snake_case" // or camel_case, kebab_case, etc. +} + +7. DATA HANDLER +=============== +Miscellaneous data operations + +Sort data: +{ + "operation": "sort", + "sort_field": "created_at", + "sort_order": "desc" +} + +Deduplicate: +{ + "operation": "deduplicate", + "dedupe_fields": ["email", "phone"] +} + +Calculate fields: +{ + "operation": "calculate", + "calculations": { + "total": { + "operation": "sum", + "fields": ["amount1", "amount2"] + }, + "average_score": { + "operation": "average", + "fields": ["score1", "score2", "score3"] + } + } +} + +Type casting: +{ + "operation": "type_cast", + "cast": { + "age": "int", + "salary": "float", + "active": "bool" + } +} + +Validate fields: +{ + "operation": "validate_fields", + "validation_rules": { + "email": { + "required": true, + "type": "string" + }, + "age": { + "required": true, + "type": "int", + "min": 0 + } + } +} + +USAGE IN DAG: +============= + +import "github.com/oarkflow/mq/handlers" +import "github.com/oarkflow/mq/dag" + +// Create handler +formatHandler := handlers.NewFormatHandler("format-1") + +// Configure through Operation.Payload +config := dag.Payload{ + Data: map[string]any{ + "format_type": "uppercase", + "fields": []string{"name", "title"}, + }, +} +formatHandler.SetConfig(config) + +// Use in DAG +dag := dag.NewDAG("data-processing") +dag.AddNode(formatHandler) + +CHAINING OPERATIONS: +=================== + +You can chain multiple handlers in a DAG: +1. Parse JSON → 2. Flatten → 3. Filter fields → 4. Format → 5. Group + +Each handler receives the output of the previous handler as input. +*/ diff --git a/handlers/field_handler.go b/handlers/field_handler.go new file mode 100644 index 0000000..8191bad --- /dev/null +++ b/handlers/field_handler.go @@ -0,0 +1,344 @@ +package handlers + +import ( + "context" + "fmt" + "strings" + + "github.com/oarkflow/json" + "github.com/oarkflow/mq" + "github.com/oarkflow/mq/dag" +) + +// FieldHandler handles field manipulation operations (filter, add, remove, rename, etc.) 
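+//
+// Illustrative usage (a sketch; the record and field names are invented, and
+// only the "operation" and "mapping" config keys come from this handler):
+//
+//	h := NewFieldHandler("rename-1")
+//	h.SetConfig(dag.Payload{Data: map[string]any{
+//		"operation": "rename",
+//		"mapping":   map[string]any{"email_addr": "email"},
+//	}})
+//	// ProcessTask on {"email_addr":"a@b.c","age":30} yields {"age":30,"email":"a@b.c"}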
+type FieldHandler struct { + dag.Operation +} + +func (h *FieldHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { + var data map[string]any + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} + } + + operation, ok := h.Payload.Data["operation"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("operation not specified")} + } + + var result map[string]any + switch operation { + case "filter", "select": + result = h.filterFields(data) + case "exclude", "remove": + result = h.excludeFields(data) + case "rename": + result = h.renameFields(data) + case "add", "set": + result = h.addFields(data) + case "copy": + result = h.copyFields(data) + case "merge": + result = h.mergeFields(data) + case "prefix": + result = h.prefixFields(data) + case "suffix": + result = h.suffixFields(data) + case "transform_keys": + result = h.transformKeys(data) + default: + return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)} + } + + resultPayload, err := json.Marshal(result) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} + } + + return mq.Result{Payload: resultPayload, Ctx: ctx} +} + +func (h *FieldHandler) filterFields(data map[string]any) map[string]any { + result := make(map[string]any) + allowedFields := h.getTargetFields() + + if len(allowedFields) == 0 { + return data // If no fields specified, return all + } + + for _, field := range allowedFields { + if val, ok := data[field]; ok { + result[field] = val + } + } + + return result +} + +func (h *FieldHandler) excludeFields(data map[string]any) map[string]any { + result := make(map[string]any) + excludeFields := h.getTargetFields() + + // Copy all fields except excluded ones + for key, value := range data { + if !contains(excludeFields, key) { + result[key] = value + } + } + + return result +} + +func (h *FieldHandler) renameFields(data map[string]any) map[string]any { + result := make(map[string]any) + renameMap := h.getFieldMapping() + + // Copy all fields, renaming as specified + for key, value := range data { + if newKey, ok := renameMap[key]; ok { + result[newKey] = value + } else { + result[key] = value + } + } + + return result +} + +func (h *FieldHandler) addFields(data map[string]any) map[string]any { + result := make(map[string]any) + newFields := h.getNewFields() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + // Add new fields + for key, value := range newFields { + result[key] = value + } + + return result +} + +func (h *FieldHandler) copyFields(data map[string]any) map[string]any { + result := make(map[string]any) + copyMap := h.getFieldMapping() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + // Copy fields to new names + for sourceKey, targetKey := range copyMap { + if val, ok := data[sourceKey]; ok { + result[targetKey] = val + } + } + + return result +} + +func (h *FieldHandler) mergeFields(data map[string]any) map[string]any { + result := make(map[string]any) + mergeConfig := h.getMergeConfig() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + // Merge fields + for targetField, config := range mergeConfig { + sourceFields := config["fields"].([]string) + separator := config["separator"].(string) + + var values []string + for _, field := range sourceFields { + if val, ok := data[field]; ok && val != nil { + values = 
append(values, fmt.Sprintf("%v", val)) + } + } + + if len(values) > 0 { + result[targetField] = strings.Join(values, separator) + } + } + + return result +} + +func (h *FieldHandler) prefixFields(data map[string]any) map[string]any { + result := make(map[string]any) + prefix := h.getPrefix() + targetFields := h.getTargetFields() + + for key, value := range data { + if len(targetFields) == 0 || contains(targetFields, key) { + result[prefix+key] = value + } else { + result[key] = value + } + } + + return result +} + +func (h *FieldHandler) suffixFields(data map[string]any) map[string]any { + result := make(map[string]any) + suffix := h.getSuffix() + targetFields := h.getTargetFields() + + for key, value := range data { + if len(targetFields) == 0 || contains(targetFields, key) { + result[key+suffix] = value + } else { + result[key] = value + } + } + + return result +} + +func (h *FieldHandler) transformKeys(data map[string]any) map[string]any { + result := make(map[string]any) + transformation := h.getKeyTransformation() + + for key, value := range data { + newKey := h.applyKeyTransformation(key, transformation) + result[newKey] = value + } + + return result +} + +func (h *FieldHandler) applyKeyTransformation(key string, transformation string) string { + switch transformation { + case "lowercase": + return strings.ToLower(key) + case "uppercase": + return strings.ToUpper(key) + case "snake_case": + return h.toSnakeCase(key) + case "camel_case": + return h.toCamelCase(key) + case "kebab_case": + return h.toKebabCase(key) + case "pascal_case": + return h.toPascalCase(key) + default: + return key + } +} + +func (h *FieldHandler) toSnakeCase(s string) string { + result := strings.ReplaceAll(s, " ", "_") + result = strings.ReplaceAll(result, "-", "_") + return strings.ToLower(result) +} + +func (h *FieldHandler) toCamelCase(s string) string { + parts := strings.FieldsFunc(s, func(c rune) bool { + return c == ' ' || c == '_' || c == '-' + }) + + if len(parts) == 0 { + return s + } + + result := strings.ToLower(parts[0]) + for _, part := range parts[1:] { + if len(part) > 0 { + result += strings.ToUpper(part[:1]) + strings.ToLower(part[1:]) + } + } + return result +} + +func (h *FieldHandler) toKebabCase(s string) string { + result := strings.ReplaceAll(s, " ", "-") + result = strings.ReplaceAll(result, "_", "-") + return strings.ToLower(result) +} + +func (h *FieldHandler) toPascalCase(s string) string { + camel := h.toCamelCase(s) + if len(camel) > 0 { + return strings.ToUpper(camel[:1]) + camel[1:] + } + return camel +} + +func (h *FieldHandler) getTargetFields() []string { + if fields, ok := h.Payload.Data["fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *FieldHandler) getFieldMapping() map[string]string { + result := make(map[string]string) + if mapping, ok := h.Payload.Data["mapping"].(map[string]interface{}); ok { + for key, value := range mapping { + if str, ok := value.(string); ok { + result[key] = str + } + } + } + return result +} + +func (h *FieldHandler) getNewFields() map[string]interface{} { + if fields, ok := h.Payload.Data["new_fields"].(map[string]interface{}); ok { + return fields + } + return make(map[string]interface{}) +} + +func (h *FieldHandler) getMergeConfig() map[string]map[string]interface{} { + result := make(map[string]map[string]interface{}) + if config, ok := 
h.Payload.Data["merge_config"].(map[string]interface{}); ok { + for key, value := range config { + if configMap, ok := value.(map[string]interface{}); ok { + result[key] = configMap + } + } + } + return result +} + +func (h *FieldHandler) getPrefix() string { + if prefix, ok := h.Payload.Data["prefix"].(string); ok { + return prefix + } + return "" +} + +func (h *FieldHandler) getSuffix() string { + if suffix, ok := h.Payload.Data["suffix"].(string); ok { + return suffix + } + return "" +} + +func (h *FieldHandler) getKeyTransformation() string { + if transform, ok := h.Payload.Data["transformation"].(string); ok { + return transform + } + return "" +} + +func NewFieldHandler(id string) *FieldHandler { + return &FieldHandler{ + Operation: dag.Operation{ID: id, Key: "field", Type: dag.Function, Tags: []string{"data", "transformation", "field"}}, + } +} diff --git a/handlers/field_manipulation_handler.go b/handlers/field_manipulation_handler.go deleted file mode 100644 index 3222fe1..0000000 --- a/handlers/field_manipulation_handler.go +++ /dev/null @@ -1,501 +0,0 @@ -package handlers - -import ( - "context" - "fmt" - "reflect" - "regexp" - "strings" - - "github.com/oarkflow/json" - "github.com/oarkflow/mq" - "github.com/oarkflow/mq/dag" -) - -// FieldManipulationHandler handles various field operations on data -type FieldManipulationHandler struct { - dag.Operation - Operations []FieldOperation `json:"operations"` // list of field operations to perform -} - -type FieldOperation struct { - Type string `json:"type"` // "filter", "add", "remove", "rename", "copy", "transform" - Config FieldOperationConfig `json:"config"` // operation-specific configuration -} - -type FieldOperationConfig struct { - // Common fields - Fields []string `json:"fields"` // fields to operate on - Pattern string `json:"pattern"` // regex pattern for field matching - CaseSensitive bool `json:"case_sensitive"` // case sensitive pattern matching - - // Filter operation - IncludeOnly []string `json:"include_only"` // only include these fields - Exclude []string `json:"exclude"` // exclude these fields - KeepNulls bool `json:"keep_nulls"` // keep fields with null values - KeepEmpty bool `json:"keep_empty"` // keep fields with empty values - - // Add operation - NewFields map[string]any `json:"new_fields"` // fields to add with their values - DefaultValue any `json:"default_value"` // default value for new fields - - // Rename operation - FieldMapping map[string]string `json:"field_mapping"` // old field name -> new field name - - // Copy operation - CopyMapping map[string]string `json:"copy_mapping"` // source field -> target field - OverwriteCopy bool `json:"overwrite_copy"` // overwrite target if exists - - // Transform operation - Transformation string `json:"transformation"` // transformation type - TransformConfig map[string]any `json:"transform_config"` // transformation configuration -} - -func (f *FieldManipulationHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - // Apply operations in sequence - for i, operation := range f.Operations { - var err error - - switch operation.Type { - case "filter": - data, err = f.filterFields(data, operation.Config) - case "add": - data, err = f.addFields(data, operation.Config) - case "remove": - data, err = f.removeFields(data, operation.Config) - case "rename": - data, err = 
f.renameFields(data, operation.Config) - case "copy": - data, err = f.copyFields(data, operation.Config) - case "transform": - data, err = f.transformFields(data, operation.Config) - default: - return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", operation.Type), Ctx: ctx} - } - - if err != nil { - return mq.Result{Error: fmt.Errorf("operation %d (%s) failed: %v", i+1, operation.Type, err), Ctx: ctx} - } - } - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (f *FieldManipulationHandler) filterFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // If include_only is specified, only include those fields - if len(config.IncludeOnly) > 0 { - for _, field := range config.IncludeOnly { - if value, exists := data[field]; exists { - if f.shouldKeepValue(value, config) { - result[field] = value - } - } - } - return result, nil - } - - // Otherwise, include all except excluded fields - excludeSet := make(map[string]bool) - for _, field := range config.Exclude { - excludeSet[field] = true - } - - // Compile regex pattern if provided - var pattern *regexp.Regexp - if config.Pattern != "" { - flags := "" - if !config.CaseSensitive { - flags = "(?i)" - } - var err error - pattern, err = regexp.Compile(flags + config.Pattern) - if err != nil { - return nil, fmt.Errorf("invalid regex pattern: %v", err) - } - } - - for field, value := range data { - // Check if field should be excluded - if excludeSet[field] { - continue - } - - // Check pattern matching - if pattern != nil && !pattern.MatchString(field) { - continue - } - - // Check value conditions - if f.shouldKeepValue(value, config) { - result[field] = value - } - } - - return result, nil -} - -func (f *FieldManipulationHandler) shouldKeepValue(value any, config FieldOperationConfig) bool { - if value == nil { - return config.KeepNulls - } - - // Check for empty values - if f.isEmpty(value) { - return config.KeepEmpty - } - - return true -} - -func (f *FieldManipulationHandler) isEmpty(value any) bool { - if value == nil { - return true - } - - rv := reflect.ValueOf(value) - switch rv.Kind() { - case reflect.String: - return rv.String() == "" - case reflect.Slice, reflect.Array, reflect.Map: - return rv.Len() == 0 - default: - return false - } -} - -func (f *FieldManipulationHandler) addFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // Copy existing data - for k, v := range data { - result[k] = v - } - - // Add new fields from new_fields map - for field, value := range config.NewFields { - result[field] = value - } - - // Add fields from fields list with default value - for _, field := range config.Fields { - if _, exists := result[field]; !exists { - result[field] = config.DefaultValue - } - } - - return result, nil -} - -func (f *FieldManipulationHandler) removeFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // Create set of fields to remove - removeSet := make(map[string]bool) - for _, field := range config.Fields { - removeSet[field] = true - } - - // Compile regex pattern if provided - var pattern *regexp.Regexp - if config.Pattern != "" { - flags := "" - if !config.CaseSensitive { - flags = "(?i)" - } - var err error - pattern, err = regexp.Compile(flags + config.Pattern) - if err != nil { - return nil, fmt.Errorf("invalid regex pattern: %v", err) - } - } - - // Copy fields that should 
not be removed - for field, value := range data { - shouldRemove := removeSet[field] - - // Check pattern matching - if !shouldRemove && pattern != nil { - shouldRemove = pattern.MatchString(field) - } - - if !shouldRemove { - result[field] = value - } - } - - return result, nil -} - -func (f *FieldManipulationHandler) renameFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // Copy and rename fields - for field, value := range data { - newName := field - if mappedName, exists := config.FieldMapping[field]; exists { - newName = mappedName - } - result[newName] = value - } - - return result, nil -} - -func (f *FieldManipulationHandler) copyFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // Copy existing data - for k, v := range data { - result[k] = v - } - - // Copy fields based on mapping - for sourceField, targetField := range config.CopyMapping { - if value, exists := data[sourceField]; exists { - // Check if target already exists and overwrite is not allowed - if _, targetExists := result[targetField]; targetExists && !config.OverwriteCopy { - continue - } - result[targetField] = value - } - } - - return result, nil -} - -func (f *FieldManipulationHandler) transformFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) { - result := make(map[string]any) - - // Copy existing data - for k, v := range data { - result[k] = v - } - - // Apply transformations to specified fields - for _, field := range config.Fields { - if value, exists := result[field]; exists { - transformedValue, err := f.applyTransformation(value, config.Transformation, config.TransformConfig) - if err != nil { - return nil, fmt.Errorf("transformation failed for field '%s': %v", field, err) - } - result[field] = transformedValue - } - } - - return result, nil -} - -func (f *FieldManipulationHandler) applyTransformation(value any, transformationType string, config map[string]any) (any, error) { - switch transformationType { - case "uppercase": - return strings.ToUpper(fmt.Sprintf("%v", value)), nil - - case "lowercase": - return strings.ToLower(fmt.Sprintf("%v", value)), nil - - case "title": - return strings.Title(fmt.Sprintf("%v", value)), nil - - case "trim": - return strings.TrimSpace(fmt.Sprintf("%v", value)), nil - - case "prefix": - prefix, _ := config["prefix"].(string) - return prefix + fmt.Sprintf("%v", value), nil - - case "suffix": - suffix, _ := config["suffix"].(string) - return fmt.Sprintf("%v", value) + suffix, nil - - case "replace": - old, _ := config["old"].(string) - new, _ := config["new"].(string) - return strings.ReplaceAll(fmt.Sprintf("%v", value), old, new), nil - - case "regex_replace": - pattern, _ := config["pattern"].(string) - replacement, _ := config["replacement"].(string) - re, err := regexp.Compile(pattern) - if err != nil { - return nil, fmt.Errorf("invalid regex pattern: %v", err) - } - return re.ReplaceAllString(fmt.Sprintf("%v", value), replacement), nil - - case "multiply": - if multiplier, ok := config["multiplier"].(float64); ok { - if num := f.toFloat64(value); num != 0 { - return num * multiplier, nil - } - } - return value, nil - - case "add": - if addend, ok := config["addend"].(float64); ok { - if num := f.toFloat64(value); num != 0 { - return num + addend, nil - } - } - return value, nil - - case "absolute": - if num := f.toFloat64(value); num != 0 { - if num < 0 { - return -num, nil - } - return num, nil - } - 
return value, nil - - case "default_if_empty": - defaultVal := config["default"] - if f.isEmpty(value) { - return defaultVal, nil - } - return value, nil - - default: - return nil, fmt.Errorf("unsupported transformation type: %s", transformationType) - } -} - -func (f *FieldManipulationHandler) toFloat64(value any) float64 { - switch v := value.(type) { - case int: - return float64(v) - case int32: - return float64(v) - case int64: - return float64(v) - case float32: - return float64(v) - case float64: - return v - case string: - var result float64 - if n, err := fmt.Sscanf(v, "%f", &result); err == nil && n == 1 { - return result - } - } - return 0 -} - -// Factory functions for common operations -func NewFieldFilter(id string, includeOnly, exclude []string, options FieldOperationConfig) *FieldManipulationHandler { - options.IncludeOnly = includeOnly - options.Exclude = exclude - - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "filter_fields", - Type: dag.Function, - Tags: []string{"data", "fields", "filter"}, - }, - Operations: []FieldOperation{ - { - Type: "filter", - Config: options, - }, - }, - } -} - -func NewFieldAdder(id string, newFields map[string]any, defaultValue any) *FieldManipulationHandler { - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "add_fields", - Type: dag.Function, - Tags: []string{"data", "fields", "add"}, - }, - Operations: []FieldOperation{ - { - Type: "add", - Config: FieldOperationConfig{ - NewFields: newFields, - DefaultValue: defaultValue, - }, - }, - }, - } -} - -func NewFieldRemover(id string, fieldsToRemove []string, pattern string) *FieldManipulationHandler { - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "remove_fields", - Type: dag.Function, - Tags: []string{"data", "fields", "remove"}, - }, - Operations: []FieldOperation{ - { - Type: "remove", - Config: FieldOperationConfig{ - Fields: fieldsToRemove, - Pattern: pattern, - }, - }, - }, - } -} - -func NewFieldRenamer(id string, fieldMapping map[string]string) *FieldManipulationHandler { - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "rename_fields", - Type: dag.Function, - Tags: []string{"data", "fields", "rename"}, - }, - Operations: []FieldOperation{ - { - Type: "rename", - Config: FieldOperationConfig{ - FieldMapping: fieldMapping, - }, - }, - }, - } -} - -func NewFieldTransformer(id string, fields []string, transformation string, transformConfig map[string]any) *FieldManipulationHandler { - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "transform_fields", - Type: dag.Function, - Tags: []string{"data", "fields", "transform"}, - }, - Operations: []FieldOperation{ - { - Type: "transform", - Config: FieldOperationConfig{ - Fields: fields, - Transformation: transformation, - TransformConfig: transformConfig, - }, - }, - }, - } -} - -func NewAdvancedFieldManipulator(id string, operations []FieldOperation) *FieldManipulationHandler { - return &FieldManipulationHandler{ - Operation: dag.Operation{ - ID: id, - Key: "advanced_field_manipulation", - Type: dag.Function, - Tags: []string{"data", "fields", "advanced"}, - }, - Operations: operations, - } -} diff --git a/handlers/flatten_handler.go b/handlers/flatten_handler.go index 66c3498..44a5dee 100644 --- a/handlers/flatten_handler.go +++ b/handlers/flatten_handler.go @@ -3,385 +3,265 @@ package handlers import ( "context" "fmt" - "reflect" - "strings" "github.com/oarkflow/json" 
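(A quick end-to-end sketch of the rewritten, Payload-driven handler defined in this hunk. The input record is invented, and constructing mq.Task directly with its Payload field is assumed to be valid here; only the "operation" and "separator" config keys and the dot-notation output come from the code below.)

	package main

	import (
		"context"
		"fmt"

		"github.com/oarkflow/json"
		"github.com/oarkflow/mq"
		"github.com/oarkflow/mq/dag"
		"github.com/oarkflow/mq/handlers"
	)

	func main() {
		h := handlers.NewFlattenHandler("flatten-1")
		h.SetConfig(dag.Payload{Data: map[string]any{
			"operation": "flatten_nested_objects",
			"separator": ".",
		}})

		payload, _ := json.Marshal(map[string]any{
			"user": map[string]any{"name": "jo", "address": map[string]any{"city": "ktm"}},
		})
		res := h.ProcessTask(context.Background(), &mq.Task{Payload: payload})
		fmt.Println(string(res.Payload)) // {"user.address.city":"ktm","user.name":"jo"}
	}
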
"github.com/oarkflow/mq" "github.com/oarkflow/mq/dag" ) -// FlattenHandler flattens array of objects to a single object or performs other flattening operations +// FlattenHandler handles flattening array of objects to single objects type FlattenHandler struct { dag.Operation - FlattenType string `json:"flatten_type"` // "array_to_object", "nested_object", "key_value_pairs" - SourceField string `json:"source_field"` // field containing data to flatten - TargetField string `json:"target_field"` // field to store flattened result - Config FlattenConfiguration `json:"config"` // configuration for flattening } -type FlattenConfiguration struct { - // For array_to_object flattening - KeyField string `json:"key_field"` // field to use as key - ValueField string `json:"value_field"` // field to use as value - TypeField string `json:"type_field"` // optional field for value type conversion - - // For nested_object flattening - Separator string `json:"separator"` // separator for nested keys (default: ".") - MaxDepth int `json:"max_depth"` // maximum depth to flatten (-1 for unlimited) - Prefix string `json:"prefix"` // prefix for flattened keys - SkipArrays bool `json:"skip_arrays"` // skip array flattening - SkipObjects bool `json:"skip_objects"` // skip object flattening - - // For key_value_pairs flattening - PairSeparator string `json:"pair_separator"` // separator between key-value pairs - KVSeparator string `json:"kv_separator"` // separator between key and value - - // General options - OverwriteExisting bool `json:"overwrite_existing"` // overwrite existing keys - PreserveTypes bool `json:"preserve_types"` // preserve original data types -} - -func (f *FlattenHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { +func (h *FlattenHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} } - // Get source data - sourceData, exists := data[f.SourceField] - if !exists { - return mq.Result{Error: fmt.Errorf("source field '%s' not found", f.SourceField), Ctx: ctx} + operation, ok := h.Payload.Data["operation"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("operation not specified")} } - var result any - var err error - - switch f.FlattenType { - case "array_to_object": - result, err = f.flattenArrayToObject(sourceData) - case "nested_object": - result, err = f.flattenNestedObject(sourceData) - case "key_value_pairs": - result, err = f.flattenKeyValuePairs(sourceData) + var result map[string]any + switch operation { + case "flatten_settings": + result = h.flattenSettings(data) + case "flatten_key_value": + result = h.flattenKeyValue(data) + case "flatten_nested_objects": + result = h.flattenNestedObjects(data) + case "flatten_array": + result = h.flattenArray(data) default: - return mq.Result{Error: fmt.Errorf("unsupported flatten type: %s", f.FlattenType), Ctx: ctx} + return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)} } + resultPayload, err := json.Marshal(result) if err != nil { - return mq.Result{Error: err, Ctx: ctx} + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} } - // Set target field - targetField := f.TargetField - if targetField == "" { - targetField = f.SourceField // overwrite source if 
no target specified - } - data[targetField] = result - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} + return mq.Result{Payload: resultPayload, Ctx: ctx} } -func (f *FlattenHandler) flattenArrayToObject(data any) (map[string]any, error) { - // Convert to slice of maps - items, err := f.convertToSliceOfMaps(data) - if err != nil { - return nil, err - } - +// flattenSettings converts array of settings objects with key, value, value_type to a flat object +func (h *FlattenHandler) flattenSettings(data map[string]any) map[string]any { result := make(map[string]any) + sourceField := h.getSourceField() - for _, item := range items { - key, keyExists := item[f.Config.KeyField] - if !keyExists { - continue - } - - value, valueExists := item[f.Config.ValueField] - if !valueExists { - continue - } - - keyStr := fmt.Sprintf("%v", key) - - // Handle type conversion if type field is specified - if f.Config.TypeField != "" { - if typeValue, typeExists := item[f.Config.TypeField]; typeExists { - convertedValue, err := f.convertValueByType(value, fmt.Sprintf("%v", typeValue)) - if err == nil { - value = convertedValue - } - } - } - - // Check for overwrites - if !f.Config.OverwriteExisting { - if _, exists := result[keyStr]; exists { - continue // skip if key already exists - } - } - - result[keyStr] = value + // Copy all original data + for key, value := range data { + result[key] = value } - return result, nil -} + if settingsArray, ok := data[sourceField].([]interface{}); ok { + flattened := make(map[string]any) -func (f *FlattenHandler) flattenNestedObject(data any) (map[string]any, error) { - result := make(map[string]any) - f.flattenRecursive(data, "", result, 0) - return result, nil -} + for _, item := range settingsArray { + if setting, ok := item.(map[string]interface{}); ok { + key, keyExists := setting["key"].(string) + value, valueExists := setting["value"] + valueType, typeExists := setting["value_type"].(string) -func (f *FlattenHandler) flattenRecursive(data any, prefix string, result map[string]any, depth int) { - // Check depth limit - if f.Config.MaxDepth > 0 && depth >= f.Config.MaxDepth { - key := prefix - if key == "" { - key = "root" - } - result[key] = data - return - } - - rv := reflect.ValueOf(data) - if !rv.IsValid() { - return - } - - switch rv.Kind() { - case reflect.Map: - if f.Config.SkipObjects { - result[prefix] = data - return - } - - if rv.Type().Key().Kind() == reflect.String { - for _, key := range rv.MapKeys() { - keyStr := key.String() - value := rv.MapIndex(key).Interface() - - newPrefix := keyStr - if prefix != "" { - separator := f.Config.Separator - if separator == "" { - separator = "." 
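+				// Sketch of the intended conversion: a settings row such as
+				//   {"key": "retries", "value": "3", "value_type": "int"}
+				// ends up in the flattened map as {"retries": 3}.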
+ if keyExists && valueExists { + // Convert value based on value_type + if typeExists { + flattened[key] = h.convertValue(value, valueType) + } else { + flattened[key] = value } - newPrefix = prefix + separator + keyStr } - if f.Config.Prefix != "" { - newPrefix = f.Config.Prefix + newPrefix - } - - f.flattenRecursive(value, newPrefix, result, depth+1) } - } else { - result[prefix] = data } - case reflect.Slice, reflect.Array: - if f.Config.SkipArrays { - result[prefix] = data - return - } - - for i := 0; i < rv.Len(); i++ { - value := rv.Index(i).Interface() - newPrefix := fmt.Sprintf("%s[%d]", prefix, i) - f.flattenRecursive(value, newPrefix, result, depth+1) - } - - default: - if prefix == "" { - prefix = "value" - } - result[prefix] = data + targetField := h.getTargetField() + result[targetField] = flattened } + + return result } -func (f *FlattenHandler) flattenKeyValuePairs(data any) (map[string]any, error) { - str := fmt.Sprintf("%v", data) +// flattenKeyValue converts array of key-value objects to a flat object +func (h *FlattenHandler) flattenKeyValue(data map[string]any) map[string]any { result := make(map[string]any) + sourceField := h.getSourceField() + keyField := h.getKeyField() + valueField := h.getValueField() - pairSeparator := f.Config.PairSeparator - if pairSeparator == "" { - pairSeparator = "," + // Copy all original data + for key, value := range data { + result[key] = value } - kvSeparator := f.Config.KVSeparator - if kvSeparator == "" { - kvSeparator = "=" - } + if kvArray, ok := data[sourceField].([]interface{}); ok { + flattened := make(map[string]any) - pairs := strings.Split(str, pairSeparator) - for _, pair := range pairs { - pair = strings.TrimSpace(pair) - if pair == "" { - continue - } - - kv := strings.SplitN(pair, kvSeparator, 2) - if len(kv) != 2 { - continue - } - - key := strings.TrimSpace(kv[0]) - value := strings.TrimSpace(kv[1]) - - // Check for overwrites - if !f.Config.OverwriteExisting { - if _, exists := result[key]; exists { - continue + for _, item := range kvArray { + if kvPair, ok := item.(map[string]interface{}); ok { + if key, keyExists := kvPair[keyField]; keyExists { + if value, valueExists := kvPair[valueField]; valueExists { + if keyStr, ok := key.(string); ok { + flattened[keyStr] = value + } + } + } } } - // Try to preserve types if requested - if f.Config.PreserveTypes { - if convertedValue := f.tryConvertType(value); convertedValue != nil { - result[key] = convertedValue - } else { - result[key] = value - } - } else { + targetField := h.getTargetField() + result[targetField] = flattened + } + + return result +} + +// flattenNestedObjects flattens nested objects using dot notation +func (h *FlattenHandler) flattenNestedObjects(data map[string]any) map[string]any { + result := make(map[string]any) + separator := h.getSeparator() + + h.flattenRecursive(data, "", result, separator) + return result +} + +// flattenArray flattens arrays by creating numbered fields +func (h *FlattenHandler) flattenArray(data map[string]any) map[string]any { + result := make(map[string]any) + sourceField := h.getSourceField() + + // Copy all original data except the source field + for key, value := range data { + if key != sourceField { result[key] = value } } - return result, nil -} - -func (f *FlattenHandler) convertToSliceOfMaps(data any) ([]map[string]any, error) { - rv := reflect.ValueOf(data) - if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array { - return nil, fmt.Errorf("data must be an array or slice") + if array, ok := 
data[sourceField].([]interface{}); ok { + for i, item := range array { + if obj, ok := item.(map[string]interface{}); ok { + for key, value := range obj { + result[fmt.Sprintf("%s_%d_%s", sourceField, i, key)] = value + } + } else { + result[fmt.Sprintf("%s_%d", sourceField, i)] = item + } + } } - var items []map[string]any - for i := 0; i < rv.Len(); i++ { - item := rv.Index(i).Interface() - - // Convert item to map[string]any - itemMap := make(map[string]any) - itemBytes, err := json.Marshal(item) - if err != nil { - return nil, fmt.Errorf("failed to marshal item at index %d: %v", i, err) - } - - if err := json.Unmarshal(itemBytes, &itemMap); err != nil { - return nil, fmt.Errorf("failed to unmarshal item at index %d: %v", i, err) - } - - items = append(items, itemMap) - } - - return items, nil + return result } -func (f *FlattenHandler) convertValueByType(value any, typeStr string) (any, error) { - valueStr := fmt.Sprintf("%v", value) +func (h *FlattenHandler) flattenRecursive(obj map[string]any, prefix string, result map[string]any, separator string) { + for key, value := range obj { + newKey := key + if prefix != "" { + newKey = prefix + separator + key + } - switch strings.ToLower(typeStr) { - case "string", "str": - return valueStr, nil + switch v := value.(type) { + case map[string]interface{}: + nestedMap := make(map[string]any) + for k, val := range v { + nestedMap[k] = val + } + h.flattenRecursive(nestedMap, newKey, result, separator) + case []interface{}: + // For arrays, create numbered fields + for i, item := range v { + itemKey := fmt.Sprintf("%s%s%d", newKey, separator, i) + if itemMap, ok := item.(map[string]interface{}); ok { + nestedMap := make(map[string]any) + for k, val := range itemMap { + nestedMap[k] = val + } + h.flattenRecursive(nestedMap, itemKey, result, separator) + } else { + result[itemKey] = item + } + } + default: + result[newKey] = value + } + } +} + +func (h *FlattenHandler) convertValue(value interface{}, valueType string) interface{} { + switch valueType { + case "string": + return fmt.Sprintf("%v", value) case "int", "integer": - if i, err := fmt.Sscanf(valueStr, "%d", new(int)); err == nil && i == 1 { - var result int - fmt.Sscanf(valueStr, "%d", &result) - return result, nil + if str, ok := value.(string); ok { + var intVal int + fmt.Sscanf(str, "%d", &intVal) + return intVal } - case "float", "double", "number": - if i, err := fmt.Sscanf(valueStr, "%f", new(float64)); err == nil && i == 1 { - var result float64 - fmt.Sscanf(valueStr, "%f", &result) - return result, nil + return value + case "float", "number": + if str, ok := value.(string); ok { + var floatVal float64 + fmt.Sscanf(str, "%f", &floatVal) + return floatVal } + return value case "bool", "boolean": - lower := strings.ToLower(valueStr) - return lower == "true" || lower == "yes" || lower == "1" || lower == "on", nil - case "json": - var result any - if err := json.Unmarshal([]byte(valueStr), &result); err == nil { - return result, nil + if str, ok := value.(string); ok { + return str == "true" || str == "1" } + return value + case "json": + if str, ok := value.(string); ok { + var jsonVal interface{} + if err := json.Unmarshal([]byte(str), &jsonVal); err == nil { + return jsonVal + } + } + return value + default: + return value } - - return value, fmt.Errorf("unable to convert to type %s", typeStr) } -func (f *FlattenHandler) tryConvertType(value string) any { - // Try int - var intVal int - if n, err := fmt.Sscanf(value, "%d", &intVal); err == nil && n == 1 { - return intVal +func (h 
*FlattenHandler) getSourceField() string { + if field, ok := h.Payload.Data["source_field"].(string); ok { + return field } - - // Try float - var floatVal float64 - if n, err := fmt.Sscanf(value, "%f", &floatVal); err == nil && n == 1 { - return floatVal - } - - // Try bool - lower := strings.ToLower(value) - if lower == "true" || lower == "false" { - return lower == "true" - } - - // Try JSON - var jsonVal any - if err := json.Unmarshal([]byte(value), &jsonVal); err == nil { - return jsonVal - } - - return nil // Unable to convert, return nil to use original string + return "settings" // Default } -// Factory functions -func NewArrayToObjectFlattener(id, sourceField, targetField, keyField, valueField string, config FlattenConfiguration) *FlattenHandler { - config.KeyField = keyField - config.ValueField = valueField +func (h *FlattenHandler) getTargetField() string { + if field, ok := h.Payload.Data["target_field"].(string); ok { + return field + } + return "flattened" // Default +} +func (h *FlattenHandler) getKeyField() string { + if field, ok := h.Payload.Data["key_field"].(string); ok { + return field + } + return "key" // Default +} + +func (h *FlattenHandler) getValueField() string { + if field, ok := h.Payload.Data["value_field"].(string); ok { + return field + } + return "value" // Default +} + +func (h *FlattenHandler) getSeparator() string { + if sep, ok := h.Payload.Data["separator"].(string); ok { + return sep + } + return "." // Default separator for flattening +} + +func NewFlattenHandler(id string) *FlattenHandler { return &FlattenHandler{ - Operation: dag.Operation{ - ID: id, - Key: "flatten_array_to_object", - Type: dag.Function, - Tags: []string{"data", "flatten", "array", "object"}, - }, - FlattenType: "array_to_object", - SourceField: sourceField, - TargetField: targetField, - Config: config, - } -} - -func NewNestedObjectFlattener(id, sourceField, targetField string, config FlattenConfiguration) *FlattenHandler { - return &FlattenHandler{ - Operation: dag.Operation{ - ID: id, - Key: "flatten_nested_object", - Type: dag.Function, - Tags: []string{"data", "flatten", "nested", "object"}, - }, - FlattenType: "nested_object", - SourceField: sourceField, - TargetField: targetField, - Config: config, - } -} - -func NewKeyValuePairsFlattener(id, sourceField, targetField string, config FlattenConfiguration) *FlattenHandler { - return &FlattenHandler{ - Operation: dag.Operation{ - ID: id, - Key: "flatten_key_value_pairs", - Type: dag.Function, - Tags: []string{"data", "flatten", "key-value", "string"}, - }, - FlattenType: "key_value_pairs", - SourceField: sourceField, - TargetField: targetField, - Config: config, + Operation: dag.Operation{ID: id, Key: "flatten", Type: dag.Function, Tags: []string{"data", "transformation", "flatten"}}, } } diff --git a/handlers/format_handler.go b/handlers/format_handler.go index 3e371ac..4a75b3b 100644 --- a/handlers/format_handler.go +++ b/handlers/format_handler.go @@ -3,7 +3,6 @@ package handlers import ( "context" "fmt" - "reflect" "strconv" "strings" "time" @@ -13,302 +12,249 @@ import ( "github.com/oarkflow/mq/dag" ) -// FormatHandler handles various data formatting operations +// FormatHandler handles data formatting operations type FormatHandler struct { dag.Operation - FormatType string `json:"format_type"` // date, number, string, currency, etc. 
- SourceField string `json:"source_field"` // field to format - TargetField string `json:"target_field"` // field to store formatted result - FormatConfig map[string]string `json:"format_config"` // format-specific configuration } -func (f *FormatHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { +func (h *FormatHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} } - // Get source value - sourceValue, exists := data[f.SourceField] - if !exists { - return mq.Result{Error: fmt.Errorf("source field '%s' not found", f.SourceField), Ctx: ctx} + formatType, ok := h.Payload.Data["format_type"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("format_type not specified")} } - // Format based on type - var formattedValue any - var err error - - switch f.FormatType { - case "date": - formattedValue, err = f.formatDate(sourceValue) - case "number": - formattedValue, err = f.formatNumber(sourceValue) - case "currency": - formattedValue, err = f.formatCurrency(sourceValue) + var result map[string]any + switch formatType { case "string": - formattedValue, err = f.formatString(sourceValue) - case "boolean": - formattedValue, err = f.formatBoolean(sourceValue) - case "array": - formattedValue, err = f.formatArray(sourceValue) - default: - return mq.Result{Error: fmt.Errorf("unsupported format type: %s", f.FormatType), Ctx: ctx} - } - - if err != nil { - return mq.Result{Error: err, Ctx: ctx} - } - - // Set target field - targetField := f.TargetField - if targetField == "" { - targetField = f.SourceField // overwrite source if no target specified - } - data[targetField] = formattedValue - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (f *FormatHandler) formatDate(value any) (string, error) { - var t time.Time - var err error - - switch v := value.(type) { - case string: - // Try parsing various date formats - formats := []string{ - time.RFC3339, - "2006-01-02 15:04:05", - "2006-01-02", - "01/02/2006", - "02-01-2006", - "2006/01/02", - } - - for _, format := range formats { - if t, err = time.Parse(format, v); err == nil { - break - } - } - if err != nil { - return "", fmt.Errorf("unable to parse date string: %s", v) - } - case time.Time: - t = v - case int64: - t = time.Unix(v, 0) - case float64: - t = time.Unix(int64(v), 0) - default: - return "", fmt.Errorf("unsupported date type: %T", value) - } - - // Get output format from config - outputFormat := f.FormatConfig["output_format"] - if outputFormat == "" { - outputFormat = "2006-01-02 15:04:05" // default format - } - - return t.Format(outputFormat), nil -} - -func (f *FormatHandler) formatNumber(value any) (string, error) { - var num float64 - var err error - - switch v := value.(type) { - case string: - num, err = strconv.ParseFloat(v, 64) - if err != nil { - return "", fmt.Errorf("unable to parse number string: %s", v) - } - case int: - num = float64(v) - case int32: - num = float64(v) - case int64: - num = float64(v) - case float32: - num = float64(v) - case float64: - num = v - default: - return "", fmt.Errorf("unsupported number type: %T", value) - } - - // Get precision from config - precision := 2 - if p, exists := f.FormatConfig["precision"]; exists { - 
if parsed, err := strconv.Atoi(p); err == nil { - precision = parsed - } - } - - // Get format style - style := f.FormatConfig["style"] - switch style { - case "scientific": - return fmt.Sprintf("%e", num), nil - case "percentage": - return fmt.Sprintf("%."+strconv.Itoa(precision)+"f%%", num*100), nil - default: - return fmt.Sprintf("%."+strconv.Itoa(precision)+"f", num), nil - } -} - -func (f *FormatHandler) formatCurrency(value any) (string, error) { - num, err := f.formatNumber(value) - if err != nil { - return "", err - } - - symbol := f.FormatConfig["symbol"] - if symbol == "" { - symbol = "$" // default currency symbol - } - - position := f.FormatConfig["position"] - if position == "suffix" { - return num + " " + symbol, nil - } - return symbol + num, nil -} - -func (f *FormatHandler) formatString(value any) (string, error) { - str := fmt.Sprintf("%v", value) - - operation := f.FormatConfig["operation"] - switch operation { + result = h.formatToString(data) + case "number": + result = h.formatToNumber(data) + case "date": + result = h.formatDate(data) + case "currency": + result = h.formatCurrency(data) case "uppercase": - return strings.ToUpper(str), nil + result = h.formatUppercase(data) case "lowercase": - return strings.ToLower(str), nil - case "title": - return strings.Title(str), nil + result = h.formatLowercase(data) + case "capitalize": + result = h.formatCapitalize(data) case "trim": - return strings.TrimSpace(str), nil - case "truncate": - if lengthStr, exists := f.FormatConfig["length"]; exists { - if length, err := strconv.Atoi(lengthStr); err == nil && len(str) > length { - return str[:length] + "...", nil + result = h.formatTrim(data) + default: + return mq.Result{Error: fmt.Errorf("unsupported format_type: %s", formatType)} + } + + resultPayload, err := json.Marshal(result) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} + } + + return mq.Result{Payload: resultPayload, Ctx: ctx} +} + +func (h *FormatHandler) formatToString(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + result[key] = fmt.Sprintf("%v", value) + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatToNumber(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + if num, err := strconv.ParseFloat(str, 64); err == nil { + result[key] = num + } else { + result[key] = value // Keep original if conversion fails + } + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatDate(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + dateFormat := h.getDateFormat() + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + if t, err := time.Parse(time.RFC3339, str); err == nil { + result[key] = t.Format(dateFormat) + } else if t, err := time.Parse("2006-01-02", str); err == nil { + result[key] = t.Format(dateFormat) + } else { + result[key] = value // Keep original if parsing fails + } + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatCurrency(data 
map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + currency := h.getCurrency() + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if num, ok := value.(float64); ok { + result[key] = fmt.Sprintf("%s%.2f", currency, num) + } else if str, ok := value.(string); ok { + if num, err := strconv.ParseFloat(str, 64); err == nil { + result[key] = fmt.Sprintf("%s%.2f", currency, num) + } else { + result[key] = value + } + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatUppercase(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + result[key] = strings.ToUpper(str) + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatLowercase(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + result[key] = strings.ToLower(str) + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatCapitalize(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + result[key] = strings.Title(strings.ToLower(str)) + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) formatTrim(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields(data) + + for key, value := range data { + if len(fields) == 0 || contains(fields, key) { + if str, ok := value.(string); ok { + result[key] = strings.TrimSpace(str) + } else { + result[key] = value + } + } else { + result[key] = value + } + } + return result +} + +func (h *FormatHandler) getTargetFields(data map[string]any) []string { + if fields, ok := h.Payload.Data["fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) } } - return str, nil - default: - return str, nil + return result } + return nil } -func (f *FormatHandler) formatBoolean(value any) (string, error) { - var boolVal bool - - switch v := value.(type) { - case bool: - boolVal = v - case string: - lower := strings.ToLower(v) - boolVal = lower == "true" || lower == "yes" || lower == "1" || lower == "on" - case int, int32, int64: - boolVal = reflect.ValueOf(v).Int() != 0 - case float32, float64: - boolVal = reflect.ValueOf(v).Float() != 0 - default: - return "", fmt.Errorf("unsupported boolean type: %T", value) +func (h *FormatHandler) getDateFormat() string { + if format, ok := h.Payload.Data["date_format"].(string); ok { + return format } - - trueValue := f.FormatConfig["true_value"] - falseValue := f.FormatConfig["false_value"] - - if trueValue == "" { - trueValue = "true" - } - if falseValue == "" { - falseValue = "false" - } - - if boolVal { - return trueValue, nil - } - return falseValue, nil + return "2006-01-02" // Default date format } -func (f *FormatHandler) formatArray(value any) 
(string, error) { - rv := reflect.ValueOf(value) - if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array { - return "", fmt.Errorf("value is not an array or slice") +func (h *FormatHandler) getCurrency() string { + if currency, ok := h.Payload.Data["currency"].(string); ok { + return currency } - - separator := f.FormatConfig["separator"] - if separator == "" { - separator = ", " - } - - var elements []string - for i := 0; i < rv.Len(); i++ { - elements = append(elements, fmt.Sprintf("%v", rv.Index(i).Interface())) - } - - return strings.Join(elements, separator), nil + return "$" // Default currency symbol } -// Factory functions for different format types -func NewDateFormatter(id, sourceField, targetField string, config map[string]string) *FormatHandler { +func contains(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + +func NewFormatHandler(id string) *FormatHandler { return &FormatHandler{ - Operation: dag.Operation{ - ID: id, - Key: "format_date", - Type: dag.Function, - Tags: []string{"data", "format", "date"}, - }, - FormatType: "date", - SourceField: sourceField, - TargetField: targetField, - FormatConfig: config, - } -} - -func NewNumberFormatter(id, sourceField, targetField string, config map[string]string) *FormatHandler { - return &FormatHandler{ - Operation: dag.Operation{ - ID: id, - Key: "format_number", - Type: dag.Function, - Tags: []string{"data", "format", "number"}, - }, - FormatType: "number", - SourceField: sourceField, - TargetField: targetField, - FormatConfig: config, - } -} - -func NewCurrencyFormatter(id, sourceField, targetField string, config map[string]string) *FormatHandler { - return &FormatHandler{ - Operation: dag.Operation{ - ID: id, - Key: "format_currency", - Type: dag.Function, - Tags: []string{"data", "format", "currency"}, - }, - FormatType: "currency", - SourceField: sourceField, - TargetField: targetField, - FormatConfig: config, - } -} - -func NewStringFormatter(id, sourceField, targetField string, config map[string]string) *FormatHandler { - return &FormatHandler{ - Operation: dag.Operation{ - ID: id, - Key: "format_string", - Type: dag.Function, - Tags: []string{"data", "format", "string"}, - }, - FormatType: "string", - SourceField: sourceField, - TargetField: targetField, - FormatConfig: config, + Operation: dag.Operation{ID: id, Key: "format", Type: dag.Function, Tags: []string{"data", "transformation"}}, } } diff --git a/handlers/group_handler.go b/handlers/group_handler.go new file mode 100644 index 0000000..a0d5142 --- /dev/null +++ b/handlers/group_handler.go @@ -0,0 +1,280 @@ +package handlers + +import ( + "context" + "fmt" + "sort" + + "github.com/oarkflow/json" + "github.com/oarkflow/mq" + "github.com/oarkflow/mq/dag" +) + +// GroupHandler handles data grouping operations with aggregation +type GroupHandler struct { + dag.Operation +} + +func (h *GroupHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { + var data map[string]any + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} + } + + // Extract the data array + dataArray, ok := data["data"].([]interface{}) + if !ok { + return mq.Result{Error: fmt.Errorf("expected 'data' field to be an array")} + } + + groupByFields := h.getGroupByFields() + if len(groupByFields) == 0 { + return mq.Result{Error: fmt.Errorf("group_by fields not specified")} + } + + aggregations := h.getAggregations() + result := 
h.groupData(dataArray, groupByFields, aggregations) + + // Update the data with grouped result + data["data"] = result + data["grouped"] = true + data["group_count"] = len(result) + + resultPayload, err := json.Marshal(data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} + } + + return mq.Result{Payload: resultPayload, Ctx: ctx} +} + +func (h *GroupHandler) groupData(dataArray []interface{}, groupByFields []string, aggregations map[string]string) []map[string]any { + groups := make(map[string][]map[string]any) + + // Group data by specified fields + for _, item := range dataArray { + record, ok := item.(map[string]any) + if !ok { + continue + } + + // Create group key + groupKey := h.createGroupKey(record, groupByFields) + groups[groupKey] = append(groups[groupKey], record) + } + + // Apply aggregations + var result []map[string]any + for _, records := range groups { + groupResult := make(map[string]any) + + // Add group by fields to result + if len(records) > 0 { + for _, field := range groupByFields { + groupResult[field] = records[0][field] + } + } + + // Apply aggregations + for field, aggType := range aggregations { + switch aggType { + case "count": + groupResult[field+"_count"] = len(records) + case "sum": + groupResult[field+"_sum"] = h.sumField(records, field) + case "avg", "average": + sum := h.sumField(records, field) + if count := len(records); count > 0 { + groupResult[field+"_avg"] = sum / float64(count) + } + case "min": + groupResult[field+"_min"] = h.minField(records, field) + case "max": + groupResult[field+"_max"] = h.maxField(records, field) + case "first": + if len(records) > 0 { + groupResult[field+"_first"] = records[0][field] + } + case "last": + if len(records) > 0 { + groupResult[field+"_last"] = records[len(records)-1][field] + } + case "concat": + groupResult[field+"_concat"] = h.concatField(records, field) + case "unique": + groupResult[field+"_unique"] = h.uniqueField(records, field) + } + } + + // Add record count + groupResult["_record_count"] = len(records) + + result = append(result, groupResult) + } + + // Sort results for consistent output + sort.Slice(result, func(i, j int) bool { + for _, field := range groupByFields { + if fmt.Sprintf("%v", result[i][field]) < fmt.Sprintf("%v", result[j][field]) { + return true + } else if fmt.Sprintf("%v", result[i][field]) > fmt.Sprintf("%v", result[j][field]) { + return false + } + } + return false + }) + + return result +} + +func (h *GroupHandler) createGroupKey(record map[string]any, fields []string) string { + var keyParts []string + for _, field := range fields { + keyParts = append(keyParts, fmt.Sprintf("%v", record[field])) + } + return fmt.Sprintf("%v", keyParts) +} + +func (h *GroupHandler) sumField(records []map[string]any, field string) float64 { + var sum float64 + for _, record := range records { + if val, ok := record[field]; ok { + switch v := val.(type) { + case float64: + sum += v + case int: + sum += float64(v) + case int64: + sum += float64(v) + } + } + } + return sum +} + +func (h *GroupHandler) minField(records []map[string]any, field string) interface{} { + if len(records) == 0 { + return nil + } + + var min interface{} + for _, record := range records { + if val, ok := record[field]; ok { + if min == nil { + min = val + } else { + if h.compareValues(val, min) < 0 { + min = val + } + } + } + } + return min +} + +func (h *GroupHandler) maxField(records []map[string]any, field string) interface{} { + if len(records) == 0 { + return nil + } + + var 
max interface{}
+	for _, record := range records {
+		if val, ok := record[field]; ok {
+			if max == nil {
+				max = val
+			} else {
+				if h.compareValues(val, max) > 0 {
+					max = val
+				}
+			}
+		}
+	}
+	return max
+}
+
+func (h *GroupHandler) concatField(records []map[string]any, field string) string {
+	var values []string
+	separator := h.getConcatSeparator()
+
+	for _, record := range records {
+		if val, ok := record[field]; ok && val != nil {
+			values = append(values, fmt.Sprintf("%v", val))
+		}
+	}
+
+	result := ""
+	for i, val := range values {
+		if i > 0 {
+			result += separator
+		}
+		result += val
+	}
+	return result
+}
+
+func (h *GroupHandler) uniqueField(records []map[string]any, field string) []interface{} {
+	seen := make(map[string]bool)
+	var unique []interface{}
+
+	for _, record := range records {
+		if val, ok := record[field]; ok && val != nil {
+			key := fmt.Sprintf("%v", val)
+			if !seen[key] {
+				seen[key] = true
+				unique = append(unique, val)
+			}
+		}
+	}
+
+	return unique
+}
+
+// toFloat reports v as a float64 when it carries a numeric value.
+func (h *GroupHandler) toFloat(v interface{}) (float64, bool) {
+	switch n := v.(type) {
+	case float64:
+		return n, true
+	case int:
+		return float64(n), true
+	case int64:
+		return float64(n), true
+	}
+	return 0, false
+}
+
+func (h *GroupHandler) compareValues(a, b interface{}) int {
+	// Compare numerically when both sides are numbers; JSON-decoded values
+	// arrive as float64, and comparing them as strings would rank 10 below 9,
+	// breaking the min/max aggregations.
+	if af, aok := h.toFloat(a); aok {
+		if bf, bok := h.toFloat(b); bok {
+			if af < bf {
+				return -1
+			} else if af > bf {
+				return 1
+			}
+			return 0
+		}
+	}
+	aStr := fmt.Sprintf("%v", a)
+	bStr := fmt.Sprintf("%v", b)
+	if aStr < bStr {
+		return -1
+	} else if aStr > bStr {
+		return 1
+	}
+	return 0
+}
+
+func (h *GroupHandler) getGroupByFields() []string {
+	if fields, ok := h.Payload.Data["group_by"].([]interface{}); ok {
+		var result []string
+		for _, field := range fields {
+			if str, ok := field.(string); ok {
+				result = append(result, str)
+			}
+		}
+		return result
+	}
+	return nil
+}
+
+func (h *GroupHandler) getAggregations() map[string]string {
+	result := make(map[string]string)
+	if aggs, ok := h.Payload.Data["aggregations"].(map[string]interface{}); ok {
+		for field, aggType := range aggs {
+			if str, ok := aggType.(string); ok {
+				result[field] = str
+			}
+		}
+	}
+	return result
+}
+
+func (h *GroupHandler) getConcatSeparator() string {
+	if sep, ok := h.Payload.Data["concat_separator"].(string); ok {
+		return sep
+	}
+	return ", " // Default separator
+}
+
+func NewGroupHandler(id string) *GroupHandler {
+	return &GroupHandler{
+		Operation: dag.Operation{ID: id, Key: "group", Type: dag.Function, Tags: []string{"data", "aggregation"}},
+	}
+}
diff --git a/handlers/grouping_handler.go b/handlers/grouping_handler.go
deleted file mode 100644
index b02bf30..0000000
--- a/handlers/grouping_handler.go
+++ /dev/null
@@ -1,338 +0,0 @@
-package handlers
-
-import (
-	"context"
-	"fmt"
-	"reflect"
-	"sort"
-	"strconv"
-	"strings"
-
-	"github.com/oarkflow/json"
-	"github.com/oarkflow/mq"
-	"github.com/oarkflow/mq/dag"
-)
-
-// GroupingHandler groups data by specified fields and applies aggregations
-type GroupingHandler struct {
-	dag.Operation
-	GroupByFields []string            `json:"group_by_fields"` // fields to group by
-	Aggregations  []AggregationConfig `json:"aggregations"`    // aggregation configurations
-	SourceField   string              `json:"source_field"`    // field containing array to group
-	TargetField   string              `json:"target_field"`    // field to store grouped result
-	Options       GroupingOptions     `json:"options"`         // additional options
-}
-
-type AggregationConfig struct {
-	Field     string `json:"field"`     // field to aggregate
-	Operation string `json:"operation"` // sum, count, avg, min, max, concat, first, last
-	Alias     string `json:"alias"`     // optional alias for result field
-}
-
-type GroupingOptions struct {
-	SortBy        string `json:"sort_by"`        // field to sort groups by
-	SortDirection string `json:"sort_direction"` // asc or desc
-	IncludeCount  bool   `json:"include_count"`  // include count of items in each group
-	CountAlias 
string `json:"count_alias"` // alias for count field (default: "count") -} - -func (g *GroupingHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - // Get source data - sourceData, exists := data[g.SourceField] - if !exists { - return mq.Result{Error: fmt.Errorf("source field '%s' not found", g.SourceField), Ctx: ctx} - } - - // Convert to slice of maps - items, err := g.convertToSliceOfMaps(sourceData) - if err != nil { - return mq.Result{Error: err, Ctx: ctx} - } - - // Group the data - groups := g.groupData(items) - - // Apply aggregations - result := g.applyAggregations(groups) - - // Sort if requested - if g.Options.SortBy != "" { - result = g.sortGroups(result) - } - - // Set target field - targetField := g.TargetField - if targetField == "" { - targetField = "grouped_data" - } - data[targetField] = result - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (g *GroupingHandler) convertToSliceOfMaps(data any) ([]map[string]any, error) { - rv := reflect.ValueOf(data) - - if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array { - return nil, fmt.Errorf("source data must be an array or slice") - } - - var items []map[string]any - for i := 0; i < rv.Len(); i++ { - item := rv.Index(i).Interface() - - // Convert item to map[string]any - itemMap := make(map[string]any) - itemBytes, err := json.Marshal(item) - if err != nil { - return nil, fmt.Errorf("failed to marshal item at index %d: %v", i, err) - } - - if err := json.Unmarshal(itemBytes, &itemMap); err != nil { - return nil, fmt.Errorf("failed to unmarshal item at index %d: %v", i, err) - } - - items = append(items, itemMap) - } - - return items, nil -} - -func (g *GroupingHandler) groupData(items []map[string]any) map[string][]map[string]any { - groups := make(map[string][]map[string]any) - - for _, item := range items { - // Create group key - var keyParts []string - for _, field := range g.GroupByFields { - value := fmt.Sprintf("%v", item[field]) - keyParts = append(keyParts, value) - } - groupKey := strings.Join(keyParts, "|") - - // Add item to group - groups[groupKey] = append(groups[groupKey], item) - } - - return groups -} - -func (g *GroupingHandler) applyAggregations(groups map[string][]map[string]any) []map[string]any { - var result []map[string]any - - for groupKey, items := range groups { - groupResult := make(map[string]any) - - // Add group key fields - keyParts := strings.Split(groupKey, "|") - for i, field := range g.GroupByFields { - if i < len(keyParts) { - groupResult[field] = keyParts[i] - } - } - - // Add count if requested - if g.Options.IncludeCount { - countAlias := g.Options.CountAlias - if countAlias == "" { - countAlias = "count" - } - groupResult[countAlias] = len(items) - } - - // Apply aggregations - for _, agg := range g.Aggregations { - fieldAlias := agg.Alias - if fieldAlias == "" { - fieldAlias = agg.Field + "_" + agg.Operation - } - - aggregatedValue := g.performAggregation(items, agg) - groupResult[fieldAlias] = aggregatedValue - } - - result = append(result, groupResult) - } - - return result -} - -func (g *GroupingHandler) performAggregation(items []map[string]any, agg AggregationConfig) any { - switch agg.Operation { - case "count": - return len(items) - case "sum": - return g.sumValues(items, agg.Field) - case "avg": - sum := g.sumValues(items, agg.Field) - if count 
:= len(items); count > 0 { - return sum / float64(count) - } - return 0 - case "min": - return g.minValue(items, agg.Field) - case "max": - return g.maxValue(items, agg.Field) - case "first": - if len(items) > 0 { - return items[0][agg.Field] - } - return nil - case "last": - if len(items) > 0 { - return items[len(items)-1][agg.Field] - } - return nil - case "concat": - return g.concatValues(items, agg.Field) - case "unique": - return g.uniqueValues(items, agg.Field) - default: - return nil - } -} - -func (g *GroupingHandler) sumValues(items []map[string]any, field string) float64 { - var sum float64 - for _, item := range items { - if value, exists := item[field]; exists { - if num := g.toFloat64(value); num != 0 { - sum += num - } - } - } - return sum -} - -func (g *GroupingHandler) minValue(items []map[string]any, field string) any { - var min any - for _, item := range items { - if value, exists := item[field]; exists { - if min == nil { - min = value - } else { - if g.compareValues(value, min) < 0 { - min = value - } - } - } - } - return min -} - -func (g *GroupingHandler) maxValue(items []map[string]any, field string) any { - var max any - for _, item := range items { - if value, exists := item[field]; exists { - if max == nil { - max = value - } else { - if g.compareValues(value, max) > 0 { - max = value - } - } - } - } - return max -} - -func (g *GroupingHandler) concatValues(items []map[string]any, field string) string { - var values []string - for _, item := range items { - if value, exists := item[field]; exists { - values = append(values, fmt.Sprintf("%v", value)) - } - } - return strings.Join(values, ", ") -} - -func (g *GroupingHandler) uniqueValues(items []map[string]any, field string) []any { - seen := make(map[string]bool) - var unique []any - - for _, item := range items { - if value, exists := item[field]; exists { - key := fmt.Sprintf("%v", value) - if !seen[key] { - seen[key] = true - unique = append(unique, value) - } - } - } - return unique -} - -func (g *GroupingHandler) toFloat64(value any) float64 { - switch v := value.(type) { - case int: - return float64(v) - case int32: - return float64(v) - case int64: - return float64(v) - case float32: - return float64(v) - case float64: - return v - case string: - if num, err := strconv.ParseFloat(v, 64); err == nil { - return num - } - } - return 0 -} - -func (g *GroupingHandler) compareValues(a, b any) int { - aFloat := g.toFloat64(a) - bFloat := g.toFloat64(b) - - if aFloat < bFloat { - return -1 - } else if aFloat > bFloat { - return 1 - } - - // If numeric comparison doesn't work, compare as strings - aStr := fmt.Sprintf("%v", a) - bStr := fmt.Sprintf("%v", b) - return strings.Compare(aStr, bStr) -} - -func (g *GroupingHandler) sortGroups(groups []map[string]any) []map[string]any { - sort.Slice(groups, func(i, j int) bool { - valueI := groups[i][g.Options.SortBy] - valueJ := groups[j][g.Options.SortBy] - - comparison := g.compareValues(valueI, valueJ) - - if g.Options.SortDirection == "desc" { - return comparison > 0 - } - return comparison < 0 - }) - - return groups -} - -// Factory function -func NewGroupingHandler(id, sourceField, targetField string, groupByFields []string, aggregations []AggregationConfig, options GroupingOptions) *GroupingHandler { - return &GroupingHandler{ - Operation: dag.Operation{ - ID: id, - Key: "group_data", - Type: dag.Function, - Tags: []string{"data", "grouping", "aggregation"}, - }, - GroupByFields: groupByFields, - Aggregations: aggregations, - SourceField: sourceField, - TargetField: 
targetField, - Options: options, - } -} diff --git a/handlers/json_handler.go b/handlers/json_handler.go index 51387d1..8cf9f30 100644 --- a/handlers/json_handler.go +++ b/handlers/json_handler.go @@ -3,364 +3,343 @@ package handlers import ( "context" "fmt" - "strings" "github.com/oarkflow/json" "github.com/oarkflow/mq" "github.com/oarkflow/mq/dag" ) -// JSONHandler handles JSON parsing and stringification operations +// JSONHandler handles JSON parsing and stringifying operations type JSONHandler struct { dag.Operation - OperationType string `json:"operation_type"` // "parse" or "stringify" - SourceField string `json:"source_field"` // field containing data to process - TargetField string `json:"target_field"` // field to store result - Options JSONOptions `json:"options"` // processing options } -type JSONOptions struct { - Pretty bool `json:"pretty"` // pretty print JSON (stringify only) - Indent string `json:"indent"` // indentation string (stringify only) - EscapeHTML bool `json:"escape_html"` // escape HTML in JSON strings (stringify only) - ValidateOnly bool `json:"validate_only"` // only validate, don't parse (parse only) - ErrorOnInvalid bool `json:"error_on_invalid"` // return error if JSON is invalid - DefaultOnError any `json:"default_on_error"` // default value to use if parsing fails - StrictMode bool `json:"strict_mode"` // strict JSON parsing - AllowComments bool `json:"allow_comments"` // allow comments in JSON (parse only) - AllowTrailing bool `json:"allow_trailing"` // allow trailing commas (parse only) -} - -func (j *JSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { +func (h *JSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} } - // Get source value - sourceValue, exists := data[j.SourceField] - if !exists { - return mq.Result{Error: fmt.Errorf("source field '%s' not found", j.SourceField), Ctx: ctx} + operation, ok := h.Payload.Data["operation"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("operation not specified")} } - var result any - var err error - - switch j.OperationType { - case "parse": - result, err = j.parseJSON(sourceValue) - case "stringify": - result, err = j.stringifyJSON(sourceValue) + var result map[string]any + switch operation { + case "parse", "string_to_json": + result = h.parseJSON(data) + case "stringify", "json_to_string": + result = h.stringifyJSON(data) + case "pretty_print": + result = h.prettyPrintJSON(data) + case "minify": + result = h.minifyJSON(data) + case "validate": + result = h.validateJSON(data) + case "extract_fields": + result = h.extractFields(data) default: - return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", j.OperationType), Ctx: ctx} + return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)} } + resultPayload, err := json.Marshal(result) if err != nil { - if j.Options.ErrorOnInvalid { - return mq.Result{Error: err, Ctx: ctx} - } - // Use default value if specified - if j.Options.DefaultOnError != nil { - result = j.Options.DefaultOnError - } else { - result = sourceValue // keep original value - } + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} } - // Set target field - targetField := 
j.TargetField
-	if targetField == "" {
-		targetField = j.SourceField // overwrite source if no target specified
-	}
-	data[targetField] = result
-
-	bt, _ := json.Marshal(data)
-	return mq.Result{Payload: bt, Ctx: ctx}
+	return mq.Result{Payload: resultPayload, Ctx: ctx}
 }
 
-func (j *JSONHandler) parseJSON(value any) (any, error) {
-	// Convert value to string
-	jsonStr := fmt.Sprintf("%v", value)
+func (h *JSONHandler) parseJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
 
-	// Validate only if requested
-	if j.Options.ValidateOnly {
-		var temp any
-		err := json.Unmarshal([]byte(jsonStr), &temp)
-		if err != nil {
-			return false, fmt.Errorf("invalid JSON: %v", err)
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			if str, ok := val.(string); ok {
+				var parsed interface{}
+				if err := json.Unmarshal([]byte(str), &parsed); err == nil {
+					targetField := h.getTargetFieldForSource(field)
+					result[targetField] = parsed
+					// The default target for "parse" is field+"_parsed", the same
+					// key used for the status flag; only set the flag when it
+					// would not clobber the parsed value.
+					if targetField != field+"_parsed" {
+						result[field+"_parsed"] = true
+					}
+				} else {
+					result[field+"_parse_error"] = err.Error()
+					result[field+"_parsed"] = false
+				}
+			}
 		}
-		return true, nil
-	}
-
-	// Preprocess if needed
-	if j.Options.AllowComments {
-		jsonStr = j.removeComments(jsonStr)
-	}
-
-	if j.Options.AllowTrailing {
-		jsonStr = j.removeTrailingCommas(jsonStr)
-	}
-
-	// Parse JSON
-	var result any
-	err := json.Unmarshal([]byte(jsonStr), &result)
-	if err != nil {
-		return nil, fmt.Errorf("failed to parse JSON: %v", err)
-	}
-
-	return result, nil
-}
 
 	return result
 }
 
-func (j *JSONHandler) stringifyJSON(value any) (string, error) {
-	var result []byte
-	var err error
-
-	if j.Options.Pretty {
-		indent := j.Options.Indent
-		if indent == "" {
-			indent = "  " // default indentation
-		}
-		result, err = json.MarshalIndent(value, "", indent)
-	} else {
-		result, err = json.Marshal(value)
-	}
-
-	if err != nil {
-		return "", fmt.Errorf("failed to stringify JSON: %v", err)
-	}
-
-	return string(result), nil
-}
-
-func (j *JSONHandler) removeComments(jsonStr string) string {
-	lines := strings.Split(jsonStr, "\n")
-	var cleanLines []string
-
-	for _, line := range lines {
-		// Remove single-line comments
-		if commentIndex := strings.Index(line, "//"); commentIndex != -1 {
-			line = line[:commentIndex]
-		}
-		cleanLines = append(cleanLines, line)
-	}
-
-	result := strings.Join(cleanLines, "\n")
-
-	// Remove multi-line comments (basic implementation)
-	for {
-		start := strings.Index(result, "/*")
-		if start == -1 {
-			break
-		}
-		end := strings.Index(result[start:], "*/")
-		if end == -1 {
-			break
-		}
-		result = result[:start] + result[start+end+2:]
-	}
-
-	return result
-}
-
-func (j *JSONHandler) removeTrailingCommas(jsonStr string) string {
-	// Basic implementation - remove commas before closing brackets/braces
-	jsonStr = strings.ReplaceAll(jsonStr, ",}", "}")
-	jsonStr = strings.ReplaceAll(jsonStr, ",]", "]")
-	return jsonStr
-}
-
-// Advanced JSON handler for complex operations
-type AdvancedJSONHandler struct {
-	dag.Operation
-	Operations []JSONOperation `json:"operations"` // chain of JSON operations
-}
-
-type JSONOperation struct {
-	Type        string      `json:"type"`         // "parse", "stringify", "validate", "extract", "merge"
-	SourceField string      `json:"source_field"` // field to operate on
-	TargetField string      `json:"target_field"` // field to store result
-	Options     JSONOptions `json:"options"`      // operation options
-	Path        string      `json:"path"`         // JSON path for extraction (extract only) 
- MergeWith string `json:"merge_with"` // field to merge with (merge only) -} - -func (a *AdvancedJSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - // Execute operations in sequence - for i, op := range a.Operations { - var result any - var err error - - switch op.Type { - case "parse", "stringify": - handler := &JSONHandler{ - OperationType: op.Type, - SourceField: op.SourceField, - TargetField: op.TargetField, - Options: op.Options, - } - - tempData, _ := json.Marshal(data) - tempTask := &mq.Task{Payload: tempData} - - handlerResult := handler.ProcessTask(ctx, tempTask) - if handlerResult.Error != nil { - return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, handlerResult.Error), Ctx: ctx} - } - - if err := json.Unmarshal(handlerResult.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal result from operation %d: %v", i+1, err), Ctx: ctx} - } - continue - - case "validate": - result, err = a.validateJSON(data[op.SourceField]) - case "extract": - result, err = a.extractFromJSON(data[op.SourceField], op.Path) - case "merge": - result, err = a.mergeJSON(data[op.SourceField], data[op.MergeWith]) - default: - return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", op.Type), Ctx: ctx} - } - - if err != nil { - if op.Options.ErrorOnInvalid { - return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, err), Ctx: ctx} - } - result = op.Options.DefaultOnError - } - - // Set target field - targetField := op.TargetField - if targetField == "" { - targetField = op.SourceField - } - data[targetField] = result - } - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} -} - -func (a *AdvancedJSONHandler) validateJSON(value any) (bool, error) { - jsonStr := fmt.Sprintf("%v", value) - var temp any - err := json.Unmarshal([]byte(jsonStr), &temp) - return err == nil, err -} - -func (a *AdvancedJSONHandler) extractFromJSON(value any, path string) (any, error) { - // Basic JSON path extraction (simplified implementation) - // For production use, consider using a proper JSON path library - - var jsonData any - if str, ok := value.(string); ok { - if err := json.Unmarshal([]byte(str), &jsonData); err != nil { - return nil, fmt.Errorf("invalid JSON: %v", err) - } - } else { - jsonData = value - } - - // Split path and navigate - parts := strings.Split(strings.Trim(path, "."), ".") - current := jsonData - - for _, part := range parts { - if part == "" { - continue - } - - switch v := current.(type) { - case map[string]any: - current = v[part] - default: - return nil, fmt.Errorf("cannot navigate path '%s' at part '%s'", path, part) - } - } - - return current, nil -} - -func (a *AdvancedJSONHandler) mergeJSON(value1, value2 any) (any, error) { - // Convert both values to maps if they're JSON strings - var map1, map2 map[string]any - - if str, ok := value1.(string); ok { - if err := json.Unmarshal([]byte(str), &map1); err != nil { - return nil, fmt.Errorf("invalid JSON in first value: %v", err) - } - } else if m, ok := value1.(map[string]any); ok { - map1 = m - } else { - return nil, fmt.Errorf("first value is not a JSON object") - } - - if str, ok := value2.(string); ok { - if err := json.Unmarshal([]byte(str), &map2); err != nil { - return nil, fmt.Errorf("invalid JSON in second value: %v", err) - } - } else if m, ok := 
value2.(map[string]any); ok { - map2 = m - } else { - return nil, fmt.Errorf("second value is not a JSON object") - } - - // Merge maps +func (h *JSONHandler) stringifyJSON(data map[string]any) map[string]any { result := make(map[string]any) - for k, v := range map1 { - result[k] = v - } - for k, v := range map2 { - result[k] = v // overwrites if key exists + fields := h.getTargetFields() + indent := h.getIndent() + + // Copy all original data + for key, value := range data { + result[key] = value } - return result, nil + for _, field := range fields { + if val, ok := data[field]; ok { + var jsonBytes []byte + var err error + + if indent { + jsonBytes, err = json.MarshalIndent(val, "", " ") + } else { + jsonBytes, err = json.Marshal(val) + } + + if err == nil { + targetField := h.getTargetFieldForSource(field) + result[targetField] = string(jsonBytes) + result[field+"_stringified"] = true + } else { + result[field+"_stringify_error"] = err.Error() + result[field+"_stringified"] = false + } + } + } + + return result } -// Factory functions -func NewJSONParser(id, sourceField, targetField string, options JSONOptions) *JSONHandler { +func (h *JSONHandler) prettyPrintJSON(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for _, field := range fields { + if val, ok := data[field]; ok { + var prettyJSON interface{} + + // If it's a string, try to parse it first + if str, ok := val.(string); ok { + if err := json.Unmarshal([]byte(str), &prettyJSON); err != nil { + prettyJSON = val + } + } else { + prettyJSON = val + } + + if jsonBytes, err := json.MarshalIndent(prettyJSON, "", " "); err == nil { + targetField := h.getTargetFieldForSource(field) + result[targetField] = string(jsonBytes) + } + } + } + + return result +} + +func (h *JSONHandler) minifyJSON(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for _, field := range fields { + if val, ok := data[field]; ok { + var minifyJSON interface{} + + // If it's a string, try to parse it first + if str, ok := val.(string); ok { + if err := json.Unmarshal([]byte(str), &minifyJSON); err != nil { + minifyJSON = val + } + } else { + minifyJSON = val + } + + if jsonBytes, err := json.Marshal(minifyJSON); err == nil { + targetField := h.getTargetFieldForSource(field) + result[targetField] = string(jsonBytes) + } + } + } + + return result +} + +func (h *JSONHandler) validateJSON(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for _, field := range fields { + if val, ok := data[field]; ok { + if str, ok := val.(string); ok { + var temp interface{} + if err := json.Unmarshal([]byte(str), &temp); err == nil { + result[field+"_valid_json"] = true + result[field+"_json_type"] = h.getJSONType(temp) + } else { + result[field+"_valid_json"] = false + result[field+"_validation_error"] = err.Error() + } + } else { + result[field+"_valid_json"] = true + result[field+"_json_type"] = h.getJSONType(val) + } + } + } + + return result +} + +func (h *JSONHandler) extractFields(data map[string]any) map[string]any { + result := make(map[string]any) + sourceField := h.getSourceField() + fieldsToExtract := h.getFieldsToExtract() + + // Copy all 
original data + for key, value := range data { + result[key] = value + } + + if val, ok := data[sourceField]; ok { + var jsonData map[string]interface{} + + // If it's a string, parse it + if str, ok := val.(string); ok { + if err := json.Unmarshal([]byte(str), &jsonData); err != nil { + result["extract_error"] = err.Error() + return result + } + } else if obj, ok := val.(map[string]interface{}); ok { + jsonData = obj + } else { + result["extract_error"] = "source field is not a JSON object or string" + return result + } + + // Extract specified fields + for _, fieldPath := range fieldsToExtract { + if extractedVal := h.extractNestedField(jsonData, fieldPath); extractedVal != nil { + result[fieldPath] = extractedVal + } + } + } + + return result +} + +func (h *JSONHandler) extractNestedField(data map[string]interface{}, fieldPath string) interface{} { + // Simple implementation for dot notation + // For more complex path extraction, could use jsonpath library + if val, ok := data[fieldPath]; ok { + return val + } + return nil +} + +func (h *JSONHandler) getJSONType(val interface{}) string { + switch val.(type) { + case map[string]interface{}: + return "object" + case []interface{}: + return "array" + case string: + return "string" + case float64: + return "number" + case bool: + return "boolean" + case nil: + return "null" + default: + return "unknown" + } +} + +func (h *JSONHandler) getTargetFields() []string { + if fields, ok := h.Payload.Data["fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *JSONHandler) getSourceField() string { + if field, ok := h.Payload.Data["source_field"].(string); ok { + return field + } + return "" +} + +func (h *JSONHandler) getFieldsToExtract() []string { + if fields, ok := h.Payload.Data["extract_fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *JSONHandler) getTargetFieldForSource(sourceField string) string { + // Check if there's a specific mapping + if mapping, ok := h.Payload.Data["field_mapping"].(map[string]interface{}); ok { + if target, ok := mapping[sourceField].(string); ok { + return target + } + } + + // Default: append suffix based on operation + operation, _ := h.Payload.Data["operation"].(string) + switch operation { + case "parse", "string_to_json": + return sourceField + "_parsed" + case "stringify", "json_to_string": + return sourceField + "_string" + case "pretty_print": + return sourceField + "_pretty" + case "minify": + return sourceField + "_minified" + default: + return sourceField + "_result" + } +} + +func (h *JSONHandler) getIndent() bool { + if indent, ok := h.Payload.Data["indent"].(bool); ok { + return indent + } + return false +} + +func NewJSONHandler(id string) *JSONHandler { return &JSONHandler{ - Operation: dag.Operation{ - ID: id, - Key: "json_parse", - Type: dag.Function, - Tags: []string{"data", "json", "parse"}, - }, - OperationType: "parse", - SourceField: sourceField, - TargetField: targetField, - Options: options, - } -} - -func NewJSONStringifier(id, sourceField, targetField string, options JSONOptions) *JSONHandler { - return &JSONHandler{ - Operation: dag.Operation{ - ID: id, - Key: "json_stringify", - Type: dag.Function, - Tags: []string{"data", "json", "stringify"}, - }, - OperationType: 
"stringify", - SourceField: sourceField, - TargetField: targetField, - Options: options, - } -} - -func NewAdvancedJSONHandler(id string, operations []JSONOperation) *AdvancedJSONHandler { - return &AdvancedJSONHandler{ - Operation: dag.Operation{ - ID: id, - Key: "advanced_json", - Type: dag.Function, - Tags: []string{"data", "json", "advanced"}, - }, - Operations: operations, + Operation: dag.Operation{ID: id, Key: "json", Type: dag.Function, Tags: []string{"data", "transformation", "json"}}, } } diff --git a/handlers/split_join_handler.go b/handlers/split_join_handler.go index 7da1ad9..b96a491 100644 --- a/handlers/split_join_handler.go +++ b/handlers/split_join_handler.go @@ -3,8 +3,6 @@ package handlers import ( "context" "fmt" - "reflect" - "regexp" "strings" "github.com/oarkflow/json" @@ -12,248 +10,201 @@ import ( "github.com/oarkflow/mq/dag" ) -// SplitJoinHandler handles splitting strings into arrays and joining arrays into strings +// SplitJoinHandler handles string split and join operations type SplitJoinHandler struct { dag.Operation - OpType string `json:"op_type"` // "split" or "join" - SourceField string `json:"source_field"` // field to operate on - TargetField string `json:"target_field"` // field to store result - Delimiter string `json:"delimiter"` // delimiter for split/join - Options SplitJoinOptions `json:"options"` } -type SplitJoinOptions struct { - TrimSpaces bool `json:"trim_spaces"` // trim spaces from elements (split only) - RemoveEmpty bool `json:"remove_empty"` // remove empty elements (split only) - MaxSplit int `json:"max_split"` // maximum number of splits (-1 for unlimited) - UseRegex bool `json:"use_regex"` // treat delimiter as regex pattern (split only) - CaseInsensitive bool `json:"case_insensitive"` // case insensitive regex (split only) - Prefix string `json:"prefix"` // prefix for joined string (join only) - Suffix string `json:"suffix"` // suffix for joined string (join only) -} - -func (s *SplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { +func (h *SplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} + err := json.Unmarshal(task.Payload, &data) + if err != nil { + return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)} } - // Get source value - sourceValue, exists := data[s.SourceField] - if !exists { - return mq.Result{Error: fmt.Errorf("source field '%s' not found", s.SourceField), Ctx: ctx} + operation, ok := h.Payload.Data["operation"].(string) + if !ok { + return mq.Result{Error: fmt.Errorf("operation not specified")} } - var result any - var err error - - switch s.OpType { + var result map[string]any + switch operation { case "split": - result, err = s.performSplit(sourceValue) + result = h.splitOperation(data) case "join": - result, err = s.performJoin(sourceValue) + result = h.joinOperation(data) + case "split_to_array": + result = h.splitToArrayOperation(data) + case "join_from_array": + result = h.joinFromArrayOperation(data) default: - return mq.Result{Error: fmt.Errorf("unsupported operation: %s", s.OpType), Ctx: ctx} + return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)} } + resultPayload, err := json.Marshal(result) if err != nil { - return mq.Result{Error: err, Ctx: ctx} + return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)} } - // Set target 
field - targetField := s.TargetField - if targetField == "" { - targetField = s.SourceField // overwrite source if no target specified - } - data[targetField] = result - - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} + return mq.Result{Payload: resultPayload, Ctx: ctx} } -func (s *SplitJoinHandler) performSplit(value any) ([]string, error) { - // Convert value to string - str := fmt.Sprintf("%v", value) +func (h *SplitJoinHandler) splitOperation(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + separator := h.getSeparator() - var parts []string + // Copy all original data + for key, value := range data { + result[key] = value + } - if s.Options.UseRegex { - // Use regex for splitting - flags := "" - if s.Options.CaseInsensitive { - flags = "(?i)" - } - pattern := flags + s.Delimiter + for _, field := range fields { + if val, ok := data[field]; ok { + if str, ok := val.(string); ok { + parts := strings.Split(str, separator) - re, err := regexp.Compile(pattern) - if err != nil { - return nil, fmt.Errorf("invalid regex pattern '%s': %v", pattern, err) - } + // Create individual fields for each part + for i, part := range parts { + result[fmt.Sprintf("%s_%d", field, i)] = strings.TrimSpace(part) + } - if s.Options.MaxSplit > 0 { - parts = re.Split(str, s.Options.MaxSplit+1) - } else { - parts = re.Split(str, -1) - } - } else { - // Use simple string splitting - if s.Options.MaxSplit > 0 { - parts = strings.SplitN(str, s.Delimiter, s.Options.MaxSplit+1) - } else { - parts = strings.Split(str, s.Delimiter) + // Also store as array + result[field+"_parts"] = parts + result[field+"_count"] = len(parts) + } } } - // Process the parts based on options - var processedParts []string - for _, part := range parts { - if s.Options.TrimSpaces { - part = strings.TrimSpace(part) - } - - if s.Options.RemoveEmpty && part == "" { - continue - } - - processedParts = append(processedParts, part) - } - - return processedParts, nil + return result } -func (s *SplitJoinHandler) performJoin(value any) (string, error) { - // Convert value to slice of strings - parts, err := s.convertToStringSlice(value) - if err != nil { - return "", err - } +func (h *SplitJoinHandler) joinOperation(data map[string]any) map[string]any { + result := make(map[string]any) + targetField := h.getTargetField() + separator := h.getSeparator() + sourceFields := h.getSourceFields() - // Join the parts - joined := strings.Join(parts, s.Delimiter) - - // Add prefix/suffix if specified - if s.Options.Prefix != "" { - joined = s.Options.Prefix + joined - } - if s.Options.Suffix != "" { - joined = joined + s.Options.Suffix - } - - return joined, nil -} - -func (s *SplitJoinHandler) convertToStringSlice(value any) ([]string, error) { - rv := reflect.ValueOf(value) - - if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array { - return nil, fmt.Errorf("value must be an array or slice for join operation") + // Copy all original data + for key, value := range data { + result[key] = value } var parts []string - for i := 0; i < rv.Len(); i++ { - element := rv.Index(i).Interface() - parts = append(parts, fmt.Sprintf("%v", element)) - } - - return parts, nil -} - -// Advanced split/join handler for complex operations -type AdvancedSplitJoinHandler struct { - dag.Operation - Operations []SplitJoinOperation `json:"operations"` // chain of split/join operations -} - -type SplitJoinOperation struct { - Type string `json:"type"` // "split" or "join" - SourceField string 
`json:"source_field"` // field to operate on - TargetField string `json:"target_field"` // field to store result - Delimiter string `json:"delimiter"` // delimiter for operation - Options SplitJoinOptions `json:"options"` // operation options -} - -func (a *AdvancedSplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result { - var data map[string]any - if err := json.Unmarshal(task.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx} - } - - // Execute operations in sequence - for i, op := range a.Operations { - handler := &SplitJoinHandler{ - Operation: dag.Operation{ - ID: fmt.Sprintf("%s_op_%d", a.ID, i), - Key: "temp_split_join", - Type: dag.Function, - Tags: []string{"data", "temp"}, - }, - OpType: op.Type, - SourceField: op.SourceField, - TargetField: op.TargetField, - Delimiter: op.Delimiter, - Options: op.Options, - } - - // Create a temporary task for this operation - tempData, _ := json.Marshal(data) - tempTask := &mq.Task{Payload: tempData} - - result := handler.ProcessTask(ctx, tempTask) - if result.Error != nil { - return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, result.Error), Ctx: ctx} - } - - // Update data with the result - if err := json.Unmarshal(result.Payload, &data); err != nil { - return mq.Result{Error: fmt.Errorf("failed to unmarshal result from operation %d: %v", i+1, err), Ctx: ctx} + for _, field := range sourceFields { + if val, ok := data[field]; ok && val != nil { + parts = append(parts, fmt.Sprintf("%v", val)) } } - bt, _ := json.Marshal(data) - return mq.Result{Payload: bt, Ctx: ctx} + if len(parts) > 0 { + result[targetField] = strings.Join(parts, separator) + } + + return result } -// Factory functions -func NewSplitHandler(id, sourceField, targetField, delimiter string, options SplitJoinOptions) *SplitJoinHandler { +func (h *SplitJoinHandler) splitToArrayOperation(data map[string]any) map[string]any { + result := make(map[string]any) + fields := h.getTargetFields() + separator := h.getSeparator() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + for _, field := range fields { + if val, ok := data[field]; ok { + if str, ok := val.(string); ok { + parts := strings.Split(str, separator) + var cleanParts []interface{} + for _, part := range parts { + cleanParts = append(cleanParts, strings.TrimSpace(part)) + } + result[field+"_array"] = cleanParts + } + } + } + + return result +} + +func (h *SplitJoinHandler) joinFromArrayOperation(data map[string]any) map[string]any { + result := make(map[string]any) + targetField := h.getTargetField() + separator := h.getSeparator() + sourceField := h.getSourceField() + + // Copy all original data + for key, value := range data { + result[key] = value + } + + if val, ok := data[sourceField]; ok { + if arr, ok := val.([]interface{}); ok { + var parts []string + for _, item := range arr { + if item != nil { + parts = append(parts, fmt.Sprintf("%v", item)) + } + } + result[targetField] = strings.Join(parts, separator) + } + } + + return result +} + +func (h *SplitJoinHandler) getTargetFields() []string { + if fields, ok := h.Payload.Data["fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *SplitJoinHandler) getTargetField() string { + if field, ok := h.Payload.Data["target_field"].(string); ok { + return field + } + return 
"joined_field" +} + +func (h *SplitJoinHandler) getSourceField() string { + if field, ok := h.Payload.Data["source_field"].(string); ok { + return field + } + return "" +} + +func (h *SplitJoinHandler) getSourceFields() []string { + if fields, ok := h.Payload.Data["source_fields"].([]interface{}); ok { + var result []string + for _, field := range fields { + if str, ok := field.(string); ok { + result = append(result, str) + } + } + return result + } + return nil +} + +func (h *SplitJoinHandler) getSeparator() string { + if sep, ok := h.Payload.Data["separator"].(string); ok { + return sep + } + return "," // Default separator +} + +func NewSplitJoinHandler(id string) *SplitJoinHandler { return &SplitJoinHandler{ - Operation: dag.Operation{ - ID: id, - Key: "split_string", - Type: dag.Function, - Tags: []string{"data", "string", "split"}, - }, - OpType: "split", - SourceField: sourceField, - TargetField: targetField, - Delimiter: delimiter, - Options: options, - } -} - -func NewJoinHandler(id, sourceField, targetField, delimiter string, options SplitJoinOptions) *SplitJoinHandler { - return &SplitJoinHandler{ - Operation: dag.Operation{ - ID: id, - Key: "join_array", - Type: dag.Function, - Tags: []string{"data", "array", "join"}, - }, - OpType: "join", - SourceField: sourceField, - TargetField: targetField, - Delimiter: delimiter, - Options: options, - } -} - -func NewAdvancedSplitJoinHandler(id string, operations []SplitJoinOperation) *AdvancedSplitJoinHandler { - return &AdvancedSplitJoinHandler{ - Operation: dag.Operation{ - ID: id, - Key: "advanced_split_join", - Type: dag.Function, - Tags: []string{"data", "string", "array", "advanced"}, - }, - Operations: operations, + Operation: dag.Operation{ID: id, Key: "split_join", Type: dag.Function, Tags: []string{"data", "transformation", "string"}}, } } diff --git a/metrics/metrics.go b/metrics/metrics.go deleted file mode 100644 index 1bfa282..0000000 --- a/metrics/metrics.go +++ /dev/null @@ -1,43 +0,0 @@ -package metrics - -import ( - "net/http" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" -) - -var ( - taskProcessed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "tasks_processed_total", - Help: "Total number of tasks processed.", - }, - []string{"status"}, - ) - taskProcessingTime = prometheus.NewHistogram( - prometheus.HistogramOpts{ - Name: "task_processing_time_seconds", - Help: "Histogram of task processing times.", - Buckets: prometheus.DefBuckets, - }, - ) -) - -func init() { - prometheus.MustRegister(taskProcessed) - prometheus.MustRegister(taskProcessingTime) -} - -func RecordTaskProcessed(status string) { - taskProcessed.WithLabelValues(status).Inc() -} - -func RecordTaskProcessingTime(duration float64) { - taskProcessingTime.Observe(duration) -} - -func StartMetricsServer(port string) { - http.Handle("/metrics", promhttp.Handler()) - go http.ListenAndServe(port, nil) -}