mirror of https://github.com/oarkflow/mq.git (synced 2025-10-04 23:52:48 +08:00)

commit: update

examples/data_transform_demo.go | 763 (new file)
@@ -0,0 +1,763 @@
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"

	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
	"github.com/oarkflow/mq/handlers"
)

func main() {
	fmt.Println("=== Data Transformation Handlers Examples ===")

	// Test each handler with sample data.
	testFormatHandler()
	testGroupHandler()
	testSplitJoinHandler()
	testFlattenHandler()
	testJSONHandler()
	testFieldHandler()
	testDataHandler()

	// Example of chaining handlers.
	exampleDAGChaining()
}

func testFormatHandler() {
	fmt.Println("\n1. FORMAT HANDLER TESTS")
	fmt.Println("========================")

	// Test uppercase formatting.
	testData := map[string]any{
		"name":  "john doe",
		"title": "software engineer",
		"age":   30,
	}

	handler := handlers.NewFormatHandler("format-test")
	config := dag.Payload{
		Data: map[string]any{
			"format_type": "uppercase",
			"fields":      []string{"name", "title"},
		},
	}
	handler.SetConfig(config)

	result := runHandler(handler, testData, "Uppercase Format")
	printResult("Uppercase formatting", result)

	// Test currency formatting.
	currencyData := map[string]any{
		"price": 99.99,
		"tax":   "15.50",
		"total": 115.49,
	}

	currencyHandler := handlers.NewFormatHandler("currency-test")
	currencyConfig := dag.Payload{
		Data: map[string]any{
			"format_type": "currency",
			"fields":      []string{"price", "tax", "total"},
			"currency":    "$",
		},
	}
	currencyHandler.SetConfig(currencyConfig)

	result = runHandler(currencyHandler, currencyData, "Currency Format")
	printResult("Currency formatting", result)

	// Test date formatting.
	dateData := map[string]any{
		"created_at": "2023-06-15T10:30:00Z",
		"updated_at": "2023-06-20",
	}

	dateHandler := handlers.NewFormatHandler("date-test")
	dateConfig := dag.Payload{
		Data: map[string]any{
			"format_type": "date",
			"fields":      []string{"created_at", "updated_at"},
			"date_format": "2006-01-02",
		},
	}
	dateHandler.SetConfig(dateConfig)

	result = runHandler(dateHandler, dateData, "Date Format")
	printResult("Date formatting", result)
}

func testGroupHandler() {
	fmt.Println("\n2. GROUP HANDLER TESTS")
	fmt.Println("======================")

	// Test data grouping with aggregation.
	testData := map[string]any{
		"data": []interface{}{
			map[string]any{"department": "Engineering", "salary": 80000, "age": 30, "name": "John"},
			map[string]any{"department": "Engineering", "salary": 90000, "age": 25, "name": "Jane"},
			map[string]any{"department": "Marketing", "salary": 60000, "age": 35, "name": "Bob"},
			map[string]any{"department": "Marketing", "salary": 65000, "age": 28, "name": "Alice"},
			map[string]any{"department": "Engineering", "salary": 95000, "age": 32, "name": "Mike"},
		},
	}

	handler := handlers.NewGroupHandler("group-test")
	config := dag.Payload{
		Data: map[string]any{
			"group_by": []string{"department"},
			"aggregations": map[string]any{
				"salary": "sum",
				"age":    "avg",
				"name":   "concat",
			},
			"concat_separator": ", ",
		},
	}
	handler.SetConfig(config)

	result := runHandler(handler, testData, "Group by Department")
	printResult("Data grouping", result)
}

func testSplitJoinHandler() {
	fmt.Println("\n3. SPLIT/JOIN HANDLER TESTS")
	fmt.Println("============================")

	// Test split operation.
	testData := map[string]any{
		"full_name": "John Michael Doe",
		"tags":      "go,programming,backend,api",
		"skills":    "golang python javascript",
	}

	splitHandler := handlers.NewSplitJoinHandler("split-test")
	splitConfig := dag.Payload{
		Data: map[string]any{
			"operation": "split",
			"fields":    []string{"full_name", "skills"},
			"separator": " ",
		},
	}
	splitHandler.SetConfig(splitConfig)

	result := runHandler(splitHandler, testData, "Split Operation (space)")
	printResult("String splitting with space", result)

	// Test split with comma.
	splitHandler2 := handlers.NewSplitJoinHandler("split-test-2")
	splitConfig2 := dag.Payload{
		Data: map[string]any{
			"operation": "split",
			"fields":    []string{"tags"},
			"separator": ",",
		},
	}
	splitHandler2.SetConfig(splitConfig2)

	result = runHandler(splitHandler2, testData, "Split Operation (comma)")
	printResult("String splitting with comma", result)

	// Test join operation.
	joinData := map[string]any{
		"first_name":  "John",
		"middle_name": "Michael",
		"last_name":   "Doe",
		"title":       "Mr.",
	}

	joinHandler := handlers.NewSplitJoinHandler("join-test")
	joinConfig := dag.Payload{
		Data: map[string]any{
			"operation":     "join",
			"source_fields": []string{"title", "first_name", "middle_name", "last_name"},
			"target_field":  "full_name_with_title",
			"separator":     " ",
		},
	}
	joinHandler.SetConfig(joinConfig)

	result = runHandler(joinHandler, joinData, "Join Operation")
	printResult("String joining", result)
}

func testFlattenHandler() {
	fmt.Println("\n4. FLATTEN HANDLER TESTS")
	fmt.Println("=========================")

	// Test flattening a settings list.
	testData := map[string]any{
		"user_id": 123,
		"settings": []interface{}{
			map[string]any{"key": "theme", "value": "dark", "value_type": "string"},
			map[string]any{"key": "notifications", "value": "true", "value_type": "boolean"},
			map[string]any{"key": "max_items", "value": "50", "value_type": "integer"},
			map[string]any{"key": "timeout", "value": "30.5", "value_type": "float"},
		},
	}

	handler := handlers.NewFlattenHandler("flatten-test")
	config := dag.Payload{
		Data: map[string]any{
			"operation":    "flatten_settings",
			"source_field": "settings",
			"target_field": "user_config",
		},
	}
	handler.SetConfig(config)

	result := runHandler(handler, testData, "Flatten Settings")
	printResult("Settings flattening", result)

	// Test flattening key-value pairs.
	kvData := map[string]any{
		"user_id": 456,
		"properties": []interface{}{
			map[string]any{"name": "color", "val": "blue"},
			map[string]any{"name": "size", "val": "large"},
			map[string]any{"name": "weight", "val": "heavy"},
		},
	}

	kvHandler := handlers.NewFlattenHandler("kv-test")
	kvConfig := dag.Payload{
		Data: map[string]any{
			"operation":    "flatten_key_value",
			"source_field": "properties",
			"key_field":    "name",
			"value_field":  "val",
			"target_field": "flattened_props",
		},
	}
	kvHandler.SetConfig(kvConfig)

	result = runHandler(kvHandler, kvData, "Flatten Key-Value")
	printResult("Key-value flattening", result)

	// Test flattening nested objects.
	nestedData := map[string]any{
		"user": map[string]any{
			"id": 123,
			"profile": map[string]any{
				"name":  "John Doe",
				"email": "john@example.com",
				"address": map[string]any{
					"street":  "123 Main St",
					"city":    "New York",
					"country": "USA",
				},
				"preferences": map[string]any{
					"theme":    "dark",
					"language": "en",
				},
			},
		},
	}

	nestedHandler := handlers.NewFlattenHandler("nested-test")
	nestedConfig := dag.Payload{
		Data: map[string]any{
			"operation": "flatten_nested_objects",
			"separator": "_",
		},
	}
	nestedHandler.SetConfig(nestedConfig)

	result = runHandler(nestedHandler, nestedData, "Flatten Nested Objects")
	printResult("Nested object flattening", result)
}

func testJSONHandler() {
	fmt.Println("\n5. JSON HANDLER TESTS")
	fmt.Println("=====================")

	// Test JSON parsing.
	testData := map[string]any{
		"config":   `{"theme": "dark", "language": "en", "notifications": true, "max_items": 100}`,
		"metadata": `["tag1", "tag2", "tag3"]`,
		"user":     `{"id": 123, "name": "John Doe", "active": true}`,
	}

	parseHandler := handlers.NewJSONHandler("json-parse-test")
	parseConfig := dag.Payload{
		Data: map[string]any{
			"operation": "parse",
			"fields":    []string{"config", "metadata", "user"},
		},
	}
	parseHandler.SetConfig(parseConfig)

	result := runHandler(parseHandler, testData, "JSON Parsing")
	printResult("JSON parsing", result)

	// Test JSON stringifying.
	objData := map[string]any{
		"user": map[string]any{
			"id":     123,
			"name":   "John Doe",
			"active": true,
			"roles":  []string{"admin", "user"},
		},
		"preferences": map[string]any{
			"theme":         "dark",
			"notifications": true,
			"language":      "en",
		},
	}

	stringifyHandler := handlers.NewJSONHandler("json-stringify-test")
	stringifyConfig := dag.Payload{
		Data: map[string]any{
			"operation": "stringify",
			"fields":    []string{"user", "preferences"},
			"indent":    true,
		},
	}
	stringifyHandler.SetConfig(stringifyConfig)

	result = runHandler(stringifyHandler, objData, "JSON Stringifying")
	printResult("JSON stringifying", result)

	// Test JSON validation.
	validationData := map[string]any{
		"valid_json":   `{"key": "value"}`,
		"invalid_json": `{"key": value}`, // missing quotes around value
		"valid_array":  `[1, 2, 3]`,
	}

	validateHandler := handlers.NewJSONHandler("json-validate-test")
	validateConfig := dag.Payload{
		Data: map[string]any{
			"operation": "validate",
			"fields":    []string{"valid_json", "invalid_json", "valid_array"},
		},
	}
	validateHandler.SetConfig(validateConfig)

	result = runHandler(validateHandler, validationData, "JSON Validation")
	printResult("JSON validation", result)
}

func testFieldHandler() {
	fmt.Println("\n6. FIELD HANDLER TESTS")
	fmt.Println("======================")

	testData := map[string]any{
		"id":           123,
		"first_name":   "John",
		"last_name":    "Doe",
		"email_addr":   "john@example.com",
		"phone_number": "555-1234",
		"internal_id":  "INT-123",
		"created_at":   "2023-01-15",
		"updated_at":   "2023-06-20",
		"is_active":    true,
		"salary":       75000.50,
	}

	// Test field filtering/selection.
	filterHandler := handlers.NewFieldHandler("filter-test")
	filterConfig := dag.Payload{
		Data: map[string]any{
			"operation": "filter",
			"fields":    []string{"id", "first_name", "last_name", "email_addr", "is_active"},
		},
	}
	filterHandler.SetConfig(filterConfig)

	result := runHandler(filterHandler, testData, "Filter/Select Fields")
	printResult("Field filtering", result)

	// Test field exclusion/removal.
	excludeHandler := handlers.NewFieldHandler("exclude-test")
	excludeConfig := dag.Payload{
		Data: map[string]any{
			"operation": "exclude",
			"fields":    []string{"internal_id", "created_at", "updated_at"},
		},
	}
	excludeHandler.SetConfig(excludeConfig)

	result = runHandler(excludeHandler, testData, "Exclude Fields")
	printResult("Field exclusion", result)

	// Test field renaming.
	renameHandler := handlers.NewFieldHandler("rename-test")
	renameConfig := dag.Payload{
		Data: map[string]any{
			"operation": "rename",
			"mapping": map[string]any{
				"first_name":   "firstName",
				"last_name":    "lastName",
				"email_addr":   "email",
				"phone_number": "phone",
				"created_at":   "createdAt",
				"updated_at":   "updatedAt",
				"is_active":    "active",
			},
		},
	}
	renameHandler.SetConfig(renameConfig)

	result = runHandler(renameHandler, testData, "Rename Fields")
	printResult("Field renaming", result)

	// Test adding new fields.
	addHandler := handlers.NewFieldHandler("add-test")
	addConfig := dag.Payload{
		Data: map[string]any{
			"operation": "add",
			"new_fields": map[string]any{
				"status":       "active",
				"version":      "1.0",
				"is_verified":  true,
				"last_login":   "2023-06-20T10:30:00Z",
				"department":   "Engineering",
				"access_level": 3,
			},
		},
	}
	addHandler.SetConfig(addConfig)

	result = runHandler(addHandler, testData, "Add Fields")
	printResult("Adding fields", result)

	// Test field copying.
	copyHandler := handlers.NewFieldHandler("copy-test")
	copyConfig := dag.Payload{
		Data: map[string]any{
			"operation": "copy",
			"mapping": map[string]any{
				"first_name": "display_name",
				"email_addr": "contact_email",
				"id":         "user_id",
			},
		},
	}
	copyHandler.SetConfig(copyConfig)

	result = runHandler(copyHandler, testData, "Copy Fields")
	printResult("Field copying", result)

	// Test key transformation.
	transformHandler := handlers.NewFieldHandler("transform-test")
	transformConfig := dag.Payload{
		Data: map[string]any{
			"operation":      "transform_keys",
			"transformation": "snake_case",
		},
	}
	transformHandler.SetConfig(transformConfig)

	result = runHandler(transformHandler, testData, "Transform Keys")
	printResult("Key transformation", result)
}

func testDataHandler() {
	fmt.Println("\n7. DATA HANDLER TESTS")
	fmt.Println("=====================")

	// Test data sorting.
	testData := map[string]any{
		"data": []interface{}{
			map[string]any{"name": "John", "age": 30, "salary": 80000, "department": "Engineering"},
			map[string]any{"name": "Jane", "age": 25, "salary": 90000, "department": "Engineering"},
			map[string]any{"name": "Bob", "age": 35, "salary": 75000, "department": "Marketing"},
			map[string]any{"name": "Alice", "age": 28, "salary": 85000, "department": "Marketing"},
		},
	}

	sortHandler := handlers.NewDataHandler("sort-test")
	sortConfig := dag.Payload{
		Data: map[string]any{
			"operation":  "sort",
			"sort_field": "salary",
			"sort_order": "desc",
		},
	}
	sortHandler.SetConfig(sortConfig)

	result := runHandler(sortHandler, testData, "Sort Data by Salary (Desc)")
	printResult("Data sorting", result)

	// Test field calculations. Note that subtotal, total, and grand_total
	// reference previously calculated fields.
	calcData := map[string]any{
		"base_price":    100.0,
		"tax_rate":      0.15,
		"shipping_cost": 10.0,
		"discount":      5.0,
		"quantity":      2,
	}

	calcHandler := handlers.NewDataHandler("calc-test")
	calcConfig := dag.Payload{
		Data: map[string]any{
			"operation": "calculate",
			"calculations": map[string]any{
				"tax_amount": map[string]any{
					"operation": "multiply",
					"fields":    []string{"base_price", "tax_rate"},
				},
				"subtotal": map[string]any{
					"operation": "sum",
					"fields":    []string{"base_price", "tax_amount", "shipping_cost"},
				},
				"total": map[string]any{
					"operation": "subtract",
					"fields":    []string{"subtotal", "discount"},
				},
				"grand_total": map[string]any{
					"operation": "multiply",
					"fields":    []string{"total", "quantity"},
				},
			},
		},
	}
	calcHandler.SetConfig(calcConfig)

	result = runHandler(calcHandler, calcData, "Field Calculations")
	printResult("Field calculations", result)

	// Test data deduplication.
	dupData := map[string]any{
		"data": []interface{}{
			map[string]any{"email": "john@example.com", "name": "John Doe", "id": 1},
			map[string]any{"email": "jane@example.com", "name": "Jane Smith", "id": 2},
			map[string]any{"email": "john@example.com", "name": "John D.", "id": 3}, // duplicate email
			map[string]any{"email": "bob@example.com", "name": "Bob Jones", "id": 4},
			map[string]any{"email": "jane@example.com", "name": "Jane S.", "id": 5}, // duplicate email
		},
	}

	dedupHandler := handlers.NewDataHandler("dedup-test")
	dedupConfig := dag.Payload{
		Data: map[string]any{
			"operation":     "deduplicate",
			"dedupe_fields": []string{"email"},
		},
	}
	dedupHandler.SetConfig(dedupConfig)

	result = runHandler(dedupHandler, dupData, "Data Deduplication")
	printResult("Data deduplication", result)

	// Test type casting.
	castData := map[string]any{
		"user_id":     "123",
		"age":         "30",
		"salary":      "75000.50",
		"is_active":   "true",
		"score":       "95.5",
		"name":        123,
		"is_verified": "false",
	}

	castHandler := handlers.NewDataHandler("cast-test")
	castConfig := dag.Payload{
		Data: map[string]any{
			"operation": "type_cast",
			"cast": map[string]any{
				"user_id":     "int",
				"age":         "int",
				"salary":      "float",
				"is_active":   "bool",
				"score":       "float",
				"name":        "string",
				"is_verified": "bool",
			},
		},
	}
	castHandler.SetConfig(castConfig)

	result = runHandler(castHandler, castData, "Type Casting")
	printResult("Type casting", result)

	// Test conditional field setting.
	condData := map[string]any{
		"age":              25,
		"salary":           60000,
		"years_experience": 3,
	}

	condHandler := handlers.NewDataHandler("conditional-test")
	condConfig := dag.Payload{
		Data: map[string]any{
			"operation": "conditional_set",
			"conditions": map[string]any{
				"salary_level": map[string]any{
					"condition": "salary > 70000",
					"if_true":   "high",
					"if_false":  "standard",
				},
				"experience_level": map[string]any{
					"condition": "years_experience >= 5",
					"if_true":   "senior",
					"if_false":  "junior",
				},
			},
		},
	}
	condHandler.SetConfig(condConfig)

	result = runHandler(condHandler, condData, "Conditional Field Setting")
	printResult("Conditional setting", result)
}

// Helper functions

func runHandler(handler dag.Processor, data map[string]any, description string) map[string]any {
	fmt.Printf("\n--- Testing: %s ---\n", description)

	// Convert data to a JSON payload.
	payload, err := json.Marshal(data)
	if err != nil {
		log.Printf("Error marshaling test data: %v", err)
		return nil
	}

	// Create a task.
	task := &mq.Task{
		ID:      mq.NewID(),
		Payload: payload,
	}

	// Process the task.
	ctx := context.Background()
	result := handler.ProcessTask(ctx, task)

	if result.Error != nil {
		log.Printf("Handler error: %v", result.Error)
		return nil
	}

	// Parse the result payload.
	var resultData map[string]any
	if err := json.Unmarshal(result.Payload, &resultData); err != nil {
		log.Printf("Error unmarshaling result: %v", err)
		return nil
	}

	return resultData
}

func printResult(operation string, result map[string]any) {
	if result == nil {
		fmt.Printf("❌ %s failed\n", operation)
		return
	}

	fmt.Printf("✅ %s succeeded\n", operation)

	// Pretty-print the result (truncated for readability).
	resultJSON, err := json.MarshalIndent(result, "", "  ")
	if err != nil {
		fmt.Printf("Error formatting result: %v\n", err)
		return
	}

	// Truncate very long results.
	resultStr := string(resultJSON)
	if len(resultStr) > 1000 {
		resultStr = resultStr[:997] + "..."
	}

	fmt.Printf("Result:\n%s\n", resultStr)
}

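// The functions above chain handlers by hand. A sketch of a reusable helper
// (hypothetical; not part of the mq API) that feeds each handler's result
// payload into the next task, using only the dag.Processor and mq.Task /
// mq.Result types already used in this file:
//
//	func runPipeline(ctx context.Context, payload []byte, steps ...dag.Processor) ([]byte, error) {
//		for _, step := range steps {
//			res := step.ProcessTask(ctx, &mq.Task{ID: mq.NewID(), Payload: payload})
//			if res.Error != nil {
//				return nil, res.Error
//			}
//			payload = res.Payload
//		}
//		return payload, nil
//	}
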
// Example of chaining handlers in a DAG-style workflow.
func exampleDAGChaining() {
	fmt.Println("\n=== CHAINING HANDLERS EXAMPLE ===")
	fmt.Println("==================================")

	// Sample input data with nested JSON and various formatting needs.
	inputData := map[string]any{
		"user_data": `{"firstName": "john", "lastName": "doe", "age": "30", "salary": "75000.50", "isActive": "true"}`,
		"metadata":  `{"department": "engineering", "level": "senior", "skills": ["go", "python", "javascript"]}`,
	}

	fmt.Println("🔗 Chaining multiple handlers to transform data...")
	fmt.Printf("Input data: %+v\n", inputData)

	// Step 1: Parse JSON strings.
	jsonHandler := handlers.NewJSONHandler("json-step")
	jsonConfig := dag.Payload{
		Data: map[string]any{
			"operation": "parse",
			"fields":    []string{"user_data", "metadata"},
		},
	}
	jsonHandler.SetConfig(jsonConfig)

	step1Result := runHandler(jsonHandler, inputData, "Step 1: Parse JSON strings")

	if step1Result != nil {
		// Step 2: Flatten the parsed nested data.
		flattenHandler := handlers.NewFlattenHandler("flatten-step")
		flattenConfig := dag.Payload{
			Data: map[string]any{
				"operation": "flatten_nested_objects",
				"separator": "_",
			},
		}
		flattenHandler.SetConfig(flattenConfig)

		step2Result := runHandler(flattenHandler, step1Result, "Step 2: Flatten nested objects")

		if step2Result != nil {
			// Step 3: Format name fields to proper case.
			formatHandler := handlers.NewFormatHandler("format-step")
			formatConfig := dag.Payload{
				Data: map[string]any{
					"format_type": "capitalize",
					"fields":      []string{"user_data_parsed_firstName", "user_data_parsed_lastName"},
				},
			}
			formatHandler.SetConfig(formatConfig)

			step3Result := runHandler(formatHandler, step2Result, "Step 3: Format names to proper case")

			if step3Result != nil {
				// Step 4: Rename fields to standard naming.
				fieldHandler := handlers.NewFieldHandler("rename-step")
				renameConfig := dag.Payload{
					Data: map[string]any{
						"operation": "rename",
						"mapping": map[string]any{
							"user_data_parsed_firstName": "first_name",
							"user_data_parsed_lastName":  "last_name",
							"user_data_parsed_age":       "age",
							"user_data_parsed_salary":    "salary",
							"user_data_parsed_isActive":  "is_active",
							"metadata_parsed_department": "department",
							"metadata_parsed_level":      "level",
						},
					},
				}
				fieldHandler.SetConfig(renameConfig)

				step4Result := runHandler(fieldHandler, step3Result, "Step 4: Rename fields")

				if step4Result != nil {
					// Step 5: Cast data types.
					dataHandler := handlers.NewDataHandler("cast-step")
					castConfig := dag.Payload{
						Data: map[string]any{
							"operation": "type_cast",
							"cast": map[string]any{
								"age":       "int",
								"salary":    "float",
								"is_active": "bool",
							},
						},
					}
					dataHandler.SetConfig(castConfig)

					finalResult := runHandler(dataHandler, step4Result, "Step 5: Cast data types")
					printResult("🎉 Final chained transformation result", finalResult)
				}
			}
		}
	}
}

go.mod | 8
@@ -14,7 +14,6 @@ require (
	github.com/oarkflow/json v0.0.21
	github.com/oarkflow/log v1.0.79
	github.com/oarkflow/xid v1.2.8
	github.com/prometheus/client_golang v1.21.1
	golang.org/x/crypto v0.33.0
	golang.org/x/exp v0.0.0-20250305212735-054e65f0b394
	golang.org/x/time v0.11.0
@@ -31,22 +30,15 @@ require (

require (
	github.com/andybalholm/brotli v1.1.1 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/goccy/go-reflect v1.2.0 // indirect
	github.com/google/uuid v1.6.0 // indirect
	github.com/klauspost/compress v1.18.0 // indirect
	github.com/mattn/go-colorable v0.1.14 // indirect
	github.com/mattn/go-isatty v0.0.20 // indirect
	github.com/mattn/go-runewidth v0.0.16 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/oarkflow/jsonschema v0.0.4
	github.com/prometheus/client_model v0.6.1 // indirect
	github.com/prometheus/common v0.63.0 // indirect
	github.com/prometheus/procfs v0.16.0 // indirect
	github.com/rivo/uniseg v0.4.7 // indirect
	github.com/valyala/bytebufferpool v1.0.0 // indirect
	github.com/valyala/fasthttp v1.59.0 // indirect
	golang.org/x/sys v0.31.0 // indirect
	google.golang.org/protobuf v1.36.6 // indirect
)

go.sum | 20
@@ -1,9 +1,5 @@
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
@@ -14,8 +10,6 @@ github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/gofiber/fiber/v2 v2.52.6 h1:Rfp+ILPiYSvvVuIPvxrBns+HJp8qGLDnLJawAu27XVI=
github.com/gofiber/fiber/v2 v2.52.6/go.mod h1:YEcBbO/FB+5M1IZNBP9FO3J9281zgPAreiI1oqg8nDw=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
@@ -28,16 +22,12 @@ github.com/kaptinlin/go-i18n v0.1.4 h1:wCiwAn1LOcvymvWIVAM4m5dUAMiHunTdEubLDk4hT
github.com/kaptinlin/go-i18n v0.1.4/go.mod h1:g1fn1GvTgT4CiLE8/fFE1hboHWJ6erivrDpiDtCcFKg=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/oarkflow/date v0.0.4 h1:EwY/wiS3CqZNBx7b2x+3kkJwVNuGk+G0dls76kL/fhU=
github.com/oarkflow/date v0.0.4/go.mod h1:xQTFc6p6O5VX6J75ZrPJbelIFGca1ASmhpgirFqL8vM=
github.com/oarkflow/dipper v0.0.6 h1:E+ak9i4R1lxx0B04CjfG5DTLTmwuWA1nrdS6KIHdUxQ=
@@ -62,14 +52,6 @@ github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk=
github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k=
github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18=
github.com/prometheus/procfs v0.16.0 h1:xh6oHhKwnOJKMYiYBDWmkHqQPyiY40sny36Cmx2bbsM=
github.com/prometheus/procfs v0.16.0/go.mod h1:8veyXUu3nGP7oaCxhX6yeaM5u4stL2FeMXnCqhDthZg=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
@@ -92,7 +74,5 @@ golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

handlers/data_handler.go | 737 (new file)
@@ -0,0 +1,737 @@
package handlers

import (
	"context"
	"fmt"
	"math"
	"reflect"
	"sort"
	"strconv"
	"strings"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// DataHandler handles miscellaneous data operations.
type DataHandler struct {
	dag.Operation
}

func (h *DataHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	err := json.Unmarshal(task.Payload, &data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
	}

	operation, ok := h.Payload.Data["operation"].(string)
	if !ok {
		return mq.Result{Error: fmt.Errorf("operation not specified")}
	}

	var result map[string]any
	switch operation {
	case "sort":
		result = h.sortData(data)
	case "deduplicate":
		result = h.deduplicateData(data)
	case "calculate":
		result = h.calculateFields(data)
	case "conditional_set":
		result = h.conditionalSet(data)
	case "type_cast":
		result = h.typeCast(data)
	case "validate_fields":
		result = h.validateFields(data)
	case "normalize":
		result = h.normalizeData(data)
	case "pivot":
		result = h.pivotData(data)
	case "unpivot":
		result = h.unpivotData(data)
	default:
		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)}
	}

	resultPayload, err := json.Marshal(result)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
	}

	return mq.Result{Payload: resultPayload, Ctx: ctx}
}

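// A minimal usage sketch (mirrors examples/data_transform_demo.go): the
// operation and its options are supplied through the Operation payload
// before processing, e.g.
//
//	h := NewDataHandler("sort-example")
//	h.SetConfig(dag.Payload{Data: map[string]any{
//		"operation":  "sort",
//		"sort_field": "salary",
//		"sort_order": "desc",
//	}})
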
func (h *DataHandler) sortData(data map[string]any) map[string]any {
	result := make(map[string]any)

	// Copy non-array data.
	for key, value := range data {
		if key != "data" {
			result[key] = value
		}
	}

	if dataArray, ok := data["data"].([]interface{}); ok {
		sortField := h.getSortField()
		sortOrder := h.getSortOrder() // "asc" or "desc"

		// Convert to a slice of maps for sorting.
		var records []map[string]interface{}
		for _, item := range dataArray {
			if record, ok := item.(map[string]interface{}); ok {
				records = append(records, record)
			}
		}

		// Sort the records.
		sort.Slice(records, func(i, j int) bool {
			vi := records[i][sortField]
			vj := records[j][sortField]

			comparison := h.compareValues(vi, vj)
			if sortOrder == "desc" {
				return comparison > 0
			}
			return comparison < 0
		})

		// Convert back to []interface{}.
		var sortedData []interface{}
		for _, record := range records {
			sortedData = append(sortedData, record)
		}

		result["data"] = sortedData
	}

	return result
}

func (h *DataHandler) deduplicateData(data map[string]any) map[string]any {
	result := make(map[string]any)

	// Copy non-array data.
	for key, value := range data {
		if key != "data" {
			result[key] = value
		}
	}

	if dataArray, ok := data["data"].([]interface{}); ok {
		dedupeFields := h.getDedupeFields()
		seen := make(map[string]bool)
		var uniqueData []interface{}

		for _, item := range dataArray {
			if record, ok := item.(map[string]interface{}); ok {
				key := h.createDedupeKey(record, dedupeFields)
				if !seen[key] {
					seen[key] = true
					uniqueData = append(uniqueData, item)
				}
			}
		}

		result["data"] = uniqueData
		result["original_count"] = len(dataArray)
		result["deduplicated_count"] = len(uniqueData)
		result["duplicates_removed"] = len(dataArray) - len(uniqueData)
	}

	return result
}

func (h *DataHandler) calculateFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	calculations := h.getCalculations()

	// Copy all original data.
	for key, value := range data {
		result[key] = value
	}

	for targetField, calc := range calculations {
		operation, ok := calc["operation"].(string)
		if !ok {
			continue
		}
		// Accept []string (config built in Go, as in the demo) as well as
		// []interface{} (config decoded from JSON).
		var sourceFields []string
		switch f := calc["fields"].(type) {
		case []string:
			sourceFields = f
		case []interface{}:
			for _, v := range f {
				if s, ok := v.(string); ok {
					sourceFields = append(sourceFields, s)
				}
			}
		}

		// Read from result rather than data so a calculation can see
		// previously calculated fields (subject to map iteration order).
		switch operation {
		case "sum":
			result[targetField] = h.sumFields(result, sourceFields)
		case "subtract":
			result[targetField] = h.subtractFields(result, sourceFields)
		case "multiply":
			result[targetField] = h.multiplyFields(result, sourceFields)
		case "divide":
			result[targetField] = h.divideFields(result, sourceFields)
		case "average":
			result[targetField] = h.averageFields(result, sourceFields)
		case "min":
			result[targetField] = h.minFields(result, sourceFields)
		case "max":
			result[targetField] = h.maxFields(result, sourceFields)
		}
	}

	return result
}

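// Worked example for "calculate", using values from the demo: with
// base_price = 100.0 and tax_rate = 0.15, the calculation
//
//	"tax_amount": {"operation": "multiply", "fields": ["base_price", "tax_rate"]}
//
// yields result["tax_amount"] = 15.0. Because calculations iterate a Go map,
// chains of dependent calculations (e.g. a subtotal that reads tax_amount)
// evaluate in a randomized order and are therefore not deterministic.
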
func (h *DataHandler) conditionalSet(data map[string]any) map[string]any {
	result := make(map[string]any)
	conditions := h.getConditions()

	// Copy all original data.
	for key, value := range data {
		result[key] = value
	}

	for targetField, condConfig := range conditions {
		condition, ok := condConfig["condition"].(string)
		if !ok {
			continue
		}
		ifTrue := condConfig["if_true"]
		ifFalse := condConfig["if_false"]

		if h.evaluateCondition(data, condition) {
			result[targetField] = ifTrue
		} else {
			result[targetField] = ifFalse
		}
	}

	return result
}

func (h *DataHandler) typeCast(data map[string]any) map[string]any {
	result := make(map[string]any)
	castConfig := h.getCastConfig()

	// Copy all original data.
	for key, value := range data {
		result[key] = value
	}

	for field, targetType := range castConfig {
		if val, ok := data[field]; ok {
			result[field] = h.castValue(val, targetType)
		}
	}

	return result
}

func (h *DataHandler) validateFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	validationRules := h.getValidationRules()

	// Copy all original data.
	for key, value := range data {
		result[key] = value
	}

	validationResults := make(map[string]interface{})
	allValid := true

	for field, rules := range validationRules {
		if val, ok := data[field]; ok {
			fieldResult := h.validateField(val, rules)
			validationResults[field] = fieldResult
			if !fieldResult["valid"].(bool) {
				allValid = false
			}
		}
	}

	result["validation_results"] = validationResults
	result["all_valid"] = allValid

	return result
}

func (h *DataHandler) normalizeData(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields()
	normalizationType := h.getNormalizationType()

	// Copy all original data.
	for key, value := range data {
		result[key] = value
	}

	for _, field := range fields {
		if val, ok := data[field]; ok {
			result[field] = h.normalizeValue(val, normalizationType)
		}
	}

	return result
}

func (h *DataHandler) pivotData(data map[string]any) map[string]any {
	// Simplified pivot implementation.
	result := make(map[string]any)

	if dataArray, ok := data["data"].([]interface{}); ok {
		pivotField := h.getPivotField()
		valueField := h.getValueField()

		pivoted := make(map[string]interface{})

		for _, item := range dataArray {
			if record, ok := item.(map[string]interface{}); ok {
				if pivotVal, ok := record[pivotField]; ok {
					if val, ok := record[valueField]; ok {
						key := fmt.Sprintf("%v", pivotVal)
						pivoted[key] = val
					}
				}
			}
		}

		result["pivoted_data"] = pivoted
	}

	return result
}

func (h *DataHandler) unpivotData(data map[string]any) map[string]any {
	// Simplified unpivot implementation.
	result := make(map[string]any)
	unpivotFields := h.getUnpivotFields()

	var unpivotedData []interface{}

	for _, field := range unpivotFields {
		if val, ok := data[field]; ok {
			record := map[string]interface{}{
				"field": field,
				"value": val,
			}
			unpivotedData = append(unpivotedData, record)
		}
	}

	result["data"] = unpivotedData
	result["unpivoted"] = true

	return result
}

// Helper functions

func (h *DataHandler) compareValues(a, b interface{}) int {
	if a == nil && b == nil {
		return 0
	}
	if a == nil {
		return -1
	}
	if b == nil {
		return 1
	}

	// Try numeric comparison first.
	if aNum, aOk := toFloat64(a); aOk {
		if bNum, bOk := toFloat64(b); bOk {
			if aNum < bNum {
				return -1
			} else if aNum > bNum {
				return 1
			}
			return 0
		}
	}

	// Fall back to string comparison.
	aStr := fmt.Sprintf("%v", a)
	bStr := fmt.Sprintf("%v", b)
	if aStr < bStr {
		return -1
	} else if aStr > bStr {
		return 1
	}
	return 0
}

func (h *DataHandler) createDedupeKey(record map[string]interface{}, fields []string) string {
	var keyParts []string
	for _, field := range fields {
		keyParts = append(keyParts, fmt.Sprintf("%v", record[field]))
	}
	return strings.Join(keyParts, "|")
}

func (h *DataHandler) sumFields(data map[string]any, fields []string) float64 {
	var sum float64
	for _, field := range fields {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok {
				sum += num
			}
		}
	}
	return sum
}

func (h *DataHandler) subtractFields(data map[string]any, fields []string) float64 {
	if len(fields) < 2 {
		return 0
	}

	var result float64
	if val, ok := data[fields[0]]; ok {
		if num, ok := toFloat64(val); ok {
			result = num
		}
	}

	for _, field := range fields[1:] {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok {
				result -= num
			}
		}
	}
	return result
}

func (h *DataHandler) multiplyFields(data map[string]any, fields []string) float64 {
	result := 1.0
	for _, field := range fields {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok {
				result *= num
			}
		}
	}
	return result
}

// divideFields divides the first field by each subsequent field; divisors
// equal to zero are skipped to avoid division by zero.
func (h *DataHandler) divideFields(data map[string]any, fields []string) float64 {
	if len(fields) < 2 {
		return 0
	}

	var result float64
	if val, ok := data[fields[0]]; ok {
		if num, ok := toFloat64(val); ok {
			result = num
		}
	}

	for _, field := range fields[1:] {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok && num != 0 {
				result /= num
			}
		}
	}
	return result
}

// averageFields averages over the number of requested fields, not the number
// of fields actually present in the data.
func (h *DataHandler) averageFields(data map[string]any, fields []string) float64 {
	if len(fields) == 0 {
		return 0
	}
	sum := h.sumFields(data, fields)
	return sum / float64(len(fields))
}

// minFields returns the smallest numeric value among the given fields, or
// +Inf if none of them holds a numeric value.
func (h *DataHandler) minFields(data map[string]any, fields []string) float64 {
	min := math.Inf(1)
	for _, field := range fields {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok {
				if num < min {
					min = num
				}
			}
		}
	}
	return min
}

// maxFields returns the largest numeric value among the given fields, or
// -Inf if none of them holds a numeric value.
func (h *DataHandler) maxFields(data map[string]any, fields []string) float64 {
	max := math.Inf(-1)
	for _, field := range fields {
		if val, ok := data[field]; ok {
			if num, ok := toFloat64(val); ok {
				if num > max {
					max = num
				}
			}
		}
	}
	return max
}

func (h *DataHandler) evaluateCondition(data map[string]any, condition string) bool {
	// Simple space-delimited condition evaluation (e.g. "salary > 70000");
	// can be extended.
	parts := strings.Fields(condition)
	if len(parts) >= 3 {
		field := parts[0]
		operator := parts[1]
		value := parts[2]

		if fieldVal, ok := data[field]; ok {
			switch operator {
			case "==", "=":
				return fmt.Sprintf("%v", fieldVal) == value
			case "!=":
				return fmt.Sprintf("%v", fieldVal) != value
			case ">":
				if fieldNum, ok := toFloat64(fieldVal); ok {
					if valueNum, ok := toFloat64(value); ok {
						return fieldNum > valueNum
					}
				}
			case ">=":
				// Needed by the demo's "years_experience >= 5" condition.
				if fieldNum, ok := toFloat64(fieldVal); ok {
					if valueNum, ok := toFloat64(value); ok {
						return fieldNum >= valueNum
					}
				}
			case "<":
				if fieldNum, ok := toFloat64(fieldVal); ok {
					if valueNum, ok := toFloat64(value); ok {
						return fieldNum < valueNum
					}
				}
			case "<=":
				if fieldNum, ok := toFloat64(fieldVal); ok {
					if valueNum, ok := toFloat64(value); ok {
						return fieldNum <= valueNum
					}
				}
			}
		}
	}
	return false
}

func (h *DataHandler) castValue(val interface{}, targetType string) interface{} {
	switch targetType {
	case "string":
		return fmt.Sprintf("%v", val)
	case "int":
		if num, ok := toFloat64(val); ok {
			return int(num)
		}
		return val
	case "float":
		if num, ok := toFloat64(val); ok {
			return num
		}
		return val
	case "bool":
		if str, ok := val.(string); ok {
			return str == "true" || str == "1"
		}
		return val
	default:
		return val
	}
}

func (h *DataHandler) validateField(val interface{}, rules map[string]interface{}) map[string]interface{} {
	result := map[string]interface{}{
		"valid":  true,
		"errors": []string{},
	}

	var errors []string

	// Required validation.
	if required, ok := rules["required"].(bool); ok && required {
		if val == nil || val == "" {
			errors = append(errors, "field is required")
		}
	}

	// Type validation.
	if expectedType, ok := rules["type"].(string); ok {
		if !h.validateType(val, expectedType) {
			errors = append(errors, fmt.Sprintf("expected type %s", expectedType))
		}
	}

	// Range validation for numbers.
	if minVal, ok := rules["min"]; ok {
		if num, numOk := toFloat64(val); numOk {
			if minNum, minOk := toFloat64(minVal); minOk {
				if num < minNum {
					errors = append(errors, fmt.Sprintf("value must be >= %v", minVal))
				}
			}
		}
	}

	if len(errors) > 0 {
		result["valid"] = false
		result["errors"] = errors
	}

	return result
}

func (h *DataHandler) validateType(val interface{}, expectedType string) bool {
	// Guard against nil: reflect.TypeOf(nil) returns nil and the String()
	// call below would panic.
	if val == nil {
		return false
	}
	actualType := reflect.TypeOf(val).String()
	switch expectedType {
	case "string":
		return actualType == "string"
	case "int", "integer":
		return actualType == "int" || actualType == "float64"
	case "float", "number":
		return actualType == "float64" || actualType == "int"
	case "bool", "boolean":
		return actualType == "bool"
	default:
		return true
	}
}

func (h *DataHandler) normalizeValue(val interface{}, normType string) interface{} {
	switch normType {
	case "lowercase":
		if str, ok := val.(string); ok {
			return strings.ToLower(str)
		}
	case "uppercase":
		if str, ok := val.(string); ok {
			return strings.ToUpper(str)
		}
	case "trim":
		if str, ok := val.(string); ok {
			return strings.TrimSpace(str)
		}
	}
	return val
}

func toFloat64(val interface{}) (float64, bool) {
	switch v := val.(type) {
	case float64:
		return v, true
	case int:
		return float64(v), true
	case int64:
		return float64(v), true
	case string:
		if f, err := strconv.ParseFloat(v, 64); err == nil {
			return f, true
		}
	}
	return 0, false
}

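// toFloat64 quick reference (behavior of the function above, illustrative):
//
//	toFloat64(3)          // 3.0, true
//	toFloat64(int64(3))   // 3.0, true
//	toFloat64("75000.50") // 75000.5, true
//	toFloat64(true)       // 0, false (bools are not converted)
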
// Configuration getters

func (h *DataHandler) getSortField() string {
	if field, ok := h.Payload.Data["sort_field"].(string); ok {
		return field
	}
	return ""
}

func (h *DataHandler) getSortOrder() string {
	if order, ok := h.Payload.Data["sort_order"].(string); ok {
		return order
	}
	return "asc"
}

// getDedupeFields accepts both []string (config built in Go, as in the demo)
// and []interface{} (config decoded from JSON).
func (h *DataHandler) getDedupeFields() []string {
	switch fields := h.Payload.Data["dedupe_fields"].(type) {
	case []string:
		return fields
	case []interface{}:
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func (h *DataHandler) getCalculations() map[string]map[string]interface{} {
	result := make(map[string]map[string]interface{})
	if calc, ok := h.Payload.Data["calculations"].(map[string]interface{}); ok {
		for key, value := range calc {
			if calcMap, ok := value.(map[string]interface{}); ok {
				result[key] = calcMap
			}
		}
	}
	return result
}

func (h *DataHandler) getConditions() map[string]map[string]interface{} {
	result := make(map[string]map[string]interface{})
	if cond, ok := h.Payload.Data["conditions"].(map[string]interface{}); ok {
		for key, value := range cond {
			if condMap, ok := value.(map[string]interface{}); ok {
				result[key] = condMap
			}
		}
	}
	return result
}

func (h *DataHandler) getCastConfig() map[string]string {
	result := make(map[string]string)
	if cast, ok := h.Payload.Data["cast"].(map[string]interface{}); ok {
		for key, value := range cast {
			if str, ok := value.(string); ok {
				result[key] = str
			}
		}
	}
	return result
}

func (h *DataHandler) getValidationRules() map[string]map[string]interface{} {
	result := make(map[string]map[string]interface{})
	if rules, ok := h.Payload.Data["validation_rules"].(map[string]interface{}); ok {
		for key, value := range rules {
			if ruleMap, ok := value.(map[string]interface{}); ok {
				result[key] = ruleMap
			}
		}
	}
	return result
}

// getTargetFields accepts both []string and []interface{} field lists.
func (h *DataHandler) getTargetFields() []string {
	switch fields := h.Payload.Data["fields"].(type) {
	case []string:
		return fields
	case []interface{}:
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func (h *DataHandler) getNormalizationType() string {
	if normType, ok := h.Payload.Data["normalize_type"].(string); ok {
		return normType
	}
	return "trim"
}

func (h *DataHandler) getPivotField() string {
	if field, ok := h.Payload.Data["pivot_field"].(string); ok {
		return field
	}
	return ""
}

func (h *DataHandler) getValueField() string {
	if field, ok := h.Payload.Data["value_field"].(string); ok {
		return field
	}
	return ""
}

// getUnpivotFields accepts both []string and []interface{} field lists.
func (h *DataHandler) getUnpivotFields() []string {
	switch fields := h.Payload.Data["unpivot_fields"].(type) {
	case []string:
		return fields
	case []interface{}:
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func NewDataHandler(id string) *DataHandler {
	return &DataHandler{
		Operation: dag.Operation{ID: id, Key: "data", Type: dag.Function, Tags: []string{"data", "transformation", "misc"}},
	}
}

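A quick sanity check for the deduplicate path, sketched as a Go test (the
test is not part of this commit and assumes the []string-aware
getDedupeFields above; JSON numbers decode as float64):

	func TestDataHandlerDeduplicate(t *testing.T) {
		h := NewDataHandler("dedup")
		h.SetConfig(dag.Payload{Data: map[string]any{
			"operation":     "deduplicate",
			"dedupe_fields": []string{"email"},
		}})
		payload, _ := json.Marshal(map[string]any{"data": []any{
			map[string]any{"email": "a@example.com", "id": 1},
			map[string]any{"email": "a@example.com", "id": 2},
		}})
		res := h.ProcessTask(context.Background(), &mq.Task{ID: mq.NewID(), Payload: payload})
		if res.Error != nil {
			t.Fatal(res.Error)
		}
		var out map[string]any
		_ = json.Unmarshal(res.Payload, &out)
		if out["duplicates_removed"].(float64) != 1 {
			t.Fatalf("expected 1 duplicate removed, got %v", out["duplicates_removed"])
		}
	}

The hunk that follows removes the previous monolithic
DataTransformationHandler implementation, which the focused handlers above
replace.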
@@ -1,765 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/oarkflow/json"
|
||||
"github.com/oarkflow/mq"
|
||||
"github.com/oarkflow/mq/dag"
|
||||
)
|
||||
|
||||
// DataTransformationHandler provides comprehensive data transformation capabilities
|
||||
type DataTransformationHandler struct {
|
||||
dag.Operation
|
||||
Transformations []DataTransformation `json:"transformations"` // list of transformations to apply
|
||||
}
|
||||
|
||||
type DataTransformation struct {
|
||||
Name string `json:"name"` // transformation name/identifier
|
||||
Type string `json:"type"` // transformation type
|
||||
SourceField string `json:"source_field"` // source field (can be empty for data-wide operations)
|
||||
TargetField string `json:"target_field"` // target field (can be empty to overwrite source)
|
||||
Config map[string]any `json:"config"` // transformation configuration
|
||||
Condition *TransformCondition `json:"condition"` // optional condition for when to apply
|
||||
}
|
||||
|
||||
type TransformCondition struct {
|
||||
Field string `json:"field"` // field to check
|
||||
Operator string `json:"operator"` // eq, ne, gt, lt, ge, le, contains, regex
|
||||
Value any `json:"value"` // value to compare against
|
||||
}
|
||||
|
||||
func (d *DataTransformationHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
|
||||
var data map[string]any
|
||||
if err := json.Unmarshal(task.Payload, &data); err != nil {
|
||||
return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
|
||||
}
|
||||
|
||||
// Apply transformations in sequence
|
||||
for i, transformation := range d.Transformations {
|
||||
// Check condition if specified
|
||||
if transformation.Condition != nil {
|
||||
if !d.evaluateCondition(data, transformation.Condition) {
|
||||
continue // skip this transformation
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
data, err = d.applyTransformation(data, transformation)
|
||||
if err != nil {
|
||||
return mq.Result{Error: fmt.Errorf("transformation %d (%s) failed: %v", i+1, transformation.Name, err), Ctx: ctx}
|
||||
}
|
||||
}
|
||||
|
||||
bt, _ := json.Marshal(data)
|
||||
return mq.Result{Payload: bt, Ctx: ctx}
|
||||
}
|
||||
|
||||
func (d *DataTransformationHandler) evaluateCondition(data map[string]any, condition *TransformCondition) bool {
|
||||
fieldValue, exists := data[condition.Field]
|
||||
if !exists {
|
||||
return false
|
||||
}
|
||||
|
||||
switch condition.Operator {
|
||||
case "eq":
|
||||
return fmt.Sprintf("%v", fieldValue) == fmt.Sprintf("%v", condition.Value)
|
||||
case "ne":
|
||||
return fmt.Sprintf("%v", fieldValue) != fmt.Sprintf("%v", condition.Value)
|
||||
case "gt":
|
||||
return d.compareNumeric(fieldValue, condition.Value) > 0
|
||||
case "lt":
|
||||
return d.compareNumeric(fieldValue, condition.Value) < 0
|
||||
case "ge":
|
||||
return d.compareNumeric(fieldValue, condition.Value) >= 0
|
||||
case "le":
|
||||
return d.compareNumeric(fieldValue, condition.Value) <= 0
|
||||
case "contains":
|
||||
return strings.Contains(fmt.Sprintf("%v", fieldValue), fmt.Sprintf("%v", condition.Value))
|
||||
case "regex":
|
||||
// Basic regex support - in production, use proper regex library
|
||||
return strings.Contains(fmt.Sprintf("%v", fieldValue), fmt.Sprintf("%v", condition.Value))
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DataTransformationHandler) compareNumeric(a, b any) int {
	aFloat := d.toFloat64(a)
	bFloat := d.toFloat64(b)

	if aFloat < bFloat {
		return -1
	} else if aFloat > bFloat {
		return 1
	}
	return 0
}

func (d *DataTransformationHandler) applyTransformation(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	switch transformation.Type {
	case "normalize":
		return d.normalizeData(data, transformation)
	case "aggregate":
		return d.aggregateData(data, transformation)
	case "pivot":
		return d.pivotData(data, transformation)
	case "unpivot":
		return d.unpivotData(data, transformation)
	case "calculate":
		return d.calculateField(data, transformation)
	case "lookup":
		return d.lookupTransform(data, transformation)
	case "bucket":
		return d.bucketize(data, transformation)
	case "rank":
		return d.rankData(data, transformation)
	case "window":
		return d.windowFunction(data, transformation)
	case "encode":
		return d.encodeData(data, transformation)
	case "decode":
		return d.decodeData(data, transformation)
	case "validate":
		return d.validateData(data, transformation)
	default:
		return nil, fmt.Errorf("unsupported transformation type: %s", transformation.Type)
	}
}

func (d *DataTransformationHandler) normalizeData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	sourceValue := data[transformation.SourceField]
	normalizeType, _ := transformation.Config["type"].(string)

	var normalized any
	var err error

	switch normalizeType {
	case "min_max":
		normalized, err = d.minMaxNormalize(sourceValue, transformation.Config)
	case "z_score":
		normalized, err = d.zScoreNormalize(sourceValue, transformation.Config)
	case "unit_vector":
		normalized, err = d.unitVectorNormalize(sourceValue, transformation.Config)
	default:
		return nil, fmt.Errorf("unsupported normalization type: %s", normalizeType)
	}

	if err != nil {
		return nil, err
	}

	targetField := transformation.TargetField
	if targetField == "" {
		targetField = transformation.SourceField
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}
	result[targetField] = normalized

	return result, nil
}

func (d *DataTransformationHandler) minMaxNormalize(value any, config map[string]any) (float64, error) {
	num := d.toFloat64(value)
	min, _ := config["min"].(float64)
	max, _ := config["max"].(float64)

	if max == min {
		return 0, nil
	}

	return (num - min) / (max - min), nil
}

func (d *DataTransformationHandler) zScoreNormalize(value any, config map[string]any) (float64, error) {
	num := d.toFloat64(value)
	mean, _ := config["mean"].(float64)
	stdDev, _ := config["std_dev"].(float64)

	if stdDev == 0 {
		return 0, nil
	}

	return (num - mean) / stdDev, nil
}

func (d *DataTransformationHandler) unitVectorNormalize(value any, config map[string]any) (float64, error) {
	num := d.toFloat64(value)
	magnitude, _ := config["magnitude"].(float64)

	if magnitude == 0 {
		return 0, nil
	}

	return num / magnitude, nil
}

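// Illustrative sketch (not part of the original file): a min_max normalization
// step configured to rescale a hypothetical "score" field into [0, 1]. The
// min/max bounds are assumptions supplied by the caller, not computed here.
//
//	normalizeScore := DataTransformation{
//		Name:        "normalize-score",
//		Type:        "normalize",
//		SourceField: "score",
//		TargetField: "score_normalized",
//		Config:      map[string]any{"type": "min_max", "min": 0.0, "max": 100.0},
//	}
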
func (d *DataTransformationHandler) calculateField(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	expression, _ := transformation.Config["expression"].(string)

	// Simple expression evaluator - in production, use a proper expression library
	result, err := d.evaluateExpression(expression, data)
	if err != nil {
		return nil, err
	}

	targetField := transformation.TargetField
	if targetField == "" {
		return nil, fmt.Errorf("target field is required for calculate transformation")
	}

	resultData := make(map[string]any)
	for k, v := range data {
		resultData[k] = v
	}
	resultData[targetField] = result

	return resultData, nil
}

func (d *DataTransformationHandler) evaluateExpression(expression string, data map[string]any) (any, error) {
	// Basic expression evaluation - replace with a proper expression evaluator.
	// Only a single binary +, -, * or / is supported; note that splitting on
	// "-" also breaks negative literals, so keep expressions simple.

	expression = strings.TrimSpace(expression)

	// Handle simple field references
	if value, exists := data[expression]; exists {
		return value, nil
	}

	// Handle simple arithmetic operations
	if strings.Contains(expression, "+") {
		parts := strings.Split(expression, "+")
		if len(parts) == 2 {
			left := strings.TrimSpace(parts[0])
			right := strings.TrimSpace(parts[1])

			leftVal := d.getValueOrNumber(left, data)
			rightVal := d.getValueOrNumber(right, data)

			return d.toFloat64(leftVal) + d.toFloat64(rightVal), nil
		}
	}

	if strings.Contains(expression, "-") {
		parts := strings.Split(expression, "-")
		if len(parts) == 2 {
			left := strings.TrimSpace(parts[0])
			right := strings.TrimSpace(parts[1])

			leftVal := d.getValueOrNumber(left, data)
			rightVal := d.getValueOrNumber(right, data)

			return d.toFloat64(leftVal) - d.toFloat64(rightVal), nil
		}
	}

	if strings.Contains(expression, "*") {
		parts := strings.Split(expression, "*")
		if len(parts) == 2 {
			left := strings.TrimSpace(parts[0])
			right := strings.TrimSpace(parts[1])

			leftVal := d.getValueOrNumber(left, data)
			rightVal := d.getValueOrNumber(right, data)

			return d.toFloat64(leftVal) * d.toFloat64(rightVal), nil
		}
	}

	if strings.Contains(expression, "/") {
		parts := strings.Split(expression, "/")
		if len(parts) == 2 {
			left := strings.TrimSpace(parts[0])
			right := strings.TrimSpace(parts[1])

			leftVal := d.getValueOrNumber(left, data)
			rightVal := d.toFloat64(d.getValueOrNumber(right, data))

			if rightVal == 0 {
				return nil, fmt.Errorf("division by zero")
			}

			return d.toFloat64(leftVal) / rightVal, nil
		}
	}

	return nil, fmt.Errorf("unable to evaluate expression: %s", expression)
}

func (d *DataTransformationHandler) getValueOrNumber(str string, data map[string]any) any {
	// Check if it's a field reference
	if value, exists := data[str]; exists {
		return value
	}

	// Try to parse as a number
	if num, err := strconv.ParseFloat(str, 64); err == nil {
		return num
	}

	// Return as string
	return str
}

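// Illustrative sketch (assumption, not in the original file): a "calculate"
// transformation that sums two hypothetical numeric fields. The evaluator
// above supports a single binary operator, so expressions stay this simple.
//
//	totalPay := DataTransformation{
//		Name:        "total-pay",
//		Type:        "calculate",
//		TargetField: "total_pay",
//		Config:      map[string]any{"expression": "salary + bonus"},
//	}
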
func (d *DataTransformationHandler) bucketize(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	sourceValue := data[transformation.SourceField]
	buckets, _ := transformation.Config["buckets"].([]any)
	labels, _ := transformation.Config["labels"].([]any)

	num := d.toFloat64(sourceValue)

	// Find the appropriate bucket
	bucketIndex := -1
	for i, bucket := range buckets {
		if bucketVal := d.toFloat64(bucket); num <= bucketVal {
			bucketIndex = i
			break
		}
	}

	var result any
	if bucketIndex >= 0 && bucketIndex < len(labels) {
		result = labels[bucketIndex]
	} else {
		result = "out_of_range"
	}

	targetField := transformation.TargetField
	if targetField == "" {
		targetField = transformation.SourceField
	}

	resultData := make(map[string]any)
	for k, v := range data {
		resultData[k] = v
	}
	resultData[targetField] = result

	return resultData, nil
}

func (d *DataTransformationHandler) encodeData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	sourceValue := data[transformation.SourceField]
	encodingType, _ := transformation.Config["type"].(string)

	var encoded any
	var err error

	switch encodingType {
	case "one_hot":
		encoded, err = d.oneHotEncode(sourceValue, transformation.Config)
	case "label":
		encoded, err = d.labelEncode(sourceValue, transformation.Config)
	case "ordinal":
		encoded, err = d.ordinalEncode(sourceValue, transformation.Config)
	default:
		return nil, fmt.Errorf("unsupported encoding type: %s", encodingType)
	}

	if err != nil {
		return nil, err
	}

	targetField := transformation.TargetField
	if targetField == "" {
		targetField = transformation.SourceField
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}
	result[targetField] = encoded

	return result, nil
}

func (d *DataTransformationHandler) oneHotEncode(value any, config map[string]any) (map[string]any, error) {
	categories, _ := config["categories"].([]any)
	valueStr := fmt.Sprintf("%v", value)

	result := make(map[string]any)
	for _, category := range categories {
		categoryStr := fmt.Sprintf("%v", category)
		if valueStr == categoryStr {
			result[categoryStr] = 1
		} else {
			result[categoryStr] = 0
		}
	}

	return result, nil
}

func (d *DataTransformationHandler) labelEncode(value any, config map[string]any) (int, error) {
	mapping, _ := config["mapping"].(map[string]any)
	valueStr := fmt.Sprintf("%v", value)

	if encoded, exists := mapping[valueStr]; exists {
		return int(d.toFloat64(encoded)), nil
	}

	return -1, fmt.Errorf("value '%s' not found in encoding mapping", valueStr)
}

func (d *DataTransformationHandler) ordinalEncode(value any, config map[string]any) (int, error) {
	order, _ := config["order"].([]any)
	valueStr := fmt.Sprintf("%v", value)

	for i, item := range order {
		if fmt.Sprintf("%v", item) == valueStr {
			return i, nil
		}
	}

	return -1, fmt.Errorf("value '%s' not found in ordinal order", valueStr)
}

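// Illustrative sketch (hypothetical categories, not in the original file): a
// one_hot encoding step that expands a "color" field into one indicator per
// known category, as implemented by oneHotEncode above.
//
//	encodeColor := DataTransformation{
//		Name:        "encode-color",
//		Type:        "encode",
//		SourceField: "color",
//		TargetField: "color_encoded",
//		Config: map[string]any{
//			"type":       "one_hot",
//			"categories": []any{"red", "green", "blue"},
//		},
//	}
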
func (d *DataTransformationHandler) aggregateData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// This is a simplified version - for complex aggregations, use GroupingHandler
	aggregationType, _ := transformation.Config["type"].(string)
	sourceField := transformation.SourceField

	// Assume the source field contains an array of values
	sourceValue, exists := data[sourceField]
	if !exists {
		return nil, fmt.Errorf("source field '%s' not found", sourceField)
	}

	values := d.extractNumbers(sourceValue)
	if len(values) == 0 {
		return nil, fmt.Errorf("no numeric values found in source field")
	}

	var result float64

	switch aggregationType {
	case "sum":
		for _, v := range values {
			result += v
		}
	case "avg", "mean":
		for _, v := range values {
			result += v
		}
		result /= float64(len(values))
	case "min":
		result = values[0]
		for _, v := range values {
			if v < result {
				result = v
			}
		}
	case "max":
		result = values[0]
		for _, v := range values {
			if v > result {
				result = v
			}
		}
	case "std":
		// Calculate standard deviation
		mean := 0.0
		for _, v := range values {
			mean += v
		}
		mean /= float64(len(values))

		variance := 0.0
		for _, v := range values {
			variance += math.Pow(v-mean, 2)
		}
		variance /= float64(len(values))
		result = math.Sqrt(variance)
	default:
		return nil, fmt.Errorf("unsupported aggregation type: %s", aggregationType)
	}

	targetField := transformation.TargetField
	if targetField == "" {
		targetField = sourceField
	}

	resultData := make(map[string]any)
	for k, v := range data {
		resultData[k] = v
	}
	resultData[targetField] = result

	return resultData, nil
}

func (d *DataTransformationHandler) extractNumbers(value any) []float64 {
	// Note: toFloat64 returns 0 for non-numeric input, so this filter also
	// skips genuine zero values - a known limitation of this simplified extractor.
	var numbers []float64

	rv := reflect.ValueOf(value)
	if rv.Kind() == reflect.Slice || rv.Kind() == reflect.Array {
		for i := 0; i < rv.Len(); i++ {
			if num := d.toFloat64(rv.Index(i).Interface()); num != 0 {
				numbers = append(numbers, num)
			}
		}
	} else {
		if num := d.toFloat64(value); num != 0 {
			numbers = append(numbers, num)
		}
	}

	return numbers
}

func (d *DataTransformationHandler) rankData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// For ranking, the data must contain an array of items
	arrayField, _ := transformation.Config["array_field"].(string)
	rankField := transformation.SourceField

	arrayData, exists := data[arrayField]
	if !exists {
		return nil, fmt.Errorf("array field '%s' not found", arrayField)
	}

	// Convert to a slice and extract values for ranking
	rv := reflect.ValueOf(arrayData)
	if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array {
		return nil, fmt.Errorf("array field must contain an array")
	}

	type rankItem struct {
		index int
		value float64
	}

	var items []rankItem
	for i := 0; i < rv.Len(); i++ {
		item := rv.Index(i).Interface()
		if itemMap, ok := item.(map[string]any); ok {
			if val, exists := itemMap[rankField]; exists {
				items = append(items, rankItem{
					index: i,
					value: d.toFloat64(val),
				})
			}
		}
	}

	// Sort by value
	sort.Slice(items, func(i, j int) bool {
		return items[i].value > items[j].value // descending order
	})

	// Assign ranks
	ranks := make(map[int]int)
	for rank, item := range items {
		ranks[item.index] = rank + 1
	}

	// Update the original data with ranks
	targetField := transformation.TargetField
	if targetField == "" {
		targetField = rankField + "_rank"
	}

	for i := 0; i < rv.Len(); i++ {
		item := rv.Index(i).Interface()
		if itemMap, ok := item.(map[string]any); ok {
			itemMap[targetField] = ranks[i]
		}
	}

	return data, nil
}

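// Illustrative sketch (hypothetical field names): a "rank" transformation that
// ranks the items stored under data["employees"] by their "salary" value,
// writing the 1-based rank into each item's "salary_rank" field.
//
//	rankBySalary := DataTransformation{
//		Name:        "rank-by-salary",
//		Type:        "rank",
//		SourceField: "salary",
//		Config:      map[string]any{"array_field": "employees"},
//	}
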
func (d *DataTransformationHandler) pivotData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Pivot transformation implementation
	pivotField, _ := transformation.Config["pivot_field"].(string)
	valueField, _ := transformation.Config["value_field"].(string)

	if pivotField == "" || valueField == "" {
		return nil, fmt.Errorf("pivot_field and value_field are required")
	}

	result := make(map[string]any)
	for key, value := range data {
		if key == pivotField {
			result[fmt.Sprintf("%v", value)] = data[valueField]
		}
	}

	return result, nil
}

func (d *DataTransformationHandler) unpivotData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Unpivot transformation implementation
	unpivotFields, _ := transformation.Config["fields"].([]string)
	if len(unpivotFields) == 0 {
		return nil, fmt.Errorf("fields for unpivoting are required")
	}

	result := make(map[string]any)
	for _, field := range unpivotFields {
		if value, exists := data[field]; exists {
			result[field] = value
		}
	}

	return result, nil
}

func (d *DataTransformationHandler) lookupTransform(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Lookup transformation implementation
	lookupTable, _ := transformation.Config["lookup_table"].(map[string]any)
	lookupKey, _ := transformation.Config["lookup_key"].(string)

	if lookupTable == nil || lookupKey == "" {
		return nil, fmt.Errorf("lookup_table and lookup_key are required")
	}

	lookupValue := data[lookupKey]
	if result, exists := lookupTable[fmt.Sprintf("%v", lookupValue)]; exists {
		return map[string]any{lookupKey: result}, nil
	}

	return nil, fmt.Errorf("lookup value not found")
}

func (d *DataTransformationHandler) windowFunction(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Window function transformation implementation
	windowField, _ := transformation.Config["window_field"].(string)
	operation, _ := transformation.Config["operation"].(string)

	if windowField == "" || operation == "" {
		return nil, fmt.Errorf("window_field and operation are required")
	}

	values := d.extractNumbers(data[windowField])
	if len(values) == 0 {
		return nil, fmt.Errorf("no numeric values found in window_field")
	}

	var result float64
	switch operation {
	case "sum":
		for _, v := range values {
			result += v
		}
	case "avg":
		for _, v := range values {
			result += v
		}
		result /= float64(len(values))
	default:
		return nil, fmt.Errorf("unsupported window operation: %s", operation)
	}

	return map[string]any{windowField: result}, nil
}

func (d *DataTransformationHandler) decodeData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Data decoding implementation
	encodingType, _ := transformation.Config["type"].(string)

	if encodingType == "" {
		return nil, fmt.Errorf("encoding type is required")
	}

	sourceValue := data[transformation.SourceField]
	var decoded any
	var err error

	switch encodingType {
	case "base64":
		decoded, err = d.decodeBase64(fmt.Sprintf("%v", sourceValue))
	case "hex":
		decoded, err = d.decodeHex(fmt.Sprintf("%v", sourceValue))
	default:
		return nil, fmt.Errorf("unsupported decoding type: %s", encodingType)
	}

	if err != nil {
		return nil, err
	}

	return map[string]any{transformation.TargetField: decoded}, nil
}

func (d *DataTransformationHandler) decodeBase64(value string) (string, error) {
	// Decode a base64 string (requires "encoding/base64" in the import block)
	decoded, err := base64.StdEncoding.DecodeString(value)
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

func (d *DataTransformationHandler) decodeHex(value string) (string, error) {
	// Decode a hex string (requires "encoding/hex" in the import block)
	decoded, err := hex.DecodeString(value)
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

func (d *DataTransformationHandler) validateData(data map[string]any, transformation DataTransformation) (map[string]any, error) {
	// Data validation implementation
	validationRules, _ := transformation.Config["rules"].([]map[string]any)

	if len(validationRules) == 0 {
		return nil, fmt.Errorf("validation rules are required")
	}

	for _, rule := range validationRules {
		field, _ := rule["field"].(string)
		operator, _ := rule["operator"].(string)
		value := rule["value"]

		if !d.evaluateCondition(data, &TransformCondition{Field: field, Operator: operator, Value: value}) {
			return nil, fmt.Errorf("validation failed for field: %s", field)
		}
	}

	return data, nil
}

func (d *DataTransformationHandler) toFloat64(value any) float64 {
	switch v := value.(type) {
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case float32:
		return float64(v)
	case float64:
		return v
	case string:
		if num, err := strconv.ParseFloat(v, 64); err == nil {
			return num
		}
	}
	return 0
}

// Factory function
func NewDataTransformationHandler(id string, transformations []DataTransformation) *DataTransformationHandler {
	return &DataTransformationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "data_transformation",
			Type: dag.Function,
			Tags: []string{"data", "transformation", "advanced"},
		},
		Transformations: transformations,
	}
}
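
// Minimal usage sketch (the transformation types are defined above; the task
// payload and field names are hypothetical): chain several transformations in
// one handler and run it against a JSON payload.
//
//	handler := NewDataTransformationHandler("transform-1", []DataTransformation{
//		{Type: "normalize", SourceField: "score", Config: map[string]any{"type": "min_max", "min": 0.0, "max": 100.0}},
//		{Type: "bucket", SourceField: "age", TargetField: "age_group", Config: map[string]any{
//			"buckets": []any{18, 35, 65}, "labels": []any{"young", "adult", "senior"},
//		}},
//	})
//	result := handler.ProcessTask(context.Background(), &mq.Task{Payload: []byte(`{"score": 42, "age": 29}`)})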
@@ -1,494 +0,0 @@
package handlers

import (
	"context"
	"fmt"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// DataUtilsHandler provides utility functions for common data operations
type DataUtilsHandler struct {
	dag.Operation
	UtilityType string         `json:"utility_type"` // type of utility operation
	Config      map[string]any `json:"config"`       // operation configuration
}

// Utility operation types:
// - "deduplicate": Remove duplicate entries from arrays or objects
// - "merge": Merge multiple objects or arrays
// - "diff": Compare two data structures and return differences
// - "sort": Sort arrays or object keys
// - "reverse": Reverse arrays or strings
// - "sample": Take a sample of data
// - "validate_schema": Validate data against a schema
// - "convert_types": Convert data types in bulk

func (d *DataUtilsHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	if err := json.Unmarshal(task.Payload, &data); err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
	}

	var result map[string]any
	var err error

	switch d.UtilityType {
	case "deduplicate":
		result, err = d.deduplicate(data)
	case "merge":
		result, err = d.merge(data)
	case "diff":
		result, err = d.diff(data)
	case "sort":
		result, err = d.sort(data)
	case "reverse":
		result, err = d.reverse(data)
	case "sample":
		result, err = d.sample(data)
	case "validate_schema":
		result, err = d.validateSchema(data)
	case "convert_types":
		result, err = d.convertTypes(data)
	default:
		return mq.Result{Error: fmt.Errorf("unsupported utility type: %s", d.UtilityType), Ctx: ctx}
	}

	if err != nil {
		return mq.Result{Error: err, Ctx: ctx}
	}

	bt, _ := json.Marshal(result)
	return mq.Result{Payload: bt, Ctx: ctx}
}

func (d *DataUtilsHandler) deduplicate(data map[string]any) (map[string]any, error) {
	sourceField, _ := d.Config["source_field"].(string)
	targetField, _ := d.Config["target_field"].(string)
	dedupeBy, _ := d.Config["dedupe_by"].(string) // field to dedupe by, or empty for exact match

	if targetField == "" {
		targetField = sourceField
	}

	sourceData, exists := data[sourceField]
	if !exists {
		return nil, fmt.Errorf("source field '%s' not found", sourceField)
	}

	// Implementation depends on the data type
	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// Basic deduplication logic - can be extended
	if arrayData, ok := sourceData.([]any); ok {
		seen := make(map[string]bool)
		var dedupedArray []any

		for _, item := range arrayData {
			var key string
			if dedupeBy != "" {
				// Dedupe by a specific field
				if itemMap, ok := item.(map[string]any); ok {
					key = fmt.Sprintf("%v", itemMap[dedupeBy])
				}
			} else {
				// Dedupe by the entire item
				key = fmt.Sprintf("%v", item)
			}

			if !seen[key] {
				seen[key] = true
				dedupedArray = append(dedupedArray, item)
			}
		}

		result[targetField] = dedupedArray
	}

	return result, nil
}

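// Illustrative config sketch (hypothetical field names): deduplicate the array
// under data["contacts"] by each item's "email" value, writing the result to
// data["unique_contacts"].
//
//	utils := &DataUtilsHandler{
//		UtilityType: "deduplicate",
//		Config: map[string]any{
//			"source_field": "contacts",
//			"target_field": "unique_contacts",
//			"dedupe_by":    "email",
//		},
//	}
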
func (d *DataUtilsHandler) merge(data map[string]any) (map[string]any, error) {
	sourceFields, _ := d.Config["source_fields"].([]any)
	targetField, _ := d.Config["target_field"].(string)
	mergeStrategy, _ := d.Config["strategy"].(string) // "overwrite" or "append"

	if len(sourceFields) < 2 {
		return nil, fmt.Errorf("at least 2 source fields required for merge")
	}

	var mergedResult any

	switch mergeStrategy {
	case "overwrite":
		// Merge objects by overwriting keys; later fields win
		merged := make(map[string]any)
		for _, fieldName := range sourceFields {
			if field, ok := fieldName.(string); ok {
				if fieldData, exists := data[field]; exists {
					if fieldMap, ok := fieldData.(map[string]any); ok {
						for k, v := range fieldMap {
							merged[k] = v
						}
					}
				}
			}
		}
		mergedResult = merged

	case "append":
		// Merge arrays by appending
		var merged []any
		for _, fieldName := range sourceFields {
			if field, ok := fieldName.(string); ok {
				if fieldData, exists := data[field]; exists {
					if fieldArray, ok := fieldData.([]any); ok {
						merged = append(merged, fieldArray...)
					}
				}
			}
		}
		mergedResult = merged

	default:
		return nil, fmt.Errorf("unsupported merge strategy: %s", mergeStrategy)
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}
	result[targetField] = mergedResult

	return result, nil
}

func (d *DataUtilsHandler) diff(data map[string]any) (map[string]any, error) {
	field1, _ := d.Config["first_field"].(string)
	field2, _ := d.Config["second_field"].(string)
	targetField, _ := d.Config["target_field"].(string)

	data1, exists1 := data[field1]
	data2, exists2 := data[field2]

	if !exists1 || !exists2 {
		return nil, fmt.Errorf("both comparison fields must exist")
	}

	// Basic diff implementation
	diffResult := map[string]any{
		"equal":       fmt.Sprintf("%v", data1) == fmt.Sprintf("%v", data2),
		"first_only":  d.findUniqueElements(data1, data2),
		"second_only": d.findUniqueElements(data2, data1),
		"common":      d.findCommonElements(data1, data2),
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}
	result[targetField] = diffResult

	return result, nil
}

func (d *DataUtilsHandler) findUniqueElements(data1, data2 any) []any {
	// Simplified implementation for arrays
	if array1, ok := data1.([]any); ok {
		if array2, ok := data2.([]any); ok {
			set2 := make(map[string]bool)
			for _, item := range array2 {
				set2[fmt.Sprintf("%v", item)] = true
			}

			var unique []any
			for _, item := range array1 {
				if !set2[fmt.Sprintf("%v", item)] {
					unique = append(unique, item)
				}
			}
			return unique
		}
	}
	return nil
}

func (d *DataUtilsHandler) findCommonElements(data1, data2 any) []any {
	// Simplified implementation for arrays
	if array1, ok := data1.([]any); ok {
		if array2, ok := data2.([]any); ok {
			set2 := make(map[string]bool)
			for _, item := range array2 {
				set2[fmt.Sprintf("%v", item)] = true
			}

			var common []any
			seen := make(map[string]bool)
			for _, item := range array1 {
				key := fmt.Sprintf("%v", item)
				if set2[key] && !seen[key] {
					common = append(common, item)
					seen[key] = true
				}
			}
			return common
		}
	}
	return nil
}

func (d *DataUtilsHandler) sort(data map[string]any) (map[string]any, error) {
	sourceField, _ := d.Config["source_field"].(string)
	targetField, _ := d.Config["target_field"].(string)
	// sortBy, _ := d.Config["sort_by"].(string)
	// direction, _ := d.Config["direction"].(string) // "asc" or "desc"

	if targetField == "" {
		targetField = sourceField
	}

	sourceData, exists := data[sourceField]
	if !exists {
		return nil, fmt.Errorf("source field '%s' not found", sourceField)
	}

	// Basic sorting implementation
	// For production, use more sophisticated sorting
	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// This is a placeholder - implement proper sorting based on the data type
	result[targetField] = sourceData

	return result, nil
}

func (d *DataUtilsHandler) reverse(data map[string]any) (map[string]any, error) {
	sourceField, _ := d.Config["source_field"].(string)
	targetField, _ := d.Config["target_field"].(string)

	if targetField == "" {
		targetField = sourceField
	}

	sourceData, exists := data[sourceField]
	if !exists {
		return nil, fmt.Errorf("source field '%s' not found", sourceField)
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// Reverse arrays
	if arrayData, ok := sourceData.([]any); ok {
		reversed := make([]any, len(arrayData))
		for i, item := range arrayData {
			reversed[len(arrayData)-1-i] = item
		}
		result[targetField] = reversed
	} else if strData, ok := sourceData.(string); ok {
		// Reverse strings
		runes := []rune(strData)
		for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 {
			runes[i], runes[j] = runes[j], runes[i]
		}
		result[targetField] = string(runes)
	} else {
		result[targetField] = sourceData
	}

	return result, nil
}

func (d *DataUtilsHandler) sample(data map[string]any) (map[string]any, error) {
	sourceField, _ := d.Config["source_field"].(string)
	targetField, _ := d.Config["target_field"].(string)
	sampleSize, _ := d.Config["sample_size"].(float64)

	if targetField == "" {
		targetField = sourceField
	}

	sourceData, exists := data[sourceField]
	if !exists {
		return nil, fmt.Errorf("source field '%s' not found", sourceField)
	}

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// Basic sampling for arrays
	if arrayData, ok := sourceData.([]any); ok {
		size := int(sampleSize)
		if size > len(arrayData) {
			size = len(arrayData)
		}

		if size <= 0 {
			result[targetField] = []any{}
		} else if size >= len(arrayData) {
			result[targetField] = arrayData
		} else {
			// Simple sampling - take the first N elements
			// For production, implement proper random sampling
			sample := make([]any, size)
			copy(sample, arrayData[:size])
			result[targetField] = sample
		}
	} else {
		result[targetField] = sourceData
	}

	return result, nil
}

func (d *DataUtilsHandler) validateSchema(data map[string]any) (map[string]any, error) {
	// Basic schema validation placeholder
	// For production, implement proper JSON schema validation
	sourceField, _ := d.Config["source_field"].(string)
	schema, _ := d.Config["schema"].(map[string]any)

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// Placeholder validation result
	result["validation_result"] = map[string]any{
		"valid":  true,
		"errors": []string{},
		"schema": schema,
		"data":   data[sourceField],
	}

	return result, nil
}

func (d *DataUtilsHandler) convertTypes(data map[string]any) (map[string]any, error) {
	conversions, _ := d.Config["conversions"].(map[string]any)

	result := make(map[string]any)
	for k, v := range data {
		result[k] = v
	}

	// Apply type conversions; fields that fail to convert keep their original value
	for field, targetType := range conversions {
		if value, exists := result[field]; exists {
			converted, err := d.convertType(value, fmt.Sprintf("%v", targetType))
			if err == nil {
				result[field] = converted
			}
		}
	}

	return result, nil
}

func (d *DataUtilsHandler) convertType(value any, targetType string) (any, error) {
	switch targetType {
	case "string":
		return fmt.Sprintf("%v", value), nil
	case "int":
		return int(d.toFloat64(value)), nil
	case "float":
		return d.toFloat64(value), nil
	case "bool":
		str := fmt.Sprintf("%v", value)
		return str == "true" || str == "1" || str == "yes", nil
	default:
		return value, fmt.Errorf("unsupported target type: %s", targetType)
	}
}

func (d *DataUtilsHandler) toFloat64(value any) float64 {
	switch v := value.(type) {
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case float32:
		return float64(v)
	case float64:
		return v
	case string:
		var result float64
		if n, err := fmt.Sscanf(v, "%f", &result); err == nil && n == 1 {
			return result
		}
	}
	return 0
}

// Factory functions for common utilities
func NewDeduplicateHandler(id, sourceField, targetField, dedupeBy string) *DataUtilsHandler {
	return &DataUtilsHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "deduplicate_data",
			Type: dag.Function,
			Tags: []string{"data", "utils", "deduplicate"},
		},
		UtilityType: "deduplicate",
		Config: map[string]any{
			"source_field": sourceField,
			"target_field": targetField,
			"dedupe_by":    dedupeBy,
		},
	}
}

func NewMergeHandler(id string, sourceFields []string, targetField, strategy string) *DataUtilsHandler {
	var anyFields []any
	for _, field := range sourceFields {
		anyFields = append(anyFields, field)
	}

	return &DataUtilsHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "merge_data",
			Type: dag.Function,
			Tags: []string{"data", "utils", "merge"},
		},
		UtilityType: "merge",
		Config: map[string]any{
			"source_fields": anyFields,
			"target_field":  targetField,
			"strategy":      strategy,
		},
	}
}

func NewDataDiffHandler(id, field1, field2, targetField string) *DataUtilsHandler {
	return &DataUtilsHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "diff_data",
			Type: dag.Function,
			Tags: []string{"data", "utils", "diff"},
		},
		UtilityType: "diff",
		Config: map[string]any{
			"first_field":  field1,
			"second_field": field2,
			"target_field": targetField,
		},
	}
}
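
// Minimal usage sketch (hypothetical IDs and payload): the factory functions
// above produce ready-to-use handlers; only the Config map varies per utility.
//
//	dedupe := NewDeduplicateHandler("dedupe-1", "contacts", "unique_contacts", "email")
//	merge := NewMergeHandler("merge-1", []string{"defaults", "overrides"}, "settings", "overwrite")
//	diff := NewDataDiffHandler("diff-1", "before", "after", "changes")
//	result := dedupe.ProcessTask(context.Background(), &mq.Task{Payload: []byte(`{"contacts": []}`)})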
208
handlers/examples.go
Normal file
@@ -0,0 +1,208 @@
package handlers

/*
Data Transformation Handlers Usage Examples

This file contains examples of how to configure and use the various data transformation handlers.
All configuration is done through the dag.Operation.Payload.Data map - no handler-specific configuration structs are required.

1. FORMAT HANDLER
=================
Supports: string, number, date, currency, uppercase, lowercase, capitalize, trim

Example configuration:
{
    "format_type": "uppercase",
    "fields": ["name", "title"],
    "currency": "$",
    "date_format": "2006-01-02"
}

2. GROUP HANDLER
================
Groups data with aggregation functions.

Example configuration:
{
    "group_by": ["department", "status"],
    "aggregations": {
        "salary": "sum",
        "age": "avg",
        "count": "count",
        "name": "concat"
    },
    "concat_separator": ", "
}

3. SPLIT/JOIN HANDLER
=====================
Handles string split and join operations.

Split example:
{
    "operation": "split",
    "fields": ["full_name"],
    "separator": " "
}

Join example:
{
    "operation": "join",
    "source_fields": ["first_name", "last_name"],
    "target_field": "full_name",
    "separator": " "
}

4. FLATTEN HANDLER
==================
Flattens nested data structures.

Flatten settings example (key-value pairs):
{
    "operation": "flatten_settings",
    "source_field": "settings",
    "target_field": "config"
}

Input:  {"settings": [{"key": "theme", "value": "dark", "value_type": "string"}]}
Output: {"config": {"theme": "dark"}}

5. JSON HANDLER
===============
JSON parsing and manipulation.

Parse JSON string:
{
    "operation": "parse",
    "fields": ["json_data"]
}

Stringify object:
{
    "operation": "stringify",
    "fields": ["object_data"],
    "indent": true
}

6. FIELD HANDLER
================
Field manipulation operations.

Filter fields:
{
    "operation": "filter",
    "fields": ["name", "email", "age"]
}

Rename fields:
{
    "operation": "rename",
    "mapping": {
        "old_name": "new_name",
        "email_addr": "email"
    }
}

Add fields:
{
    "operation": "add",
    "new_fields": {
        "created_at": "2023-01-01",
        "status": "active"
    }
}

Transform keys:
{
    "operation": "transform_keys",
    "transformation": "snake_case" // or camel_case, kebab_case, pascal_case, uppercase, lowercase
}

7. DATA HANDLER
===============
Miscellaneous data operations.

Sort data:
{
    "operation": "sort",
    "sort_field": "created_at",
    "sort_order": "desc"
}

Deduplicate:
{
    "operation": "deduplicate",
    "dedupe_fields": ["email", "phone"]
}

Calculate fields:
{
    "operation": "calculate",
    "calculations": {
        "total": {
            "operation": "sum",
            "fields": ["amount1", "amount2"]
        },
        "average_score": {
            "operation": "average",
            "fields": ["score1", "score2", "score3"]
        }
    }
}

Type casting:
{
    "operation": "type_cast",
    "cast": {
        "age": "int",
        "salary": "float",
        "active": "bool"
    }
}

Validate fields:
{
    "operation": "validate_fields",
    "validation_rules": {
        "email": {
            "required": true,
            "type": "string"
        },
        "age": {
            "required": true,
            "type": "int",
            "min": 0
        }
    }
}

USAGE IN DAG:
=============

import "github.com/oarkflow/mq/handlers"
import "github.com/oarkflow/mq/dag"

// Create handler
formatHandler := handlers.NewFormatHandler("format-1")

// Configure through Operation.Payload
config := dag.Payload{
    Data: map[string]any{
        "format_type": "uppercase",
        "fields":      []string{"name", "title"},
    },
}
formatHandler.SetConfig(config)

// Use in DAG
flow := dag.NewDAG("data-processing")
flow.AddNode(formatHandler)

CHAINING OPERATIONS:
====================

You can chain multiple handlers in a DAG (see the sketch after this comment block):
1. Parse JSON → 2. Flatten → 3. Filter fields → 4. Format → 5. Group

Each handler receives the output of the previous handler as input.
*/
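
// Illustrative chaining sketch: each handler receives the previous handler's
// output payload. NewFormatHandler, NewFieldHandler and NewGroupHandler exist
// in this package; the other constructor names are assumptions following the
// same pattern, not taken from this file.
//
//	flow := dag.NewDAG("data-processing")
//	flow.AddNode(NewJSONHandler("parse-1"))    // 1. parse JSON string fields
//	flow.AddNode(NewFlattenHandler("flat-1"))  // 2. flatten nested structures
//	flow.AddNode(NewFieldHandler("fields-1"))  // 3. filter/rename fields
//	flow.AddNode(NewFormatHandler("format-1")) // 4. format values
//	flow.AddNode(NewGroupHandler("group-1"))   // 5. group and aggregate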
344
handlers/field_handler.go
Normal file
@@ -0,0 +1,344 @@
package handlers

import (
	"context"
	"fmt"
	"strings"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// FieldHandler handles field manipulation operations (filter, add, remove, rename, etc.)
type FieldHandler struct {
	dag.Operation
}

func (h *FieldHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	err := json.Unmarshal(task.Payload, &data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err), Ctx: ctx}
	}

	operation, ok := h.Payload.Data["operation"].(string)
	if !ok {
		return mq.Result{Error: fmt.Errorf("operation not specified"), Ctx: ctx}
	}

	var result map[string]any
	switch operation {
	case "filter", "select":
		result = h.filterFields(data)
	case "exclude", "remove":
		result = h.excludeFields(data)
	case "rename":
		result = h.renameFields(data)
	case "add", "set":
		result = h.addFields(data)
	case "copy":
		result = h.copyFields(data)
	case "merge":
		result = h.mergeFields(data)
	case "prefix":
		result = h.prefixFields(data)
	case "suffix":
		result = h.suffixFields(data)
	case "transform_keys":
		result = h.transformKeys(data)
	default:
		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation), Ctx: ctx}
	}

	resultPayload, err := json.Marshal(result)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err), Ctx: ctx}
	}

	return mq.Result{Payload: resultPayload, Ctx: ctx}
}

func (h *FieldHandler) filterFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	allowedFields := h.getTargetFields()

	if len(allowedFields) == 0 {
		return data // if no fields are specified, return all
	}

	for _, field := range allowedFields {
		if val, ok := data[field]; ok {
			result[field] = val
		}
	}

	return result
}

func (h *FieldHandler) excludeFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	excludeFields := h.getTargetFields()

	// Copy all fields except the excluded ones
	for key, value := range data {
		if !contains(excludeFields, key) {
			result[key] = value
		}
	}

	return result
}

func (h *FieldHandler) renameFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	renameMap := h.getFieldMapping()

	// Copy all fields, renaming as specified
	for key, value := range data {
		if newKey, ok := renameMap[key]; ok {
			result[newKey] = value
		} else {
			result[key] = value
		}
	}

	return result
}

func (h *FieldHandler) addFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	newFields := h.getNewFields()

	// Copy all original data
	for key, value := range data {
		result[key] = value
	}

	// Add new fields
	for key, value := range newFields {
		result[key] = value
	}

	return result
}

func (h *FieldHandler) copyFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	copyMap := h.getFieldMapping()

	// Copy all original data
	for key, value := range data {
		result[key] = value
	}

	// Copy fields to their new names
	for sourceKey, targetKey := range copyMap {
		if val, ok := data[sourceKey]; ok {
			result[targetKey] = val
		}
	}

	return result
}

func (h *FieldHandler) mergeFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	mergeConfig := h.getMergeConfig()

	// Copy all original data
	for key, value := range data {
		result[key] = value
	}

	// Merge fields; config values arrive as JSON-decoded types, so assert
	// defensively instead of panicking on a failed type assertion
	for targetField, config := range mergeConfig {
		separator, _ := config["separator"].(string)

		var sourceFields []string
		switch fields := config["fields"].(type) {
		case []string:
			sourceFields = fields
		case []interface{}:
			for _, f := range fields {
				if str, ok := f.(string); ok {
					sourceFields = append(sourceFields, str)
				}
			}
		}

		var values []string
		for _, field := range sourceFields {
			if val, ok := data[field]; ok && val != nil {
				values = append(values, fmt.Sprintf("%v", val))
			}
		}

		if len(values) > 0 {
			result[targetField] = strings.Join(values, separator)
		}
	}

	return result
}

func (h *FieldHandler) prefixFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	prefix := h.getPrefix()
	targetFields := h.getTargetFields()

	for key, value := range data {
		if len(targetFields) == 0 || contains(targetFields, key) {
			result[prefix+key] = value
		} else {
			result[key] = value
		}
	}

	return result
}

func (h *FieldHandler) suffixFields(data map[string]any) map[string]any {
	result := make(map[string]any)
	suffix := h.getSuffix()
	targetFields := h.getTargetFields()

	for key, value := range data {
		if len(targetFields) == 0 || contains(targetFields, key) {
			result[key+suffix] = value
		} else {
			result[key] = value
		}
	}

	return result
}

func (h *FieldHandler) transformKeys(data map[string]any) map[string]any {
	result := make(map[string]any)
	transformation := h.getKeyTransformation()

	for key, value := range data {
		newKey := h.applyKeyTransformation(key, transformation)
		result[newKey] = value
	}

	return result
}

func (h *FieldHandler) applyKeyTransformation(key string, transformation string) string {
	switch transformation {
	case "lowercase":
		return strings.ToLower(key)
	case "uppercase":
		return strings.ToUpper(key)
	case "snake_case":
		return h.toSnakeCase(key)
	case "camel_case":
		return h.toCamelCase(key)
	case "kebab_case":
		return h.toKebabCase(key)
	case "pascal_case":
		return h.toPascalCase(key)
	default:
		return key
	}
}

// toSnakeCase replaces spaces and hyphens with underscores and lowercases the
// result; note it does not split camelCase words.
func (h *FieldHandler) toSnakeCase(s string) string {
	result := strings.ReplaceAll(s, " ", "_")
	result = strings.ReplaceAll(result, "-", "_")
	return strings.ToLower(result)
}

func (h *FieldHandler) toCamelCase(s string) string {
	parts := strings.FieldsFunc(s, func(c rune) bool {
		return c == ' ' || c == '_' || c == '-'
	})

	if len(parts) == 0 {
		return s
	}

	result := strings.ToLower(parts[0])
	for _, part := range parts[1:] {
		if len(part) > 0 {
			result += strings.ToUpper(part[:1]) + strings.ToLower(part[1:])
		}
	}
	return result
}

// toKebabCase replaces spaces and underscores with hyphens and lowercases the
// result; like toSnakeCase, camelCase input is not split.
func (h *FieldHandler) toKebabCase(s string) string {
	result := strings.ReplaceAll(s, " ", "-")
	result = strings.ReplaceAll(result, "_", "-")
	return strings.ToLower(result)
}

func (h *FieldHandler) toPascalCase(s string) string {
	camel := h.toCamelCase(s)
	if len(camel) > 0 {
		return strings.ToUpper(camel[:1]) + camel[1:]
	}
	return camel
}

func (h *FieldHandler) getTargetFields() []string {
	if fields, ok := h.Payload.Data["fields"].([]interface{}); ok {
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func (h *FieldHandler) getFieldMapping() map[string]string {
	result := make(map[string]string)
	if mapping, ok := h.Payload.Data["mapping"].(map[string]interface{}); ok {
		for key, value := range mapping {
			if str, ok := value.(string); ok {
				result[key] = str
			}
		}
	}
	return result
}

func (h *FieldHandler) getNewFields() map[string]interface{} {
	if fields, ok := h.Payload.Data["new_fields"].(map[string]interface{}); ok {
		return fields
	}
	return make(map[string]interface{})
}

func (h *FieldHandler) getMergeConfig() map[string]map[string]interface{} {
	result := make(map[string]map[string]interface{})
	if config, ok := h.Payload.Data["merge_config"].(map[string]interface{}); ok {
		for key, value := range config {
			if configMap, ok := value.(map[string]interface{}); ok {
				result[key] = configMap
			}
		}
	}
	return result
}

func (h *FieldHandler) getPrefix() string {
	if prefix, ok := h.Payload.Data["prefix"].(string); ok {
		return prefix
	}
	return ""
}

func (h *FieldHandler) getSuffix() string {
	if suffix, ok := h.Payload.Data["suffix"].(string); ok {
		return suffix
	}
	return ""
}

func (h *FieldHandler) getKeyTransformation() string {
	if transform, ok := h.Payload.Data["transformation"].(string); ok {
		return transform
	}
	return ""
}

func NewFieldHandler(id string) *FieldHandler {
	return &FieldHandler{
		Operation: dag.Operation{ID: id, Key: "field", Type: dag.Function, Tags: []string{"data", "transformation", "field"}},
	}
}
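
// Minimal usage sketch (hypothetical payload): rename fields via the "mapping"
// key read by getFieldMapping above; SetConfig follows the convention used by
// the other handlers in this package.
//
//	fh := NewFieldHandler("field-1")
//	fh.SetConfig(dag.Payload{Data: map[string]any{
//		"operation": "rename",
//		"mapping":   map[string]any{"email_addr": "email"},
//	}})
//	result := fh.ProcessTask(context.Background(), &mq.Task{Payload: []byte(`{"email_addr": "a@b.c"}`)})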
@@ -1,501 +0,0 @@
package handlers

import (
	"context"
	"fmt"
	"reflect"
	"regexp"
	"strings"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// FieldManipulationHandler handles various field operations on data
type FieldManipulationHandler struct {
	dag.Operation
	Operations []FieldOperation `json:"operations"` // list of field operations to perform
}

type FieldOperation struct {
	Type   string               `json:"type"`   // "filter", "add", "remove", "rename", "copy", "transform"
	Config FieldOperationConfig `json:"config"` // operation-specific configuration
}

type FieldOperationConfig struct {
	// Common fields
	Fields        []string `json:"fields"`         // fields to operate on
	Pattern       string   `json:"pattern"`        // regex pattern for field matching
	CaseSensitive bool     `json:"case_sensitive"` // case-sensitive pattern matching

	// Filter operation
	IncludeOnly []string `json:"include_only"` // only include these fields
	Exclude     []string `json:"exclude"`      // exclude these fields
	KeepNulls   bool     `json:"keep_nulls"`   // keep fields with null values
	KeepEmpty   bool     `json:"keep_empty"`   // keep fields with empty values

	// Add operation
	NewFields    map[string]any `json:"new_fields"`    // fields to add with their values
	DefaultValue any            `json:"default_value"` // default value for new fields

	// Rename operation
	FieldMapping map[string]string `json:"field_mapping"` // old field name -> new field name

	// Copy operation
	CopyMapping   map[string]string `json:"copy_mapping"`   // source field -> target field
	OverwriteCopy bool              `json:"overwrite_copy"` // overwrite target if it exists

	// Transform operation
	Transformation  string         `json:"transformation"`   // transformation type
	TransformConfig map[string]any `json:"transform_config"` // transformation configuration
}

func (f *FieldManipulationHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	if err := json.Unmarshal(task.Payload, &data); err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
	}

	// Apply operations in sequence
	for i, operation := range f.Operations {
		var err error

		switch operation.Type {
		case "filter":
			data, err = f.filterFields(data, operation.Config)
		case "add":
			data, err = f.addFields(data, operation.Config)
		case "remove":
			data, err = f.removeFields(data, operation.Config)
		case "rename":
			data, err = f.renameFields(data, operation.Config)
		case "copy":
			data, err = f.copyFields(data, operation.Config)
		case "transform":
			data, err = f.transformFields(data, operation.Config)
		default:
			return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", operation.Type), Ctx: ctx}
		}

		if err != nil {
			return mq.Result{Error: fmt.Errorf("operation %d (%s) failed: %v", i+1, operation.Type, err), Ctx: ctx}
		}
	}

	bt, _ := json.Marshal(data)
	return mq.Result{Payload: bt, Ctx: ctx}
}

func (f *FieldManipulationHandler) filterFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// If include_only is specified, only include those fields
	if len(config.IncludeOnly) > 0 {
		for _, field := range config.IncludeOnly {
			if value, exists := data[field]; exists {
				if f.shouldKeepValue(value, config) {
					result[field] = value
				}
			}
		}
		return result, nil
	}

	// Otherwise, include all except the excluded fields
	excludeSet := make(map[string]bool)
	for _, field := range config.Exclude {
		excludeSet[field] = true
	}

	// Compile the regex pattern if provided
	var pattern *regexp.Regexp
	if config.Pattern != "" {
		flags := ""
		if !config.CaseSensitive {
			flags = "(?i)"
		}
		var err error
		pattern, err = regexp.Compile(flags + config.Pattern)
		if err != nil {
			return nil, fmt.Errorf("invalid regex pattern: %v", err)
		}
	}

	for field, value := range data {
		// Check if the field should be excluded
		if excludeSet[field] {
			continue
		}

		// Check pattern matching
		if pattern != nil && !pattern.MatchString(field) {
			continue
		}

		// Check value conditions
		if f.shouldKeepValue(value, config) {
			result[field] = value
		}
	}

	return result, nil
}

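// Illustrative sketch (hypothetical field names): a filter operation that
// keeps only the listed fields and drops null values, as interpreted by
// filterFields above.
//
//	filterOp := FieldOperation{
//		Type: "filter",
//		Config: FieldOperationConfig{
//			IncludeOnly: []string{"id", "email", "status"},
//			KeepNulls:   false,
//		},
//	}
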
func (f *FieldManipulationHandler) shouldKeepValue(value any, config FieldOperationConfig) bool {
|
||||
if value == nil {
|
||||
return config.KeepNulls
|
||||
}
|
||||
|
||||
// Check for empty values
|
||||
if f.isEmpty(value) {
|
||||
return config.KeepEmpty
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (f *FieldManipulationHandler) isEmpty(value any) bool {
|
||||
if value == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
rv := reflect.ValueOf(value)
|
||||
switch rv.Kind() {
|
||||
case reflect.String:
|
||||
return rv.String() == ""
|
||||
case reflect.Slice, reflect.Array, reflect.Map:
|
||||
return rv.Len() == 0
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
func (f *FieldManipulationHandler) addFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// Copy existing data
	for k, v := range data {
		result[k] = v
	}

	// Add new fields from new_fields map
	for field, value := range config.NewFields {
		result[field] = value
	}

	// Add fields from fields list with default value
	for _, field := range config.Fields {
		if _, exists := result[field]; !exists {
			result[field] = config.DefaultValue
		}
	}

	return result, nil
}

func (f *FieldManipulationHandler) removeFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// Create set of fields to remove
	removeSet := make(map[string]bool)
	for _, field := range config.Fields {
		removeSet[field] = true
	}

	// Compile regex pattern if provided
	var pattern *regexp.Regexp
	if config.Pattern != "" {
		flags := ""
		if !config.CaseSensitive {
			flags = "(?i)"
		}
		var err error
		pattern, err = regexp.Compile(flags + config.Pattern)
		if err != nil {
			return nil, fmt.Errorf("invalid regex pattern: %v", err)
		}
	}

	// Copy fields that should not be removed
	for field, value := range data {
		shouldRemove := removeSet[field]

		// Check pattern matching
		if !shouldRemove && pattern != nil {
			shouldRemove = pattern.MatchString(field)
		}

		if !shouldRemove {
			result[field] = value
		}
	}

	return result, nil
}
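// Example (sketch): removing every field whose name starts with an
// underscore, via the NewFieldRemover factory defined later in this file.
//
//	remover := NewFieldRemover("strip-internal", nil, "^_")
//	// fields like "_trace" and "_debug" are dropped; "name" is kept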
func (f *FieldManipulationHandler) renameFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// Copy and rename fields
	for field, value := range data {
		newName := field
		if mappedName, exists := config.FieldMapping[field]; exists {
			newName = mappedName
		}
		result[newName] = value
	}

	return result, nil
}

func (f *FieldManipulationHandler) copyFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// Copy existing data
	for k, v := range data {
		result[k] = v
	}

	// Copy fields based on mapping
	for sourceField, targetField := range config.CopyMapping {
		if value, exists := data[sourceField]; exists {
			// Check if target already exists and overwrite is not allowed
			if _, targetExists := result[targetField]; targetExists && !config.OverwriteCopy {
				continue
			}
			result[targetField] = value
		}
	}

	return result, nil
}
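// Example (sketch): copying a field without clobbering an existing target;
// field names are hypothetical.
//
//	cfg := FieldOperationConfig{CopyMapping: map[string]string{"email": "contact"}}
//	out, _ := f.copyFields(map[string]any{"email": "a@b.co", "contact": "kept"}, cfg)
//	// out["contact"] == "kept" because OverwriteCopy defaults to false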
func (f *FieldManipulationHandler) transformFields(data map[string]any, config FieldOperationConfig) (map[string]any, error) {
	result := make(map[string]any)

	// Copy existing data
	for k, v := range data {
		result[k] = v
	}

	// Apply transformations to specified fields
	for _, field := range config.Fields {
		if value, exists := result[field]; exists {
			transformedValue, err := f.applyTransformation(value, config.Transformation, config.TransformConfig)
			if err != nil {
				return nil, fmt.Errorf("transformation failed for field '%s': %v", field, err)
			}
			result[field] = transformedValue
		}
	}

	return result, nil
}

func (f *FieldManipulationHandler) applyTransformation(value any, transformationType string, config map[string]any) (any, error) {
	switch transformationType {
	case "uppercase":
		return strings.ToUpper(fmt.Sprintf("%v", value)), nil

	case "lowercase":
		return strings.ToLower(fmt.Sprintf("%v", value)), nil

	case "title":
		return strings.Title(fmt.Sprintf("%v", value)), nil

	case "trim":
		return strings.TrimSpace(fmt.Sprintf("%v", value)), nil

	case "prefix":
		prefix, _ := config["prefix"].(string)
		return prefix + fmt.Sprintf("%v", value), nil

	case "suffix":
		suffix, _ := config["suffix"].(string)
		return fmt.Sprintf("%v", value) + suffix, nil

	case "replace":
		oldStr, _ := config["old"].(string)
		newStr, _ := config["new"].(string)
		return strings.ReplaceAll(fmt.Sprintf("%v", value), oldStr, newStr), nil

	case "regex_replace":
		pattern, _ := config["pattern"].(string)
		replacement, _ := config["replacement"].(string)
		re, err := regexp.Compile(pattern)
		if err != nil {
			return nil, fmt.Errorf("invalid regex pattern: %v", err)
		}
		return re.ReplaceAllString(fmt.Sprintf("%v", value), replacement), nil

	case "multiply":
		if multiplier, ok := config["multiplier"].(float64); ok {
			// Note: values that convert to zero (including non-numeric
			// strings) are passed through unchanged.
			if num := f.toFloat64(value); num != 0 {
				return num * multiplier, nil
			}
		}
		return value, nil

	case "add":
		if addend, ok := config["addend"].(float64); ok {
			// Same zero-value caveat as "multiply" above.
			if num := f.toFloat64(value); num != 0 {
				return num + addend, nil
			}
		}
		return value, nil

	case "absolute":
		if num := f.toFloat64(value); num != 0 {
			if num < 0 {
				return -num, nil
			}
			return num, nil
		}
		return value, nil

	case "default_if_empty":
		defaultVal := config["default"]
		if f.isEmpty(value) {
			return defaultVal, nil
		}
		return value, nil

	default:
		return nil, fmt.Errorf("unsupported transformation type: %s", transformationType)
	}
}
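// Example (sketch): a numeric transformation through applyTransformation;
// the "multiplier" key matches the config lookup above.
//
//	v, _ := f.applyTransformation(19.99, "multiply", map[string]any{"multiplier": 1.13})
//	// v == 22.5887 (float64)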
func (f *FieldManipulationHandler) toFloat64(value any) float64 {
	switch v := value.(type) {
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case float32:
		return float64(v)
	case float64:
		return v
	case string:
		var result float64
		if n, err := fmt.Sscanf(v, "%f", &result); err == nil && n == 1 {
			return result
		}
	}
	return 0
}
// Factory functions for common operations
func NewFieldFilter(id string, includeOnly, exclude []string, options FieldOperationConfig) *FieldManipulationHandler {
	options.IncludeOnly = includeOnly
	options.Exclude = exclude

	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "filter_fields",
			Type: dag.Function,
			Tags: []string{"data", "fields", "filter"},
		},
		Operations: []FieldOperation{
			{
				Type:   "filter",
				Config: options,
			},
		},
	}
}

func NewFieldAdder(id string, newFields map[string]any, defaultValue any) *FieldManipulationHandler {
	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "add_fields",
			Type: dag.Function,
			Tags: []string{"data", "fields", "add"},
		},
		Operations: []FieldOperation{
			{
				Type: "add",
				Config: FieldOperationConfig{
					NewFields:    newFields,
					DefaultValue: defaultValue,
				},
			},
		},
	}
}

func NewFieldRemover(id string, fieldsToRemove []string, pattern string) *FieldManipulationHandler {
	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "remove_fields",
			Type: dag.Function,
			Tags: []string{"data", "fields", "remove"},
		},
		Operations: []FieldOperation{
			{
				Type: "remove",
				Config: FieldOperationConfig{
					Fields:  fieldsToRemove,
					Pattern: pattern,
				},
			},
		},
	}
}

func NewFieldRenamer(id string, fieldMapping map[string]string) *FieldManipulationHandler {
	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "rename_fields",
			Type: dag.Function,
			Tags: []string{"data", "fields", "rename"},
		},
		Operations: []FieldOperation{
			{
				Type: "rename",
				Config: FieldOperationConfig{
					FieldMapping: fieldMapping,
				},
			},
		},
	}
}

func NewFieldTransformer(id string, fields []string, transformation string, transformConfig map[string]any) *FieldManipulationHandler {
	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "transform_fields",
			Type: dag.Function,
			Tags: []string{"data", "fields", "transform"},
		},
		Operations: []FieldOperation{
			{
				Type: "transform",
				Config: FieldOperationConfig{
					Fields:          fields,
					Transformation:  transformation,
					TransformConfig: transformConfig,
				},
			},
		},
	}
}

func NewAdvancedFieldManipulator(id string, operations []FieldOperation) *FieldManipulationHandler {
	return &FieldManipulationHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "advanced_field_manipulation",
			Type: dag.Function,
			Tags: []string{"data", "fields", "advanced"},
		},
		Operations: operations,
	}
}
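// Example (sketch): each single-operation factory wraps exactly one
// FieldOperation; IDs and field names here are hypothetical.
//
//	upper := NewFieldTransformer("upper-names", []string{"name"}, "uppercase", nil)
//	keep := NewFieldFilter("public-only", []string{"id", "name"}, nil, FieldOperationConfig{})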
@@ -3,385 +3,265 @@ package handlers
import (
	"context"
	"fmt"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// FlattenHandler handles flattening array of objects to single objects
type FlattenHandler struct {
	dag.Operation
}

func (h *FlattenHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	err := json.Unmarshal(task.Payload, &data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
	}

	operation, ok := h.Payload.Data["operation"].(string)
	if !ok {
		return mq.Result{Error: fmt.Errorf("operation not specified")}
	}

	var result map[string]any
	switch operation {
	case "flatten_settings":
		result = h.flattenSettings(data)
	case "flatten_key_value":
		result = h.flattenKeyValue(data)
	case "flatten_nested_objects":
		result = h.flattenNestedObjects(data)
	case "flatten_array":
		result = h.flattenArray(data)
	default:
		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)}
	}

	resultPayload, err := json.Marshal(result)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
	}

	return mq.Result{Payload: resultPayload, Ctx: ctx}
}

// flattenSettings converts array of settings objects with key, value, value_type to a flat object
func (h *FlattenHandler) flattenSettings(data map[string]any) map[string]any {
	result := make(map[string]any)
	sourceField := h.getSourceField()

	// Copy all original data
	for key, value := range data {
		result[key] = value
	}

	if settingsArray, ok := data[sourceField].([]interface{}); ok {
		flattened := make(map[string]any)

		for _, item := range settingsArray {
			if setting, ok := item.(map[string]interface{}); ok {
				key, keyExists := setting["key"].(string)
				value, valueExists := setting["value"]
				valueType, typeExists := setting["value_type"].(string)

				if keyExists && valueExists {
					// Convert value based on value_type
					if typeExists {
						flattened[key] = h.convertValue(value, valueType)
					} else {
						flattened[key] = value
					}
				}
			}
		}

		targetField := h.getTargetField()
		result[targetField] = flattened
	}

	return result
}

// flattenKeyValue converts array of key-value objects to a flat object
func (h *FlattenHandler) flattenKeyValue(data map[string]any) map[string]any {
	result := make(map[string]any)
	sourceField := h.getSourceField()
	keyField := h.getKeyField()
	valueField := h.getValueField()

	// Copy all original data
	for key, value := range data {
		result[key] = value
	}

	if kvArray, ok := data[sourceField].([]interface{}); ok {
		flattened := make(map[string]any)

		for _, item := range kvArray {
			if kvPair, ok := item.(map[string]interface{}); ok {
				if key, keyExists := kvPair[keyField]; keyExists {
					if value, valueExists := kvPair[valueField]; valueExists {
						if keyStr, ok := key.(string); ok {
							flattened[keyStr] = value
						}
					}
				}
			}
		}

		targetField := h.getTargetField()
		result[targetField] = flattened
	}

	return result
}

// flattenNestedObjects flattens nested objects using dot notation
func (h *FlattenHandler) flattenNestedObjects(data map[string]any) map[string]any {
	result := make(map[string]any)
	separator := h.getSeparator()

	h.flattenRecursive(data, "", result, separator)
	return result
}

// flattenArray flattens arrays by creating numbered fields
func (h *FlattenHandler) flattenArray(data map[string]any) map[string]any {
	result := make(map[string]any)
	sourceField := h.getSourceField()

	// Copy all original data except the source field
	for key, value := range data {
		if key != sourceField {
			result[key] = value
		}
	}

	if array, ok := data[sourceField].([]interface{}); ok {
		for i, item := range array {
			if obj, ok := item.(map[string]interface{}); ok {
				for key, value := range obj {
					result[fmt.Sprintf("%s_%d_%s", sourceField, i, key)] = value
				}
			} else {
				result[fmt.Sprintf("%s_%d", sourceField, i)] = item
			}
		}
	}

	return result
}

func (h *FlattenHandler) flattenRecursive(obj map[string]any, prefix string, result map[string]any, separator string) {
	for key, value := range obj {
		newKey := key
		if prefix != "" {
			newKey = prefix + separator + key
		}

		switch v := value.(type) {
		case map[string]interface{}:
			nestedMap := make(map[string]any)
			for k, val := range v {
				nestedMap[k] = val
			}
			h.flattenRecursive(nestedMap, newKey, result, separator)
		case []interface{}:
			// For arrays, create numbered fields
			for i, item := range v {
				itemKey := fmt.Sprintf("%s%s%d", newKey, separator, i)
				if itemMap, ok := item.(map[string]interface{}); ok {
					nestedMap := make(map[string]any)
					for k, val := range itemMap {
						nestedMap[k] = val
					}
					h.flattenRecursive(nestedMap, itemKey, result, separator)
				} else {
					result[itemKey] = item
				}
			}
		default:
			result[newKey] = value
		}
	}
}

func (h *FlattenHandler) convertValue(value interface{}, valueType string) interface{} {
	switch valueType {
	case "string":
		return fmt.Sprintf("%v", value)
	case "int", "integer":
		if str, ok := value.(string); ok {
			var intVal int
			fmt.Sscanf(str, "%d", &intVal)
			return intVal
		}
		return value
	case "float", "number":
		if str, ok := value.(string); ok {
			var floatVal float64
			fmt.Sscanf(str, "%f", &floatVal)
			return floatVal
		}
		return value
	case "bool", "boolean":
		if str, ok := value.(string); ok {
			return str == "true" || str == "1"
		}
		return value
	case "json":
		if str, ok := value.(string); ok {
			var jsonVal interface{}
			if err := json.Unmarshal([]byte(str), &jsonVal); err == nil {
				return jsonVal
			}
		}
		return value
	default:
		return value
	}
}

func (h *FlattenHandler) getSourceField() string {
	if field, ok := h.Payload.Data["source_field"].(string); ok {
		return field
	}
	return "settings" // Default
}

func (h *FlattenHandler) getTargetField() string {
	if field, ok := h.Payload.Data["target_field"].(string); ok {
		return field
	}
	return "flattened" // Default
}

func (h *FlattenHandler) getKeyField() string {
	if field, ok := h.Payload.Data["key_field"].(string); ok {
		return field
	}
	return "key" // Default
}

func (h *FlattenHandler) getValueField() string {
	if field, ok := h.Payload.Data["value_field"].(string); ok {
		return field
	}
	return "value" // Default
}

func (h *FlattenHandler) getSeparator() string {
	if sep, ok := h.Payload.Data["separator"].(string); ok {
		return sep
	}
	return "." // Default separator for flattening
}

func NewFlattenHandler(id string) *FlattenHandler {
	return &FlattenHandler{
		Operation: dag.Operation{ID: id, Key: "flatten", Type: dag.Function, Tags: []string{"data", "transformation", "flatten"}},
	}
}
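// Example (illustrative sketch, not part of this commit): driving the
// flatten_settings operation through the payload config. The keys match the
// getters above; the ID and data values are hypothetical.
//
//	h := NewFlattenHandler("settings-flattener")
//	h.SetConfig(dag.Payload{Data: map[string]any{
//		"operation":    "flatten_settings",
//		"source_field": "settings",
//		"target_field": "config",
//	}})
//	payload := []byte(`{"settings":[{"key":"retries","value":"3","value_type":"int"}]}`)
//	res := h.ProcessTask(context.Background(), &mq.Task{Payload: payload})
//	// res.Payload gains "config":{"retries":3} alongside the original data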
@@ -3,7 +3,6 @@ package handlers
import (
	"context"
	"fmt"
	"strconv"
	"strings"
	"time"
@@ -13,302 +12,249 @@ import (
	"github.com/oarkflow/mq/dag"
)

// FormatHandler handles data formatting operations
type FormatHandler struct {
	dag.Operation
}

func (h *FormatHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	err := json.Unmarshal(task.Payload, &data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
	}

	formatType, ok := h.Payload.Data["format_type"].(string)
	if !ok {
		return mq.Result{Error: fmt.Errorf("format_type not specified")}
	}

	var result map[string]any
	switch formatType {
	case "string":
		result = h.formatToString(data)
	case "number":
		result = h.formatToNumber(data)
	case "date":
		result = h.formatDate(data)
	case "currency":
		result = h.formatCurrency(data)
	case "uppercase":
		result = h.formatUppercase(data)
	case "lowercase":
		result = h.formatLowercase(data)
	case "capitalize":
		result = h.formatCapitalize(data)
	case "trim":
		result = h.formatTrim(data)
	default:
		return mq.Result{Error: fmt.Errorf("unsupported format_type: %s", formatType)}
	}

	resultPayload, err := json.Marshal(result)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
	}

	return mq.Result{Payload: resultPayload, Ctx: ctx}
}

func (h *FormatHandler) formatToString(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			result[key] = fmt.Sprintf("%v", value)
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatToNumber(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				if num, err := strconv.ParseFloat(str, 64); err == nil {
					result[key] = num
				} else {
					result[key] = value // Keep original if conversion fails
				}
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatDate(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)
	dateFormat := h.getDateFormat()

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				if t, err := time.Parse(time.RFC3339, str); err == nil {
					result[key] = t.Format(dateFormat)
				} else if t, err := time.Parse("2006-01-02", str); err == nil {
					result[key] = t.Format(dateFormat)
				} else {
					result[key] = value // Keep original if parsing fails
				}
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatCurrency(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)
	currency := h.getCurrency()

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if num, ok := value.(float64); ok {
				result[key] = fmt.Sprintf("%s%.2f", currency, num)
			} else if str, ok := value.(string); ok {
				if num, err := strconv.ParseFloat(str, 64); err == nil {
					result[key] = fmt.Sprintf("%s%.2f", currency, num)
				} else {
					result[key] = value
				}
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatUppercase(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				result[key] = strings.ToUpper(str)
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatLowercase(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				result[key] = strings.ToLower(str)
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatCapitalize(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				result[key] = strings.Title(strings.ToLower(str))
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) formatTrim(data map[string]any) map[string]any {
	result := make(map[string]any)
	fields := h.getTargetFields(data)

	for key, value := range data {
		if len(fields) == 0 || contains(fields, key) {
			if str, ok := value.(string); ok {
				result[key] = strings.TrimSpace(str)
			} else {
				result[key] = value
			}
		} else {
			result[key] = value
		}
	}
	return result
}

func (h *FormatHandler) getTargetFields(data map[string]any) []string {
	if fields, ok := h.Payload.Data["fields"].([]interface{}); ok {
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func (h *FormatHandler) getDateFormat() string {
	if format, ok := h.Payload.Data["date_format"].(string); ok {
		return format
	}
	return "2006-01-02" // Default date format
}

func (h *FormatHandler) getCurrency() string {
	if currency, ok := h.Payload.Data["currency"].(string); ok {
		return currency
	}
	return "$" // Default currency symbol
}

func contains(slice []string, item string) bool {
	for _, s := range slice {
		if s == item {
			return true
		}
	}
	return false
}

func NewFormatHandler(id string) *FormatHandler {
	return &FormatHandler{
		Operation: dag.Operation{ID: id, Key: "format", Type: dag.Function, Tags: []string{"data", "transformation"}},
	}
}
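// Example (sketch): when the payload carries no "fields" list,
// getTargetFields returns nil and every string value is formatted; the ID is
// hypothetical.
//
//	h := NewFormatHandler("trim-all")
//	h.SetConfig(dag.Payload{Data: map[string]any{"format_type": "trim"}})
//	// Every string field in the incoming payload is whitespace-trimmed.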
280
handlers/group_handler.go
Normal file
@@ -0,0 +1,280 @@
package handlers

import (
	"context"
	"fmt"
	"sort"

	"github.com/oarkflow/json"
	"github.com/oarkflow/mq"
	"github.com/oarkflow/mq/dag"
)

// GroupHandler handles data grouping operations with aggregation
type GroupHandler struct {
	dag.Operation
}

func (h *GroupHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
	var data map[string]any
	err := json.Unmarshal(task.Payload, &data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
	}

	// Extract the data array
	dataArray, ok := data["data"].([]interface{})
	if !ok {
		return mq.Result{Error: fmt.Errorf("expected 'data' field to be an array")}
	}

	groupByFields := h.getGroupByFields()
	if len(groupByFields) == 0 {
		return mq.Result{Error: fmt.Errorf("group_by fields not specified")}
	}

	aggregations := h.getAggregations()
	result := h.groupData(dataArray, groupByFields, aggregations)

	// Update the data with grouped result
	data["data"] = result
	data["grouped"] = true
	data["group_count"] = len(result)

	resultPayload, err := json.Marshal(data)
	if err != nil {
		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
	}

	return mq.Result{Payload: resultPayload, Ctx: ctx}
}

func (h *GroupHandler) groupData(dataArray []interface{}, groupByFields []string, aggregations map[string]string) []map[string]any {
	groups := make(map[string][]map[string]any)

	// Group data by specified fields
	for _, item := range dataArray {
		record, ok := item.(map[string]any)
		if !ok {
			continue
		}

		// Create group key
		groupKey := h.createGroupKey(record, groupByFields)
		groups[groupKey] = append(groups[groupKey], record)
	}

	// Apply aggregations
	var result []map[string]any
	for _, records := range groups {
		groupResult := make(map[string]any)

		// Add group by fields to result
		if len(records) > 0 {
			for _, field := range groupByFields {
				groupResult[field] = records[0][field]
			}
		}

		// Apply aggregations
		for field, aggType := range aggregations {
			switch aggType {
			case "count":
				groupResult[field+"_count"] = len(records)
			case "sum":
				groupResult[field+"_sum"] = h.sumField(records, field)
			case "avg", "average":
				sum := h.sumField(records, field)
				if count := len(records); count > 0 {
					groupResult[field+"_avg"] = sum / float64(count)
				}
			case "min":
				groupResult[field+"_min"] = h.minField(records, field)
			case "max":
				groupResult[field+"_max"] = h.maxField(records, field)
			case "first":
				if len(records) > 0 {
					groupResult[field+"_first"] = records[0][field]
				}
			case "last":
				if len(records) > 0 {
					groupResult[field+"_last"] = records[len(records)-1][field]
				}
			case "concat":
				groupResult[field+"_concat"] = h.concatField(records, field)
			case "unique":
				groupResult[field+"_unique"] = h.uniqueField(records, field)
			}
		}

		// Add record count
		groupResult["_record_count"] = len(records)

		result = append(result, groupResult)
	}

	// Sort results for consistent output
	sort.Slice(result, func(i, j int) bool {
		for _, field := range groupByFields {
			if fmt.Sprintf("%v", result[i][field]) < fmt.Sprintf("%v", result[j][field]) {
				return true
			} else if fmt.Sprintf("%v", result[i][field]) > fmt.Sprintf("%v", result[j][field]) {
				return false
			}
		}
		return false
	})

	return result
}

func (h *GroupHandler) createGroupKey(record map[string]any, fields []string) string {
	var keyParts []string
	for _, field := range fields {
		keyParts = append(keyParts, fmt.Sprintf("%v", record[field]))
	}
	return fmt.Sprintf("%v", keyParts)
}

func (h *GroupHandler) sumField(records []map[string]any, field string) float64 {
	var sum float64
	for _, record := range records {
		if val, ok := record[field]; ok {
			switch v := val.(type) {
			case float64:
				sum += v
			case int:
				sum += float64(v)
			case int64:
				sum += float64(v)
			}
		}
	}
	return sum
}

func (h *GroupHandler) minField(records []map[string]any, field string) interface{} {
	if len(records) == 0 {
		return nil
	}

	var min interface{}
	for _, record := range records {
		if val, ok := record[field]; ok {
			if min == nil {
				min = val
			} else {
				if h.compareValues(val, min) < 0 {
					min = val
				}
			}
		}
	}
	return min
}

func (h *GroupHandler) maxField(records []map[string]any, field string) interface{} {
	if len(records) == 0 {
		return nil
	}

	var max interface{}
	for _, record := range records {
		if val, ok := record[field]; ok {
			if max == nil {
				max = val
			} else {
				if h.compareValues(val, max) > 0 {
					max = val
				}
			}
		}
	}
	return max
}

func (h *GroupHandler) concatField(records []map[string]any, field string) string {
	var values []string
	separator := h.getConcatSeparator()

	for _, record := range records {
		if val, ok := record[field]; ok && val != nil {
			values = append(values, fmt.Sprintf("%v", val))
		}
	}

	result := ""
	for i, val := range values {
		if i > 0 {
			result += separator
		}
		result += val
	}
	return result
}

func (h *GroupHandler) uniqueField(records []map[string]any, field string) []interface{} {
	seen := make(map[string]bool)
	var unique []interface{}

	for _, record := range records {
		if val, ok := record[field]; ok && val != nil {
			key := fmt.Sprintf("%v", val)
			if !seen[key] {
				seen[key] = true
				unique = append(unique, val)
			}
		}
	}

	return unique
}

// compareValues compares the two values by their string form, so ordering is
// lexicographic (e.g. "10" sorts before "9").
func (h *GroupHandler) compareValues(a, b interface{}) int {
	aStr := fmt.Sprintf("%v", a)
	bStr := fmt.Sprintf("%v", b)
	if aStr < bStr {
		return -1
	} else if aStr > bStr {
		return 1
	}
	return 0
}

func (h *GroupHandler) getGroupByFields() []string {
	if fields, ok := h.Payload.Data["group_by"].([]interface{}); ok {
		var result []string
		for _, field := range fields {
			if str, ok := field.(string); ok {
				result = append(result, str)
			}
		}
		return result
	}
	return nil
}

func (h *GroupHandler) getAggregations() map[string]string {
	result := make(map[string]string)
	if aggs, ok := h.Payload.Data["aggregations"].(map[string]interface{}); ok {
		for field, aggType := range aggs {
			if str, ok := aggType.(string); ok {
				result[field] = str
			}
		}
	}
	return result
}

func (h *GroupHandler) getConcatSeparator() string {
	if sep, ok := h.Payload.Data["concat_separator"].(string); ok {
		return sep
	}
	return ", " // Default separator
}

func NewGroupHandler(id string) *GroupHandler {
	return &GroupHandler{
		Operation: dag.Operation{ID: id, Key: "group", Type: dag.Function, Tags: []string{"data", "aggregation"}},
	}
}
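// Example (sketch): the payload keys GroupHandler reads; the ID and values
// are illustrative.
//
//	g := NewGroupHandler("group-by-dept")
//	g.SetConfig(dag.Payload{Data: map[string]any{
//		"group_by":     []any{"department"},
//		"aggregations": map[string]any{"salary": "avg", "name": "concat"},
//	}})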
@@ -1,338 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/oarkflow/json"
|
||||
"github.com/oarkflow/mq"
|
||||
"github.com/oarkflow/mq/dag"
|
||||
)
|
||||
|
||||
// GroupingHandler groups data by specified fields and applies aggregations
|
||||
type GroupingHandler struct {
|
||||
dag.Operation
|
||||
GroupByFields []string `json:"group_by_fields"` // fields to group by
|
||||
Aggregations []AggregationConfig `json:"aggregations"` // aggregation configurations
|
||||
SourceField string `json:"source_field"` // field containing array to group
|
||||
TargetField string `json:"target_field"` // field to store grouped result
|
||||
Options GroupingOptions `json:"options"` // additional options
|
||||
}
|
||||
|
||||
type AggregationConfig struct {
|
||||
Field string `json:"field"` // field to aggregate
|
||||
Operation string `json:"operation"` // sum, count, avg, min, max, concat, first, last
|
||||
Alias string `json:"alias"` // optional alias for result field
|
||||
}
|
||||
|
||||
type GroupingOptions struct {
|
||||
SortBy string `json:"sort_by"` // field to sort groups by
|
||||
SortDirection string `json:"sort_direction"` // asc or desc
|
||||
IncludeCount bool `json:"include_count"` // include count of items in each group
|
||||
CountAlias string `json:"count_alias"` // alias for count field (default: "count")
|
||||
}
|
||||
|
||||
func (g *GroupingHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
|
||||
var data map[string]any
|
||||
if err := json.Unmarshal(task.Payload, &data); err != nil {
|
||||
return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
|
||||
}
|
||||
|
||||
// Get source data
|
||||
sourceData, exists := data[g.SourceField]
|
||||
if !exists {
|
||||
return mq.Result{Error: fmt.Errorf("source field '%s' not found", g.SourceField), Ctx: ctx}
|
||||
}
|
||||
|
||||
// Convert to slice of maps
|
||||
items, err := g.convertToSliceOfMaps(sourceData)
|
||||
if err != nil {
|
||||
return mq.Result{Error: err, Ctx: ctx}
|
||||
}
|
||||
|
||||
// Group the data
|
||||
groups := g.groupData(items)
|
||||
|
||||
// Apply aggregations
|
||||
result := g.applyAggregations(groups)
|
||||
|
||||
// Sort if requested
|
||||
if g.Options.SortBy != "" {
|
||||
result = g.sortGroups(result)
|
||||
}
|
||||
|
||||
// Set target field
|
||||
targetField := g.TargetField
|
||||
if targetField == "" {
|
||||
targetField = "grouped_data"
|
||||
}
|
||||
data[targetField] = result
|
||||
|
||||
bt, _ := json.Marshal(data)
|
||||
return mq.Result{Payload: bt, Ctx: ctx}
|
||||
}
|
||||
|
||||
func (g *GroupingHandler) convertToSliceOfMaps(data any) ([]map[string]any, error) {
	rv := reflect.ValueOf(data)

	if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array {
		return nil, fmt.Errorf("source data must be an array or slice")
	}

	var items []map[string]any
	for i := 0; i < rv.Len(); i++ {
		item := rv.Index(i).Interface()

		// Convert item to map[string]any
		itemMap := make(map[string]any)
		itemBytes, err := json.Marshal(item)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal item at index %d: %v", i, err)
		}

		if err := json.Unmarshal(itemBytes, &itemMap); err != nil {
			return nil, fmt.Errorf("failed to unmarshal item at index %d: %v", i, err)
		}

		items = append(items, itemMap)
	}

	return items, nil
}
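// Note (editorial): the marshal/unmarshal round trip above is what lets the
// source array hold typed structs as well as plain maps; for example a
// struct{ Department string `json:"department"` } element arrives in groupData
// as map[string]any{"department": ...}, shaped by its JSON tags.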
func (g *GroupingHandler) groupData(items []map[string]any) map[string][]map[string]any {
	groups := make(map[string][]map[string]any)

	for _, item := range items {
		// Create group key
		var keyParts []string
		for _, field := range g.GroupByFields {
			value := fmt.Sprintf("%v", item[field])
			keyParts = append(keyParts, value)
		}
		groupKey := strings.Join(keyParts, "|")

		// Add item to group
		groups[groupKey] = append(groups[groupKey], item)
	}

	return groups
}
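// Example of the composite key built above (values invented): grouping by
// ["department", "city"] turns {"department": "eng", "city": "NYC"} into the
// key "eng|NYC". Field values that themselves contain "|" could collide with
// a neighbouring group, so keep group-by fields free of the separator.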
func (g *GroupingHandler) applyAggregations(groups map[string][]map[string]any) []map[string]any {
	var result []map[string]any

	for groupKey, items := range groups {
		groupResult := make(map[string]any)

		// Add group key fields
		keyParts := strings.Split(groupKey, "|")
		for i, field := range g.GroupByFields {
			if i < len(keyParts) {
				groupResult[field] = keyParts[i]
			}
		}

		// Add count if requested
		if g.Options.IncludeCount {
			countAlias := g.Options.CountAlias
			if countAlias == "" {
				countAlias = "count"
			}
			groupResult[countAlias] = len(items)
		}

		// Apply aggregations
		for _, agg := range g.Aggregations {
			fieldAlias := agg.Alias
			if fieldAlias == "" {
				fieldAlias = agg.Field + "_" + agg.Operation
			}

			aggregatedValue := g.performAggregation(items, agg)
			groupResult[fieldAlias] = aggregatedValue
		}

		result = append(result, groupResult)
	}

	return result
}
func (g *GroupingHandler) performAggregation(items []map[string]any, agg AggregationConfig) any {
	switch agg.Operation {
	case "count":
		return len(items)
	case "sum":
		return g.sumValues(items, agg.Field)
	case "avg":
		sum := g.sumValues(items, agg.Field)
		if count := len(items); count > 0 {
			return sum / float64(count)
		}
		return 0
	case "min":
		return g.minValue(items, agg.Field)
	case "max":
		return g.maxValue(items, agg.Field)
	case "first":
		if len(items) > 0 {
			return items[0][agg.Field]
		}
		return nil
	case "last":
		if len(items) > 0 {
			return items[len(items)-1][agg.Field]
		}
		return nil
	case "concat":
		return g.concatValues(items, agg.Field)
	case "unique":
		return g.uniqueValues(items, agg.Field)
	default:
		return nil
	}
}
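// Caveat (editorial): "avg" divides sumValues by the total number of items in
// the group, so items whose field is missing or non-numeric still inflate the
// denominator; they contribute 0 to the sum but are counted.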
func (g *GroupingHandler) sumValues(items []map[string]any, field string) float64 {
	var sum float64
	for _, item := range items {
		if value, exists := item[field]; exists {
			// toFloat64 returns 0 for non-numeric values, so the != 0 guard
			// merely skips adding zeros; missing or non-numeric values never
			// affect the sum.
			if num := g.toFloat64(value); num != 0 {
				sum += num
			}
		}
	}
	return sum
}

func (g *GroupingHandler) minValue(items []map[string]any, field string) any {
	var min any
	for _, item := range items {
		if value, exists := item[field]; exists {
			if min == nil {
				min = value
			} else {
				if g.compareValues(value, min) < 0 {
					min = value
				}
			}
		}
	}
	return min
}

func (g *GroupingHandler) maxValue(items []map[string]any, field string) any {
	var max any
	for _, item := range items {
		if value, exists := item[field]; exists {
			if max == nil {
				max = value
			} else {
				if g.compareValues(value, max) > 0 {
					max = value
				}
			}
		}
	}
	return max
}

func (g *GroupingHandler) concatValues(items []map[string]any, field string) string {
	var values []string
	for _, item := range items {
		if value, exists := item[field]; exists {
			values = append(values, fmt.Sprintf("%v", value))
		}
	}
	return strings.Join(values, ", ")
}

func (g *GroupingHandler) uniqueValues(items []map[string]any, field string) []any {
	seen := make(map[string]bool)
	var unique []any

	for _, item := range items {
		if value, exists := item[field]; exists {
			key := fmt.Sprintf("%v", value)
			if !seen[key] {
				seen[key] = true
				unique = append(unique, value)
			}
		}
	}
	return unique
}

func (g *GroupingHandler) toFloat64(value any) float64 {
	switch v := value.(type) {
	case int:
		return float64(v)
	case int32:
		return float64(v)
	case int64:
		return float64(v)
	case float32:
		return float64(v)
	case float64:
		return v
	case string:
		if num, err := strconv.ParseFloat(v, 64); err == nil {
			return num
		}
	}
	return 0
}

func (g *GroupingHandler) compareValues(a, b any) int {
	aFloat := g.toFloat64(a)
	bFloat := g.toFloat64(b)

	if aFloat < bFloat {
		return -1
	} else if aFloat > bFloat {
		return 1
	}

	// Equal as numbers (which includes the case where neither value is
	// numeric and both map to 0): fall back to string comparison.
	aStr := fmt.Sprintf("%v", a)
	bStr := fmt.Sprintf("%v", b)
	return strings.Compare(aStr, bStr)
}

func (g *GroupingHandler) sortGroups(groups []map[string]any) []map[string]any {
	sort.Slice(groups, func(i, j int) bool {
		valueI := groups[i][g.Options.SortBy]
		valueJ := groups[j][g.Options.SortBy]

		comparison := g.compareValues(valueI, valueJ)

		if g.Options.SortDirection == "desc" {
			return comparison > 0
		}
		return comparison < 0
	})

	return groups
}
// Factory function
func NewGroupingHandler(id, sourceField, targetField string, groupByFields []string, aggregations []AggregationConfig, options GroupingOptions) *GroupingHandler {
	return &GroupingHandler{
		Operation: dag.Operation{
			ID:   id,
			Key:  "group_data",
			Type: dag.Function,
			Tags: []string{"data", "grouping", "aggregation"},
		},
		GroupByFields: groupByFields,
		Aggregations:  aggregations,
		SourceField:   sourceField,
		TargetField:   targetField,
		Options:       options,
	}
}
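// exampleGroupingSketch is an editorial usage sketch, not part of the original
// commit: it runs the handler directly over an ad-hoc payload. The record
// values are invented for demonstration.
func exampleGroupingSketch(ctx context.Context) {
	handler := NewGroupingHandler(
		"group-demo", "records", "by_department",
		[]string{"department"},
		[]AggregationConfig{{Field: "salary", Operation: "sum", Alias: "total_salary"}},
		GroupingOptions{IncludeCount: true},
	)
	payload, _ := json.Marshal(map[string]any{
		"records": []map[string]any{
			{"department": "eng", "salary": 80000},
			{"department": "eng", "salary": 90000},
			{"department": "sales", "salary": 60000},
		},
	})
	result := handler.ProcessTask(ctx, &mq.Task{Payload: payload})
	fmt.Println(string(result.Payload), result.Error)
}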
@@ -3,364 +3,343 @@ package handlers
 import (
 	"context"
 	"fmt"
-	"strings"

 	"github.com/oarkflow/json"
 	"github.com/oarkflow/mq"
 	"github.com/oarkflow/mq/dag"
 )

-// JSONHandler handles JSON parsing and stringification operations
+// JSONHandler handles JSON parsing and stringifying operations
 type JSONHandler struct {
 	dag.Operation
-	OperationType string      `json:"operation_type"` // "parse" or "stringify"
-	SourceField   string      `json:"source_field"`   // field containing data to process
-	TargetField   string      `json:"target_field"`   // field to store result
-	Options       JSONOptions `json:"options"`        // processing options
 }

-type JSONOptions struct {
-	Pretty         bool   `json:"pretty"`           // pretty print JSON (stringify only)
-	Indent         string `json:"indent"`           // indentation string (stringify only)
-	EscapeHTML     bool   `json:"escape_html"`      // escape HTML in JSON strings (stringify only)
-	ValidateOnly   bool   `json:"validate_only"`    // only validate, don't parse (parse only)
-	ErrorOnInvalid bool   `json:"error_on_invalid"` // return error if JSON is invalid
-	DefaultOnError any    `json:"default_on_error"` // default value to use if parsing fails
-	StrictMode     bool   `json:"strict_mode"`      // strict JSON parsing
-	AllowComments  bool   `json:"allow_comments"`   // allow comments in JSON (parse only)
-	AllowTrailing  bool   `json:"allow_trailing"`   // allow trailing commas (parse only)
-}

-func (j *JSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
-	var data map[string]any
-	if err := json.Unmarshal(task.Payload, &data); err != nil {
-		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
-	}
-
-	// Get source value
-	sourceValue, exists := data[j.SourceField]
-	if !exists {
-		return mq.Result{Error: fmt.Errorf("source field '%s' not found", j.SourceField), Ctx: ctx}
-	}
-
-	var result any
-	var err error
-
-	switch j.OperationType {
-	case "parse":
-		result, err = j.parseJSON(sourceValue)
-	case "stringify":
-		result, err = j.stringifyJSON(sourceValue)
-	default:
-		return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", j.OperationType), Ctx: ctx}
-	}
-
-	if err != nil {
-		if j.Options.ErrorOnInvalid {
-			return mq.Result{Error: err, Ctx: ctx}
-		}
-		// Use default value if specified
-		if j.Options.DefaultOnError != nil {
-			result = j.Options.DefaultOnError
-		} else {
-			result = sourceValue // keep original value
-		}
-	}
-
-	// Set target field
-	targetField := j.TargetField
-	if targetField == "" {
-		targetField = j.SourceField // overwrite source if no target specified
-	}
-	data[targetField] = result
-
-	bt, _ := json.Marshal(data)
-	return mq.Result{Payload: bt, Ctx: ctx}
-}
+func (h *JSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
+	var data map[string]any
+	err := json.Unmarshal(task.Payload, &data)
+	if err != nil {
+		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
+	}
+
+	operation, ok := h.Payload.Data["operation"].(string)
+	if !ok {
+		return mq.Result{Error: fmt.Errorf("operation not specified")}
+	}
+
+	var result map[string]any
+	switch operation {
+	case "parse", "string_to_json":
+		result = h.parseJSON(data)
+	case "stringify", "json_to_string":
+		result = h.stringifyJSON(data)
+	case "pretty_print":
+		result = h.prettyPrintJSON(data)
+	case "minify":
+		result = h.minifyJSON(data)
+	case "validate":
+		result = h.validateJSON(data)
+	case "extract_fields":
+		result = h.extractFields(data)
+	default:
+		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)}
+	}
+
+	resultPayload, err := json.Marshal(result)
+	if err != nil {
+		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
+	}
+
+	return mq.Result{Payload: resultPayload, Ctx: ctx}
+}

-func (j *JSONHandler) parseJSON(value any) (any, error) {
-	// Convert value to string
-	jsonStr := fmt.Sprintf("%v", value)
-
-	// Validate only if requested
-	if j.Options.ValidateOnly {
-		var temp any
-		err := json.Unmarshal([]byte(jsonStr), &temp)
-		if err != nil {
-			return false, fmt.Errorf("invalid JSON: %v", err)
-		}
-		return true, nil
-	}
-
-	// Preprocess if needed
-	if j.Options.AllowComments {
-		jsonStr = j.removeComments(jsonStr)
-	}
-
-	if j.Options.AllowTrailing {
-		jsonStr = j.removeTrailingCommas(jsonStr)
-	}
-
-	// Parse JSON
-	var result any
-	err := json.Unmarshal([]byte(jsonStr), &result)
-	if err != nil {
-		return nil, fmt.Errorf("failed to parse JSON: %v", err)
-	}
-
-	return result, nil
-}
+func (h *JSONHandler) parseJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			if str, ok := val.(string); ok {
+				var parsed interface{}
+				if err := json.Unmarshal([]byte(str), &parsed); err == nil {
+					targetField := h.getTargetFieldForSource(field)
+					result[targetField] = parsed
+					result[field+"_parsed"] = true
+				} else {
+					result[field+"_parse_error"] = err.Error()
+					result[field+"_parsed"] = false
+				}
+			}
+		}
+	}
+
+	return result
+}

-func (j *JSONHandler) stringifyJSON(value any) (string, error) {
-	var result []byte
-	var err error
-
-	if j.Options.Pretty {
-		indent := j.Options.Indent
-		if indent == "" {
-			indent = "  " // default indentation
-		}
-		result, err = json.MarshalIndent(value, "", indent)
-	} else {
-		result, err = json.Marshal(value)
-	}
-
-	if err != nil {
-		return "", fmt.Errorf("failed to stringify JSON: %v", err)
-	}
-
-	return string(result), nil
-}
-
-func (j *JSONHandler) removeComments(jsonStr string) string {
-	lines := strings.Split(jsonStr, "\n")
-	var cleanLines []string
-
-	for _, line := range lines {
-		// Remove single-line comments
-		if commentIndex := strings.Index(line, "//"); commentIndex != -1 {
-			line = line[:commentIndex]
-		}
-		cleanLines = append(cleanLines, line)
-	}
-
-	result := strings.Join(cleanLines, "\n")
-
-	// Remove multi-line comments (basic implementation)
-	for {
-		start := strings.Index(result, "/*")
-		if start == -1 {
-			break
-		}
-		end := strings.Index(result[start:], "*/")
-		if end == -1 {
-			break
-		}
-		result = result[:start] + result[start+end+2:]
-	}
-
-	return result
-}
-
-func (j *JSONHandler) removeTrailingCommas(jsonStr string) string {
-	// Basic implementation - remove commas before closing brackets/braces
-	jsonStr = strings.ReplaceAll(jsonStr, ",}", "}")
-	jsonStr = strings.ReplaceAll(jsonStr, ",]", "]")
-	return jsonStr
-}
-
-// Advanced JSON handler for complex operations
-type AdvancedJSONHandler struct {
-	dag.Operation
-	Operations []JSONOperation `json:"operations"` // chain of JSON operations
-}
-
-type JSONOperation struct {
-	Type        string      `json:"type"`         // "parse", "stringify", "validate", "extract", "merge"
-	SourceField string      `json:"source_field"` // field to operate on
-	TargetField string      `json:"target_field"` // field to store result
-	Options     JSONOptions `json:"options"`      // operation options
-	Path        string      `json:"path"`         // JSON path for extraction (extract only)
-	MergeWith   string      `json:"merge_with"`   // field to merge with (merge only)
-}
-
-func (a *AdvancedJSONHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
-	var data map[string]any
-	if err := json.Unmarshal(task.Payload, &data); err != nil {
-		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
-	}
-
-	// Execute operations in sequence
-	for i, op := range a.Operations {
-		var result any
-		var err error
-
-		switch op.Type {
-		case "parse", "stringify":
-			handler := &JSONHandler{
-				OperationType: op.Type,
-				SourceField:   op.SourceField,
-				TargetField:   op.TargetField,
-				Options:       op.Options,
-			}
-
-			tempData, _ := json.Marshal(data)
-			tempTask := &mq.Task{Payload: tempData}
-
-			handlerResult := handler.ProcessTask(ctx, tempTask)
-			if handlerResult.Error != nil {
-				return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, handlerResult.Error), Ctx: ctx}
-			}
-
-			if err := json.Unmarshal(handlerResult.Payload, &data); err != nil {
-				return mq.Result{Error: fmt.Errorf("failed to unmarshal result from operation %d: %v", i+1, err), Ctx: ctx}
-			}
-			continue
-
-		case "validate":
-			result, err = a.validateJSON(data[op.SourceField])
-		case "extract":
-			result, err = a.extractFromJSON(data[op.SourceField], op.Path)
-		case "merge":
-			result, err = a.mergeJSON(data[op.SourceField], data[op.MergeWith])
-		default:
-			return mq.Result{Error: fmt.Errorf("unsupported operation type: %s", op.Type), Ctx: ctx}
-		}
-
-		if err != nil {
-			if op.Options.ErrorOnInvalid {
-				return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, err), Ctx: ctx}
-			}
-			result = op.Options.DefaultOnError
-		}
-
-		// Set target field
-		targetField := op.TargetField
-		if targetField == "" {
-			targetField = op.SourceField
-		}
-		data[targetField] = result
-	}
-
-	bt, _ := json.Marshal(data)
-	return mq.Result{Payload: bt, Ctx: ctx}
-}
-
-func (a *AdvancedJSONHandler) validateJSON(value any) (bool, error) {
-	jsonStr := fmt.Sprintf("%v", value)
-	var temp any
-	err := json.Unmarshal([]byte(jsonStr), &temp)
-	return err == nil, err
-}
-
-func (a *AdvancedJSONHandler) extractFromJSON(value any, path string) (any, error) {
-	// Basic JSON path extraction (simplified implementation)
-	// For production use, consider using a proper JSON path library
-
-	var jsonData any
-	if str, ok := value.(string); ok {
-		if err := json.Unmarshal([]byte(str), &jsonData); err != nil {
-			return nil, fmt.Errorf("invalid JSON: %v", err)
-		}
-	} else {
-		jsonData = value
-	}
-
-	// Split path and navigate
-	parts := strings.Split(strings.Trim(path, "."), ".")
-	current := jsonData
-
-	for _, part := range parts {
-		if part == "" {
-			continue
-		}
-
-		switch v := current.(type) {
-		case map[string]any:
-			current = v[part]
-		default:
-			return nil, fmt.Errorf("cannot navigate path '%s' at part '%s'", path, part)
-		}
-	}
-
-	return current, nil
-}
-
-func (a *AdvancedJSONHandler) mergeJSON(value1, value2 any) (any, error) {
-	// Convert both values to maps if they're JSON strings
-	var map1, map2 map[string]any
-
-	if str, ok := value1.(string); ok {
-		if err := json.Unmarshal([]byte(str), &map1); err != nil {
-			return nil, fmt.Errorf("invalid JSON in first value: %v", err)
-		}
-	} else if m, ok := value1.(map[string]any); ok {
-		map1 = m
-	} else {
-		return nil, fmt.Errorf("first value is not a JSON object")
-	}
-
-	if str, ok := value2.(string); ok {
-		if err := json.Unmarshal([]byte(str), &map2); err != nil {
-			return nil, fmt.Errorf("invalid JSON in second value: %v", err)
-		}
-	} else if m, ok := value2.(map[string]any); ok {
-		map2 = m
-	} else {
-		return nil, fmt.Errorf("second value is not a JSON object")
-	}
-
-	// Merge maps
-	result := make(map[string]any)
-	for k, v := range map1 {
-		result[k] = v
-	}
-	for k, v := range map2 {
-		result[k] = v // overwrites if key exists
-	}
-
-	return result, nil
-}
+func (h *JSONHandler) stringifyJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+	indent := h.getIndent()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			var jsonBytes []byte
+			var err error
+
+			if indent {
+				jsonBytes, err = json.MarshalIndent(val, "", "  ")
+			} else {
+				jsonBytes, err = json.Marshal(val)
+			}
+
+			if err == nil {
+				targetField := h.getTargetFieldForSource(field)
+				result[targetField] = string(jsonBytes)
+				result[field+"_stringified"] = true
+			} else {
+				result[field+"_stringify_error"] = err.Error()
+				result[field+"_stringified"] = false
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *JSONHandler) prettyPrintJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			var prettyJSON interface{}
+
+			// If it's a string, try to parse it first
+			if str, ok := val.(string); ok {
+				if err := json.Unmarshal([]byte(str), &prettyJSON); err != nil {
+					prettyJSON = val
+				}
+			} else {
+				prettyJSON = val
+			}
+
+			if jsonBytes, err := json.MarshalIndent(prettyJSON, "", "  "); err == nil {
+				targetField := h.getTargetFieldForSource(field)
+				result[targetField] = string(jsonBytes)
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *JSONHandler) minifyJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			var minifyJSON interface{}
+
+			// If it's a string, try to parse it first
+			if str, ok := val.(string); ok {
+				if err := json.Unmarshal([]byte(str), &minifyJSON); err != nil {
+					minifyJSON = val
+				}
+			} else {
+				minifyJSON = val
+			}
+
+			if jsonBytes, err := json.Marshal(minifyJSON); err == nil {
+				targetField := h.getTargetFieldForSource(field)
+				result[targetField] = string(jsonBytes)
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *JSONHandler) validateJSON(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			if str, ok := val.(string); ok {
+				var temp interface{}
+				if err := json.Unmarshal([]byte(str), &temp); err == nil {
+					result[field+"_valid_json"] = true
+					result[field+"_json_type"] = h.getJSONType(temp)
+				} else {
+					result[field+"_valid_json"] = false
+					result[field+"_validation_error"] = err.Error()
+				}
+			} else {
+				result[field+"_valid_json"] = true
+				result[field+"_json_type"] = h.getJSONType(val)
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *JSONHandler) extractFields(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	sourceField := h.getSourceField()
+	fieldsToExtract := h.getFieldsToExtract()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	if val, ok := data[sourceField]; ok {
+		var jsonData map[string]interface{}
+
+		// If it's a string, parse it
+		if str, ok := val.(string); ok {
+			if err := json.Unmarshal([]byte(str), &jsonData); err != nil {
+				result["extract_error"] = err.Error()
+				return result
+			}
+		} else if obj, ok := val.(map[string]interface{}); ok {
+			jsonData = obj
+		} else {
+			result["extract_error"] = "source field is not a JSON object or string"
+			return result
+		}
+
+		// Extract specified fields
+		for _, fieldPath := range fieldsToExtract {
+			if extractedVal := h.extractNestedField(jsonData, fieldPath); extractedVal != nil {
+				result[fieldPath] = extractedVal
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *JSONHandler) extractNestedField(data map[string]interface{}, fieldPath string) interface{} {
+	// Simple implementation for dot notation
+	// For more complex path extraction, could use jsonpath library
+	if val, ok := data[fieldPath]; ok {
+		return val
+	}
+	return nil
+}
+
+func (h *JSONHandler) getJSONType(val interface{}) string {
+	switch val.(type) {
+	case map[string]interface{}:
+		return "object"
+	case []interface{}:
+		return "array"
+	case string:
+		return "string"
+	case float64:
+		return "number"
+	case bool:
+		return "boolean"
+	case nil:
+		return "null"
+	default:
+		return "unknown"
+	}
+}
+
+func (h *JSONHandler) getTargetFields() []string {
+	if fields, ok := h.Payload.Data["fields"].([]interface{}); ok {
+		var result []string
+		for _, field := range fields {
+			if str, ok := field.(string); ok {
+				result = append(result, str)
+			}
+		}
+		return result
+	}
+	return nil
+}
+
+func (h *JSONHandler) getSourceField() string {
+	if field, ok := h.Payload.Data["source_field"].(string); ok {
+		return field
+	}
+	return ""
+}
+
+func (h *JSONHandler) getFieldsToExtract() []string {
+	if fields, ok := h.Payload.Data["extract_fields"].([]interface{}); ok {
+		var result []string
+		for _, field := range fields {
+			if str, ok := field.(string); ok {
+				result = append(result, str)
+			}
+		}
+		return result
+	}
+	return nil
+}
+
+func (h *JSONHandler) getTargetFieldForSource(sourceField string) string {
+	// Check if there's a specific mapping
+	if mapping, ok := h.Payload.Data["field_mapping"].(map[string]interface{}); ok {
+		if target, ok := mapping[sourceField].(string); ok {
+			return target
+		}
+	}
+
+	// Default: append suffix based on operation
+	operation, _ := h.Payload.Data["operation"].(string)
+	switch operation {
+	case "parse", "string_to_json":
+		return sourceField + "_parsed"
+	case "stringify", "json_to_string":
+		return sourceField + "_string"
+	case "pretty_print":
+		return sourceField + "_pretty"
+	case "minify":
+		return sourceField + "_minified"
+	default:
+		return sourceField + "_result"
+	}
+}
+
+func (h *JSONHandler) getIndent() bool {
+	if indent, ok := h.Payload.Data["indent"].(bool); ok {
+		return indent
+	}
+	return false
+}

-// Factory functions
-func NewJSONParser(id, sourceField, targetField string, options JSONOptions) *JSONHandler {
-	return &JSONHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "json_parse",
-			Type: dag.Function,
-			Tags: []string{"data", "json", "parse"},
-		},
-		OperationType: "parse",
-		SourceField:   sourceField,
-		TargetField:   targetField,
-		Options:       options,
-	}
-}
-
-func NewJSONStringifier(id, sourceField, targetField string, options JSONOptions) *JSONHandler {
-	return &JSONHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "json_stringify",
-			Type: dag.Function,
-			Tags: []string{"data", "json", "stringify"},
-		},
-		OperationType: "stringify",
-		SourceField:   sourceField,
-		TargetField:   targetField,
-		Options:       options,
-	}
-}
-
-func NewAdvancedJSONHandler(id string, operations []JSONOperation) *AdvancedJSONHandler {
-	return &AdvancedJSONHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "advanced_json",
-			Type: dag.Function,
-			Tags: []string{"data", "json", "advanced"},
-		},
-		Operations: operations,
-	}
-}
+func NewJSONHandler(id string) *JSONHandler {
+	return &JSONHandler{
+		Operation: dag.Operation{ID: id, Key: "json", Type: dag.Function, Tags: []string{"data", "transformation", "json"}},
+	}
+}
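// Editorial usage sketch for the reworked, payload-driven JSONHandler above
// (assumes the SetConfig(dag.Payload{...}) pattern used by this repo's
// examples; field names are invented):
//
//	h := NewJSONHandler("json-demo")
//	h.SetConfig(dag.Payload{Data: map[string]any{
//		"operation": "parse",           // parse, stringify, pretty_print, minify, validate, extract_fields
//		"fields":    []any{"raw_json"}, // source fields; getTargetFields expects []interface{}
//	}})
//
// On success the decoded value lands under the target from "field_mapping";
// note that without a mapping the default parse target ("raw_json" + "_parsed")
// collides with the boolean "_parsed" status flag that parseJSON also writes.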
@@ -3,8 +3,6 @@ package handlers
 import (
 	"context"
 	"fmt"
-	"reflect"
-	"regexp"
 	"strings"

 	"github.com/oarkflow/json"
@@ -12,248 +10,201 @@ import (
 	"github.com/oarkflow/mq"
 	"github.com/oarkflow/mq/dag"
 )

-// SplitJoinHandler handles splitting strings into arrays and joining arrays into strings
+// SplitJoinHandler handles string split and join operations
 type SplitJoinHandler struct {
 	dag.Operation
-	OpType      string           `json:"op_type"`      // "split" or "join"
-	SourceField string           `json:"source_field"` // field to operate on
-	TargetField string           `json:"target_field"` // field to store result
-	Delimiter   string           `json:"delimiter"`    // delimiter for split/join
-	Options     SplitJoinOptions `json:"options"`
 }

-type SplitJoinOptions struct {
-	TrimSpaces      bool   `json:"trim_spaces"`      // trim spaces from elements (split only)
-	RemoveEmpty     bool   `json:"remove_empty"`     // remove empty elements (split only)
-	MaxSplit        int    `json:"max_split"`        // maximum number of splits (-1 for unlimited)
-	UseRegex        bool   `json:"use_regex"`        // treat delimiter as regex pattern (split only)
-	CaseInsensitive bool   `json:"case_insensitive"` // case insensitive regex (split only)
-	Prefix          string `json:"prefix"`           // prefix for joined string (join only)
-	Suffix          string `json:"suffix"`           // suffix for joined string (join only)
-}

-func (s *SplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
-	var data map[string]any
-	if err := json.Unmarshal(task.Payload, &data); err != nil {
-		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
-	}
-
-	// Get source value
-	sourceValue, exists := data[s.SourceField]
-	if !exists {
-		return mq.Result{Error: fmt.Errorf("source field '%s' not found", s.SourceField), Ctx: ctx}
-	}
-
-	var result any
-	var err error
-
-	switch s.OpType {
-	case "split":
-		result, err = s.performSplit(sourceValue)
-	case "join":
-		result, err = s.performJoin(sourceValue)
-	default:
-		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", s.OpType), Ctx: ctx}
-	}
-
-	if err != nil {
-		return mq.Result{Error: err, Ctx: ctx}
-	}
-
-	// Set target field
-	targetField := s.TargetField
-	if targetField == "" {
-		targetField = s.SourceField // overwrite source if no target specified
-	}
-	data[targetField] = result
-
-	bt, _ := json.Marshal(data)
-	return mq.Result{Payload: bt, Ctx: ctx}
-}
+func (h *SplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
+	var data map[string]any
+	err := json.Unmarshal(task.Payload, &data)
+	if err != nil {
+		return mq.Result{Error: fmt.Errorf("failed to unmarshal task payload: %w", err)}
+	}
+
+	operation, ok := h.Payload.Data["operation"].(string)
+	if !ok {
+		return mq.Result{Error: fmt.Errorf("operation not specified")}
+	}
+
+	var result map[string]any
+	switch operation {
+	case "split":
+		result = h.splitOperation(data)
+	case "join":
+		result = h.joinOperation(data)
+	case "split_to_array":
+		result = h.splitToArrayOperation(data)
+	case "join_from_array":
+		result = h.joinFromArrayOperation(data)
+	default:
+		return mq.Result{Error: fmt.Errorf("unsupported operation: %s", operation)}
+	}
+
+	resultPayload, err := json.Marshal(result)
+	if err != nil {
+		return mq.Result{Error: fmt.Errorf("failed to marshal result: %w", err)}
+	}
+
+	return mq.Result{Payload: resultPayload, Ctx: ctx}
+}

-func (s *SplitJoinHandler) performSplit(value any) ([]string, error) {
-	// Convert value to string
-	str := fmt.Sprintf("%v", value)
-
-	var parts []string
-
-	if s.Options.UseRegex {
-		// Use regex for splitting
-		flags := ""
-		if s.Options.CaseInsensitive {
-			flags = "(?i)"
-		}
-		pattern := flags + s.Delimiter
-
-		re, err := regexp.Compile(pattern)
-		if err != nil {
-			return nil, fmt.Errorf("invalid regex pattern '%s': %v", pattern, err)
-		}
-
-		if s.Options.MaxSplit > 0 {
-			parts = re.Split(str, s.Options.MaxSplit+1)
-		} else {
-			parts = re.Split(str, -1)
-		}
-	} else {
-		// Use simple string splitting
-		if s.Options.MaxSplit > 0 {
-			parts = strings.SplitN(str, s.Delimiter, s.Options.MaxSplit+1)
-		} else {
-			parts = strings.Split(str, s.Delimiter)
-		}
-	}
-
-	// Process the parts based on options
-	var processedParts []string
-	for _, part := range parts {
-		if s.Options.TrimSpaces {
-			part = strings.TrimSpace(part)
-		}
-
-		if s.Options.RemoveEmpty && part == "" {
-			continue
-		}
-
-		processedParts = append(processedParts, part)
-	}
-
-	return processedParts, nil
-}
+func (h *SplitJoinHandler) splitOperation(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+	separator := h.getSeparator()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			if str, ok := val.(string); ok {
+				parts := strings.Split(str, separator)
+
+				// Create individual fields for each part
+				for i, part := range parts {
+					result[fmt.Sprintf("%s_%d", field, i)] = strings.TrimSpace(part)
+				}
+
+				// Also store as array
+				result[field+"_parts"] = parts
+				result[field+"_count"] = len(parts)
+			}
+		}
+	}
+
+	return result
+}

-func (s *SplitJoinHandler) performJoin(value any) (string, error) {
-	// Convert value to slice of strings
-	parts, err := s.convertToStringSlice(value)
-	if err != nil {
-		return "", err
-	}
-
-	// Join the parts
-	joined := strings.Join(parts, s.Delimiter)
-
-	// Add prefix/suffix if specified
-	if s.Options.Prefix != "" {
-		joined = s.Options.Prefix + joined
-	}
-	if s.Options.Suffix != "" {
-		joined = joined + s.Options.Suffix
-	}
-
-	return joined, nil
-}
-
-func (s *SplitJoinHandler) convertToStringSlice(value any) ([]string, error) {
-	rv := reflect.ValueOf(value)
-
-	if rv.Kind() != reflect.Slice && rv.Kind() != reflect.Array {
-		return nil, fmt.Errorf("value must be an array or slice for join operation")
-	}
-
-	var parts []string
-	for i := 0; i < rv.Len(); i++ {
-		element := rv.Index(i).Interface()
-		parts = append(parts, fmt.Sprintf("%v", element))
-	}
-
-	return parts, nil
-}
-
-// Advanced split/join handler for complex operations
-type AdvancedSplitJoinHandler struct {
-	dag.Operation
-	Operations []SplitJoinOperation `json:"operations"` // chain of split/join operations
-}
-
-type SplitJoinOperation struct {
-	Type        string           `json:"type"`         // "split" or "join"
-	SourceField string           `json:"source_field"` // field to operate on
-	TargetField string           `json:"target_field"` // field to store result
-	Delimiter   string           `json:"delimiter"`    // delimiter for operation
-	Options     SplitJoinOptions `json:"options"`      // operation options
-}
-
-func (a *AdvancedSplitJoinHandler) ProcessTask(ctx context.Context, task *mq.Task) mq.Result {
-	var data map[string]any
-	if err := json.Unmarshal(task.Payload, &data); err != nil {
-		return mq.Result{Error: fmt.Errorf("failed to unmarshal data: %v", err), Ctx: ctx}
-	}
-
-	// Execute operations in sequence
-	for i, op := range a.Operations {
-		handler := &SplitJoinHandler{
-			Operation: dag.Operation{
-				ID:   fmt.Sprintf("%s_op_%d", a.ID, i),
-				Key:  "temp_split_join",
-				Type: dag.Function,
-				Tags: []string{"data", "temp"},
-			},
-			OpType:      op.Type,
-			SourceField: op.SourceField,
-			TargetField: op.TargetField,
-			Delimiter:   op.Delimiter,
-			Options:     op.Options,
-		}
-
-		// Create a temporary task for this operation
-		tempData, _ := json.Marshal(data)
-		tempTask := &mq.Task{Payload: tempData}
-
-		result := handler.ProcessTask(ctx, tempTask)
-		if result.Error != nil {
-			return mq.Result{Error: fmt.Errorf("operation %d failed: %v", i+1, result.Error), Ctx: ctx}
-		}
-
-		// Update data with the result
-		if err := json.Unmarshal(result.Payload, &data); err != nil {
-			return mq.Result{Error: fmt.Errorf("failed to unmarshal result from operation %d: %v", i+1, err), Ctx: ctx}
-		}
-	}
-
-	bt, _ := json.Marshal(data)
-	return mq.Result{Payload: bt, Ctx: ctx}
-}
+func (h *SplitJoinHandler) joinOperation(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	targetField := h.getTargetField()
+	separator := h.getSeparator()
+	sourceFields := h.getSourceFields()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	var parts []string
+	for _, field := range sourceFields {
+		if val, ok := data[field]; ok && val != nil {
+			parts = append(parts, fmt.Sprintf("%v", val))
+		}
+	}
+
+	if len(parts) > 0 {
+		result[targetField] = strings.Join(parts, separator)
+	}
+
+	return result
+}
+
+func (h *SplitJoinHandler) splitToArrayOperation(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	fields := h.getTargetFields()
+	separator := h.getSeparator()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	for _, field := range fields {
+		if val, ok := data[field]; ok {
+			if str, ok := val.(string); ok {
+				parts := strings.Split(str, separator)
+				var cleanParts []interface{}
+				for _, part := range parts {
+					cleanParts = append(cleanParts, strings.TrimSpace(part))
+				}
+				result[field+"_array"] = cleanParts
+			}
+		}
+	}
+
+	return result
+}
+
+func (h *SplitJoinHandler) joinFromArrayOperation(data map[string]any) map[string]any {
+	result := make(map[string]any)
+	targetField := h.getTargetField()
+	separator := h.getSeparator()
+	sourceField := h.getSourceField()
+
+	// Copy all original data
+	for key, value := range data {
+		result[key] = value
+	}
+
+	if val, ok := data[sourceField]; ok {
+		if arr, ok := val.([]interface{}); ok {
+			var parts []string
+			for _, item := range arr {
+				if item != nil {
+					parts = append(parts, fmt.Sprintf("%v", item))
+				}
+			}
+			result[targetField] = strings.Join(parts, separator)
+		}
+	}
+
+	return result
+}
+
+func (h *SplitJoinHandler) getTargetFields() []string {
+	if fields, ok := h.Payload.Data["fields"].([]interface{}); ok {
+		var result []string
+		for _, field := range fields {
+			if str, ok := field.(string); ok {
+				result = append(result, str)
+			}
+		}
+		return result
+	}
+	return nil
+}
+
+func (h *SplitJoinHandler) getTargetField() string {
+	if field, ok := h.Payload.Data["target_field"].(string); ok {
+		return field
+	}
+	return "joined_field"
+}
+
+func (h *SplitJoinHandler) getSourceField() string {
+	if field, ok := h.Payload.Data["source_field"].(string); ok {
+		return field
+	}
+	return ""
+}
+
+func (h *SplitJoinHandler) getSourceFields() []string {
+	if fields, ok := h.Payload.Data["source_fields"].([]interface{}); ok {
+		var result []string
+		for _, field := range fields {
+			if str, ok := field.(string); ok {
+				result = append(result, str)
+			}
+		}
+		return result
+	}
+	return nil
+}
+
+func (h *SplitJoinHandler) getSeparator() string {
+	if sep, ok := h.Payload.Data["separator"].(string); ok {
+		return sep
+	}
+	return "," // Default separator
+}

-// Factory functions
-func NewSplitHandler(id, sourceField, targetField, delimiter string, options SplitJoinOptions) *SplitJoinHandler {
-	return &SplitJoinHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "split_string",
-			Type: dag.Function,
-			Tags: []string{"data", "string", "split"},
-		},
-		OpType:      "split",
-		SourceField: sourceField,
-		TargetField: targetField,
-		Delimiter:   delimiter,
-		Options:     options,
-	}
-}
-
-func NewJoinHandler(id, sourceField, targetField, delimiter string, options SplitJoinOptions) *SplitJoinHandler {
-	return &SplitJoinHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "join_array",
-			Type: dag.Function,
-			Tags: []string{"data", "array", "join"},
-		},
-		OpType:      "join",
-		SourceField: sourceField,
-		TargetField: targetField,
-		Delimiter:   delimiter,
-		Options:     options,
-	}
-}
-
-func NewAdvancedSplitJoinHandler(id string, operations []SplitJoinOperation) *AdvancedSplitJoinHandler {
-	return &AdvancedSplitJoinHandler{
-		Operation: dag.Operation{
-			ID:   id,
-			Key:  "advanced_split_join",
-			Type: dag.Function,
-			Tags: []string{"data", "string", "array", "advanced"},
-		},
-		Operations: operations,
-	}
-}
+func NewSplitJoinHandler(id string) *SplitJoinHandler {
+	return &SplitJoinHandler{
+		Operation: dag.Operation{ID: id, Key: "split_join", Type: dag.Function, Tags: []string{"data", "transformation", "string"}},
+	}
+}
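// Editorial usage sketch for the payload-driven SplitJoinHandler above
// (assumes the same SetConfig(dag.Payload{...}) pattern; "separator" defaults
// to "," when omitted, and field names are invented):
//
//	h := NewSplitJoinHandler("split-demo")
//	h.SetConfig(dag.Payload{Data: map[string]any{
//		"operation": "split",       // split, join, split_to_array, join_from_array
//		"fields":    []any{"tags"}, // e.g. data["tags"] == "a, b, c"
//		"separator": ",",
//	}})
//
// A "split" over "tags" yields tags_0, tags_1, ... (trimmed), plus "tags_parts"
// (the untrimmed parts) and "tags_count".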
@@ -1,43 +0,0 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
var (
|
||||
taskProcessed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "tasks_processed_total",
|
||||
Help: "Total number of tasks processed.",
|
||||
},
|
||||
[]string{"status"},
|
||||
)
|
||||
taskProcessingTime = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "task_processing_time_seconds",
|
||||
Help: "Histogram of task processing times.",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(taskProcessed)
|
||||
prometheus.MustRegister(taskProcessingTime)
|
||||
}
|
||||
|
||||
func RecordTaskProcessed(status string) {
|
||||
taskProcessed.WithLabelValues(status).Inc()
|
||||
}
|
||||
|
||||
func RecordTaskProcessingTime(duration float64) {
|
||||
taskProcessingTime.Observe(duration)
|
||||
}
|
||||
|
||||
func StartMetricsServer(port string) {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
go http.ListenAndServe(port, nil)
|
||||
}
|
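// Editorial note: the metrics file above is removed by this commit (-1,43 +0,0).
// Its helpers would have been wired up along these lines (address and label
// values invented):
//
//	metrics.StartMetricsServer(":2112") // serve /metrics in the background
//	start := time.Now()
//	// ... process a task ...
//	metrics.RecordTaskProcessed("success")
//	metrics.RecordTaskProcessingTime(time.Since(start).Seconds())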