update MaskRCNN template

2025-09-27 01:56:03 +08:00 · 2021-01-29 12:07:25 -08:00
parent f1bdbbad28
commit 35ea2164eb
1 changed files with 124 additions and 118 deletions
--- a/db/yaml/workflows/hyperparameter-tuning/20210118175809.yaml
+++ b/db/yaml/workflows/hyperparameter-tuning/20210118175809.yaml
@@ -1,11 +1,24 @@
 # source: https://github.com/onepanelio/templates/blob/master/workflows/nni-hyperparameter-tuning/mnist/
+# Workflow Template example for hyperparameter tuning
+# Documentation: https://docs.onepanel.ai/docs/reference/workflows/hyperparameter-tuning
+#
+# Only change the fields marked with [CHANGE]
 entrypoint: main
 arguments:
  parameters:
+
+    # [CHANGE] URL of the Git repository containing your training/model architecture code
+    # Set this value and the revision value below to your repository URL and branch (or tag) respectively
    - name: source
      value: https://github.com/onepanelio/templates
+
+    # [CHANGE] Revision is the branch or tag that you want to use
+    # You can change this to any tag or branch name in your repository
    - name: revision
-      value: master
+      value: v0.18.0
+
+    # [CHANGE] Default configuration for the NNI tuner
+    # See https://docs.onepanel.ai/docs/reference/workflows/hyperparameter-tuning#understanding-the-configurations
    - name: config
      displayName: Configuration
      required: true
@@ -29,6 +42,9 @@ arguments:
          command: python main.py --output /mnt/output
          codeDir: .
          # gpuNum: 1                 # uncomment and update to number of GPUs
+
+    # [CHANGE] Search space configuration
+    # Change according to your hyperparameters and ranges
    - name: search-space
      displayName: Search space configuration
      required: true
@@ -42,6 +58,9 @@ arguments:
          "learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] },
          "epochs": { "_type": "choice", "_value": [10] }
        }
+
+    # Node pool dropdown (Node group in EKS)
+    # You can add more of these if you have additional tasks that can run on different node pools
    - displayName: Node pool
      hint: Name of node pool or group to run this workflow task
      type: select.nodepool
@@ -49,6 +68,110 @@ arguments:
      value: {{.DefaultNodePoolOption}}
      required: true

+templates:
+  - name: main
+    dag:
+      tasks:
+        - name: hyperparameter-tuning
+          template: hyperparameter-tuning
+        - name: metrics-writer
+          template: metrics-writer
+          dependencies: [hyperparameter-tuning]
+          arguments:
+            # Use sys-metrics artifact output from hyperparameter-tuning Task
+            # This writes the best metrics to the Workflow
+            artifacts:
+              - name: sys-metrics
+                from: "{{tasks.hyperparameter-tuning.outputs.artifacts.sys-metrics}}"
+  - name: hyperparameter-tuning
+    inputs:
+      artifacts:
+        - name: src
+          # Clone the above repository into '/mnt/data/src'
+          # See https://docs.onepanel.ai/docs/reference/workflows/artifacts#git for private repositories
+          git:
+            repo: '{{workflow.parameters.source}}'
+            revision: '{{workflow.parameters.revision}}'
+          path: /mnt/data/src
+        # [CHANGE] Path where config.yaml will be generated or already exists
+        # Update the path below so that config.yaml is written to the same directory as your main.py file
+        # Note that your source code is cloned to /mnt/data/src
+        - name: config
+          path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
+          raw:
+            data: '{{workflow.parameters.config}}'
+        # [CHANGE] Path where search_space.json will be generated or already exists
+        # Update the path below so that search_space.json is written to the same directory as your main.py file
+        # Note that your source code is cloned to /mnt/data/src
+        - name: search-space
+          path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/search_space.json
+          raw:
+            data: '{{workflow.parameters.search-space}}'
+    outputs:
+      artifacts:
+        - name: output
+          path: /mnt/output
+          optional: true
+    container:
+      image: onepanel/dl:0.17.0
+      args:
+        - --config
+        # [CHANGE] Update the path below to match the config.yaml path set in the 'config' input artifact
+        - /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
+      workingDir: /mnt
+      volumeMounts:
+        - name: hyperparamtuning-data
+          mountPath: /mnt/data
+        - name: hyperparamtuning-output
+          mountPath: /mnt/output
+    nodeSelector:
+      {{.NodePoolLabel}}: '{{workflow.parameters.sys-node-pool}}'
+    sidecars:
+      - name: nni-web-ui
+        image: onepanel/nni-web-ui:0.17.0
+        env:
+          - name: ONEPANEL_INTERACTIVE_SIDECAR
+            value: 'true'
+        ports:
+          - containerPort: 9000
+            name: nni
+      - name: tensorboard
+        image: onepanel/dl:0.17.0
+        command:
+          - sh
+          - '-c'
+        env:
+          - name: ONEPANEL_INTERACTIVE_SIDECAR
+            value: 'true'
+        args:
+          # Read logs from /mnt/output/tensorboard - /mnt/output is auto-mounted from volumeMounts
+          - tensorboard --logdir /mnt/output/tensorboard
+        ports:
+          - containerPort: 6006
+            name: tensorboard
+  # Use the metrics-writer task to write the best metrics to the Workflow
+  - name: metrics-writer
+    inputs:
+      artifacts:
+        - name: sys-metrics
+          path: /tmp/sys-metrics.json
+        - git:
+            repo: https://github.com/onepanelio/templates.git
+            revision: v0.18.0
+          name: src
+          path: /mnt/src
+    container:
+      image: onepanel/python-sdk:v0.16.0
+      command:
+        - python
+        - -u
+      args:
+        - /mnt/src/tasks/metrics-writer/main.py
+        - --from_file=/tmp/sys-metrics.json
+
+# [CHANGE] Volumes that will be mounted at /mnt/data (annotated data) and /mnt/output (models, checkpoints, logs)
+# Update the storage sizes below according to the size of your annotation data, models, checkpoints, logs, etc.
+# Example values: 250Mi, 500Gi, 1Ti
 volumeClaimTemplates:
  - metadata:
      name: hyperparamtuning-data
@@ -64,120 +187,3 @@ volumeClaimTemplates:
      resources:
        requests:
          storage: 20Gi
-
-templates:
-  - name: main
-    dag:
-      tasks:
-        - name: hyperparameter-tuning
-          template: hyperparameter-tuning
-        - name: workflow-metrics-writer
-          template: workflow-metrics-writer
-          dependencies: [hyperparameter-tuning]
-          arguments:
-            # Use sys-metrics artifact output from hyperparameter-tuning Task
-            artifacts:
-              - name: best-metrics
-                from: "{{tasks.hyperparameter-tuning.outputs.artifacts.sys-metrics}}"
-  - name: hyperparameter-tuning
-    inputs:
-      artifacts:
-        - name: src
-          git:
-            repo: '{{workflow.parameters.source}}'
-            revision: '{{workflow.parameters.revision}}'
-          path: /mnt/data/src
-        - name: config
-          path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
-          raw:
-            data: '{{workflow.parameters.config}}'
-        - name: search-space
-          path: /mnt/data/src/workflows/hyperparameter-tuning/mnist/search_space.json
-          raw:
-            data: '{{workflow.parameters.search-space}}'
-    outputs:
-      artifacts:
-        - name: output
-          path: /mnt/output
-          optional: true
-    container:
-      image: onepanel/dl:0.17.0
-      args:
-        - --config
-        - /mnt/data/src/workflows/hyperparameter-tuning/mnist/config.yaml
-      workingDir: /mnt
-      volumeMounts:
-        - name: hyperparamtuning-data
-          mountPath: /mnt/data
-        - name: hyperparamtuning-output
-          mountPath: /mnt/output
-    nodeSelector:
-      {{.NodePoolLabel}}: '{{workflow.parameters.sys-node-pool}}'
-    sidecars:
-      - name: nni-web-ui
-        image: 'onepanel/nni-web-ui:0.17.0'
-        env:
-          - name: ONEPANEL_INTERACTIVE_SIDECAR
-            value: 'true'
-        ports:
-          - containerPort: 9000
-            name: nni
-      - name: tensorboard
-        image: 'tensorflow/tensorflow:2.3.0'
-        command:
-          - sh
-          - '-c'
-        env:
-          - name: ONEPANEL_INTERACTIVE_SIDECAR
-            value: 'true'
-        args:
-          # Read logs from /mnt/output/tensorboard - /mnt/output is auto-mounted from volumeMounts
-          - tensorboard --logdir /mnt/output/tensorboard
-        ports:
-          - containerPort: 6006
-            name: tensorboard
-  - name: workflow-metrics-writer
-    inputs:
-      artifacts:
-        - name: best-metrics
-          path: /tmp/sys-metrics.json
-    script:
-      image: onepanel/python-sdk:v0.16.0
-      command: [python, '-u']
-      source: |
-        import os
-        import json
-
-        import onepanel.core.api
-        from onepanel.core.api.models.metric import Metric
-        from onepanel.core.api.rest import ApiException
-        from onepanel.core.api.models import Parameter
-
-        # Load Task A metrics
-        with open('/tmp/sys-metrics.json') as f:
-            metrics = json.load(f)
-
-        with open('/var/run/secrets/kubernetes.io/serviceaccount/token') as f:
-            token = f.read()
-
-        # Configure API authorization
-        configuration = onepanel.core.api.Configuration(
-            host = os.getenv('ONEPANEL_API_URL'),
-            api_key = {
-                'authorization': token
-            }
-        )
-        configuration.api_key_prefix['authorization'] = 'Bearer'
-
-        # Call SDK method to save metrics
-        with onepanel.core.api.ApiClient(configuration) as api_client:
-            api_instance = onepanel.core.api.WorkflowServiceApi(api_client)
-            namespace = '{{workflow.namespace}}'
-            uid = '{{workflow.name}}'
-            body = onepanel.core.api.AddWorkflowExecutionsMetricsRequest()
-            body.metrics = metrics
-            try:
-                api_response = api_instance.add_workflow_execution_metrics(namespace, uid, body)
-                print('Metrics added.')
-            except ApiException as e:
-                print("Exception when calling WorkflowServiceApi->add_workflow_execution_metrics: %s\n" % e)