diff --git a/db/go/20201016170415_update_cvat.go b/db/go/20201016170415_update_cvat.go index 62c68a7..019da10 100644 --- a/db/go/20201016170415_update_cvat.go +++ b/db/go/20201016170415_update_cvat.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201016170415() { @@ -15,7 +16,9 @@ func initialize20201016170415() { // Up20201016170415 updates cvat to a new version func Up20201016170415(tx *sql.Tx) error { // This code is executed when the migration is applied. - return updateWorkspaceTemplateManifest("20201016170415_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201016170415.yaml"), + cvatTemplateName) } // Down20201016170415 does nothing diff --git a/db/go/20201028145442_update_jupyter_lab_template.go b/db/go/20201028145442_update_jupyter_lab_template.go index 1004d69..40e9cba 100644 --- a/db/go/20201028145442_update_jupyter_lab_template.go +++ b/db/go/20201028145442_update_jupyter_lab_template.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201028145442() { @@ -16,7 +17,9 @@ func initialize20201028145442() { // These hooks will attempt to persist conda, pip, and jupyterlab extensions between pause and shut-down. func Up20201028145442(tx *sql.Tx) error { // This code is executed when the migration is applied. - return updateWorkspaceTemplateManifest("20201028145442_jupyterlab.yaml", jupyterLabTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("jupyterlab", "20201028145442.yaml"), + jupyterLabTemplateName) } // Down20201028145442 removes the lifecycle hooks from the template. 
diff --git a/db/go/20201028145443_update_vscode_template.go b/db/go/20201028145443_update_vscode_template.go index 85126fc..47ab529 100644 --- a/db/go/20201028145443_update_vscode_template.go +++ b/db/go/20201028145443_update_vscode_template.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201028145443() { @@ -18,11 +19,15 @@ func initialize20201028145443() { // On workspace resume / start, the code then tries to install these packages. func Up20201028145443(tx *sql.Tx) error { // This code is executed when the migration is applied. - return updateWorkspaceTemplateManifest("vscode_20201028145443.yaml", vscodeWorkspaceTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("vscode", "vscode_20201028145443.yaml"), + vscodeWorkspaceTemplateName) } // Down20201028145443 removes the lifecycle hooks from VSCode workspace template. func Down20201028145443(tx *sql.Tx) error { // This code is executed when the migration is rolled back. - return updateWorkspaceTemplateManifest("vscode_20201028145443.yaml", vscodeWorkspaceTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("vscode", "vscode_20201028145443.yaml"), + vscodeWorkspaceTemplateName) } diff --git a/db/go/20201031165106_add_tensorboard_env_var_to_jupyterlab_template.go b/db/go/20201031165106_add_tensorboard_env_var_to_jupyterlab_template.go index 40ec2fd..2f2ec20 100644 --- a/db/go/20201031165106_add_tensorboard_env_var_to_jupyterlab_template.go +++ b/db/go/20201031165106_add_tensorboard_env_var_to_jupyterlab_template.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201031165106() { @@ -16,11 +17,15 @@ func initialize20201031165106() { // These hooks will attempt to persist conda, pip, and jupyterlab extensions between pause and shut-down. func Up20201031165106(tx *sql.Tx) error { // This code is executed when the migration is applied. 
- return updateWorkspaceTemplateManifest("20201031165106_jupyterlab.yaml", jupyterLabTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("jupyterlab", "20201031165106.yaml"), + jupyterLabTemplateName) } // Down20201031165106 removes the lifecycle hooks from the template. func Down20201031165106(tx *sql.Tx) error { // This code is executed when the migration is rolled back. - return updateWorkspaceTemplateManifest("20201028145442_jupyterlab.yaml", jupyterLabTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("jupyterlab", "20201028145442.yaml"), + jupyterLabTemplateName) } diff --git a/db/go/20201102104048_update_cvat_reduce_vols.go b/db/go/20201102104048_update_cvat_reduce_vols.go index ebe2b61..8e1ce25 100644 --- a/db/go/20201102104048_update_cvat_reduce_vols.go +++ b/db/go/20201102104048_update_cvat_reduce_vols.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201102104048() { @@ -17,11 +18,15 @@ func initialize20201102104048() { // are placed under one path, and that path is on one volume. func Up20201102104048(tx *sql.Tx) error { // This code is executed when the migration is applied. - return updateWorkspaceTemplateManifest("20201102104048_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201102104048.yaml"), + cvatTemplateName) } // Down20201102104048 reverts CVAT back to original amount of volumes. func Down20201102104048(tx *sql.Tx) error { // This code is executed when the migration is rolled back. 
- return updateWorkspaceTemplateManifest("20201016170415_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201016170415.yaml"), + cvatTemplateName) } diff --git a/db/go/20201113094916_update_cvat_onepanel_sdk.go b/db/go/20201113094916_update_cvat_onepanel_sdk.go index 60b9335..e08baa3 100644 --- a/db/go/20201113094916_update_cvat_onepanel_sdk.go +++ b/db/go/20201113094916_update_cvat_onepanel_sdk.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201113094916() { @@ -16,11 +17,15 @@ func initialize20201113094916() { //Of note, this replaces the authentication request endpoint. func Up20201113094916(tx *sql.Tx) error { // This code is executed when the migration is applied. - return updateWorkspaceTemplateManifest("20201113094916_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201113094916.yaml"), + cvatTemplateName) } //Down20201113094916 updates CVAT back to previous python-sdk version of 0.14.0 func Down20201113094916(tx *sql.Tx) error { // This code is executed when the migration is rolled back. - return updateWorkspaceTemplateManifest("20201102104048_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201102104048.yaml"), + cvatTemplateName) } diff --git a/db/go/20201115133046_update_cvat_env_vars.go b/db/go/20201115133046_update_cvat_env_vars.go index c6f863c..5cc5034 100644 --- a/db/go/20201115133046_update_cvat_env_vars.go +++ b/db/go/20201115133046_update_cvat_env_vars.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201115133046() { @@ -15,11 +16,15 @@ func initialize20201115133046() { //Up20201115133046 updates CVAT environment variables func Up20201115133046(tx *sql.Tx) error { // This code is executed when the migration is applied. 
- return updateWorkspaceTemplateManifest("20201115133046_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201115133046.yaml"), + cvatTemplateName) } //Down20201115133046 reverts latest environment variable updates func Down20201115133046(tx *sql.Tx) error { // This code is executed when the migration is rolled back. - return updateWorkspaceTemplateManifest("20201113094916_cvat.yaml", cvatTemplateName) + return updateWorkspaceTemplateManifest( + filepath.Join("cvat", "20201113094916.yaml"), + cvatTemplateName) } diff --git a/db/go/20201115134934_add_tensorboard_to_tfod.go b/db/go/20201115134934_add_tensorboard_to_tfod.go index d1bd7e8..69ddcbc 100644 --- a/db/go/20201115134934_add_tensorboard_to_tfod.go +++ b/db/go/20201115134934_add_tensorboard_to_tfod.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201115134934() { @@ -12,11 +13,11 @@ func initialize20201115134934() { } } -//Up20201115134934 add TensorBoard sidecar to TFODs +// Up20201115134934 add TensorBoard sidecar to TFODs func Up20201115134934(tx *sql.Tx) error { // This code is executed when the migration is applied. return updateWorkflowTemplateManifest( - "20201115134934_tfod.yaml", + filepath.Join("tfod", "20201115134934.yaml"), tensorflowObjectDetectionWorkflowTemplateName, map[string]string{ "used-by": "cvat", @@ -24,7 +25,7 @@ func Up20201115134934(tx *sql.Tx) error { ) } -//Down20201115134934 do nothing +// Down20201115134934 do nothing func Down20201115134934(tx *sql.Tx) error { // This code is executed when the migration is rolled back. 
return nil diff --git a/db/go/20201115145814_add_tensorboard_to_maskrcnn.go b/db/go/20201115145814_add_tensorboard_to_maskrcnn.go index 378a8de..62b6070 100644 --- a/db/go/20201115145814_add_tensorboard_to_maskrcnn.go +++ b/db/go/20201115145814_add_tensorboard_to_maskrcnn.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201115145814() { @@ -12,11 +13,11 @@ func initialize20201115145814() { } } -//Up20201115145814 add TensorBoard sidecar to TFODs +// Up20201115145814 adds a TensorBoard sidecar to the MaskRCNN workflow template func Up20201115145814(tx *sql.Tx) error { // This code is executed when the migration is applied. return updateWorkflowTemplateManifest( - "20201115145814_maskrcnn.yaml", + filepath.Join("maskrcnn", "20201115145814.yaml"), maskRCNNWorkflowTemplateName, map[string]string{ "used-by": "cvat", @@ -24,7 +25,7 @@ func Up20201115145814(tx *sql.Tx) error { ) } -//Down20201115145814 do nothing +// Down20201115145814 do nothing func Down20201115145814(tx *sql.Tx) error { // This code is executed when the migration is rolled back. return nil diff --git a/db/go/20201130130433_update_tfod_path.go b/db/go/20201130130433_update_tfod_path.go index f48aa13..2e4cad4 100644 --- a/db/go/20201130130433_update_tfod_path.go +++ b/db/go/20201130130433_update_tfod_path.go @@ -3,6 +3,7 @@ package migration import ( "database/sql" "github.com/pressly/goose" + "path/filepath" ) func initialize20201130130433() { @@ -12,11 +13,11 @@ func initialize20201130130433() { } } -//Up20201130130433 remove namespace to resolve checkpoint path issue +// Up20201130130433 remove namespace to resolve checkpoint path issue func Up20201130130433(tx *sql.Tx) error { // This code is executed when the migration is applied. 
return updateWorkflowTemplateManifest( - "20201130130433_tfod.yaml", + filepath.Join("tfod", "20201130130433.yaml"), tensorflowObjectDetectionWorkflowTemplateName, map[string]string{ "used-by": "cvat", @@ -24,11 +25,11 @@ func Up20201130130433(tx *sql.Tx) error { ) } -//Down20201130130433 do nothing +// Down20201130130433 reverts the TFOD workflow template to the 20201115134934 manifest func Down20201130130433(tx *sql.Tx) error { // This code is executed when the migration is rolled back. return updateWorkflowTemplateManifest( - "20201115134934_tfod.yaml", + filepath.Join("tfod", "20201115134934.yaml"), tensorflowObjectDetectionWorkflowTemplateName, map[string]string{ "used-by": "cvat", diff --git a/db/go/20201208155115_replace_tty_with_env_var_for_tfod.go b/db/go/20201208155115_replace_tty_with_env_var_for_tfod.go new file mode 100644 index 0000000..defea30 --- /dev/null +++ b/db/go/20201208155115_replace_tty_with_env_var_for_tfod.go @@ -0,0 +1,38 @@ +package migration + +import ( + "database/sql" + "github.com/pressly/goose" + "path/filepath" +) + +func initialize20201208155115() { + if _, ok := initializedMigrations[20201208155115]; !ok { + goose.AddMigration(Up20201208155115, Down20201208155115) + initializedMigrations[20201208155115] = true + } +} + +// Up20201208155115 update the tfod workflow template to replace tty with an environment variable +func Up20201208155115(tx *sql.Tx) error { + // This code is executed when the migration is applied. + return updateWorkflowTemplateManifest( + filepath.Join("tfod", "20201208155115.yaml"), + tensorflowObjectDetectionWorkflowTemplateName, + map[string]string{ + "used-by": "cvat", + }, + ) +} + +// Down20201208155115 rolls back the environment variable change +func Down20201208155115(tx *sql.Tx) error { + // This code is executed when the migration is rolled back. 
+ return updateWorkflowTemplateManifest( + filepath.Join("tfod", "20201130130433.yaml"), + tensorflowObjectDetectionWorkflowTemplateName, + map[string]string{ + "used-by": "cvat", + }, + ) +} diff --git a/db/go/20201208155805_replace_tty_with_env_var_for_maskrcnn.go b/db/go/20201208155805_replace_tty_with_env_var_for_maskrcnn.go new file mode 100644 index 0000000..bb6f464 --- /dev/null +++ b/db/go/20201208155805_replace_tty_with_env_var_for_maskrcnn.go @@ -0,0 +1,38 @@ +package migration + +import ( + "database/sql" + "github.com/pressly/goose" + "path/filepath" +) + +func initialize20201208155805() { + if _, ok := initializedMigrations[20201208155805]; !ok { + goose.AddMigration(Up20201208155805, Down20201208155805) + initializedMigrations[20201208155805] = true + } +} + +// Up20201208155805 update the maskrcnn workflow template to replace tty with an environment variable +func Up20201208155805(tx *sql.Tx) error { + // This code is executed when the migration is applied. + return updateWorkflowTemplateManifest( + filepath.Join("maskrcnn", "20201208155115.yaml"), + maskRCNNWorkflowTemplateName, + map[string]string{ + "used-by": "cvat", + }, + ) +} + +// Down20201208155805 rolls back the environment variable change +func Down20201208155805(tx *sql.Tx) error { + // This code is executed when the migration is rolled back. 
+ return updateWorkflowTemplateManifest( + filepath.Join("maskrcnn", "20201115145814.yaml"), + maskRCNNWorkflowTemplateName, + map[string]string{ + "used-by": "cvat", + }, + ) +} diff --git a/db/go/db.go b/db/go/db.go index f1aa668..18f21d0 100644 --- a/db/go/db.go +++ b/db/go/db.go @@ -72,7 +72,9 @@ func Initialize() { initialize20201115133046() initialize20201115134934() initialize20201115145814() - initialize20201130130433() + initialize20201130130433() + initialize20201208155115() + initialize20201208155805() if err := client.DB.Close(); err != nil { log.Printf("[error] closing db %v", err) @@ -142,13 +144,21 @@ func ReplaceArtifactRepositoryType(client *v1.Client, namespace *v1.Namespace, w return nil } -// readDataFile returns the contents of a file in the db/data/{name} directory -func readDataFile(name string) (string, error) { +// readDataFile returns the contents of a file in the db/yaml/{path} directory +// path can indicate subdirectories like cvat/20201016170415.yaml +func readDataFile(path string) (string, error) { curDir, err := os.Getwd() if err != nil { return "", err } - data, err := ioutil.ReadFile(filepath.Join(curDir, "db", "data", name)) + + finalPath := []string{curDir, "db", "yaml"} + + for _, pathPart := range strings.Split(path, string(os.PathSeparator)) { + finalPath = append(finalPath, pathPart) + } + + data, err := ioutil.ReadFile(filepath.Join(finalPath...)) if err != nil { return "", err } diff --git a/db/data/20201016170415_cvat.yaml b/db/yaml/cvat/20201016170415.yaml similarity index 100% rename from db/data/20201016170415_cvat.yaml rename to db/yaml/cvat/20201016170415.yaml diff --git a/db/data/20201102104048_cvat.yaml b/db/yaml/cvat/20201102104048.yaml similarity index 100% rename from db/data/20201102104048_cvat.yaml rename to db/yaml/cvat/20201102104048.yaml diff --git a/db/data/20201113094916_cvat.yaml b/db/yaml/cvat/20201113094916.yaml similarity index 100% rename from db/data/20201113094916_cvat.yaml rename to 
db/yaml/cvat/20201113094916.yaml diff --git a/db/data/20201115133046_cvat.yaml b/db/yaml/cvat/20201115133046.yaml similarity index 100% rename from db/data/20201115133046_cvat.yaml rename to db/yaml/cvat/20201115133046.yaml diff --git a/db/data/jupyter_lab_20200929153931.yaml b/db/yaml/jupyterlab/20200929153931.yaml similarity index 100% rename from db/data/jupyter_lab_20200929153931.yaml rename to db/yaml/jupyterlab/20200929153931.yaml diff --git a/db/data/20201028145442_jupyterlab.yaml b/db/yaml/jupyterlab/20201028145442.yaml similarity index 100% rename from db/data/20201028145442_jupyterlab.yaml rename to db/yaml/jupyterlab/20201028145442.yaml diff --git a/db/data/20201031165106_jupyterlab.yaml b/db/yaml/jupyterlab/20201031165106.yaml similarity index 100% rename from db/data/20201031165106_jupyterlab.yaml rename to db/yaml/jupyterlab/20201031165106.yaml diff --git a/db/data/20201115145814_maskrcnn.yaml b/db/yaml/maskrcnn/20201115145814.yaml similarity index 100% rename from db/data/20201115145814_maskrcnn.yaml rename to db/yaml/maskrcnn/20201115145814.yaml diff --git a/db/yaml/maskrcnn/20201208155115.yaml b/db/yaml/maskrcnn/20201208155115.yaml new file mode 100644 index 0000000..053b637 --- /dev/null +++ b/db/yaml/maskrcnn/20201208155115.yaml @@ -0,0 +1,192 @@ +entrypoint: main +arguments: + parameters: + - name: source + value: https://github.com/onepanelio/Mask_RCNN.git + displayName: Model source code + type: hidden + visibility: private + + - name: cvat-annotation-path + value: annotation-dump/sample_dataset + hint: Path to annotated data in default object storage (i.e S3). In CVAT, this parameter will be pre-populated. + displayName: Dataset path + visibility: private + + - name: cvat-output-path + value: workflow-data/output/sample_output + hint: Path to store output artifacts in default object storage (i.e s3). In CVAT, this parameter will be pre-populated. 
+ displayName: Workflow output path + visibility: private + + - name: cvat-finetune-checkpoint + value: '' + hint: Select the last fine-tune checkpoint for this model. It may take up to 5 minutes for a recent checkpoint show here. Leave empty if this is the first time you're training this model. + displayName: Checkpoint path + visibility: public + + - name: cvat-num-classes + displayName: Number of classes + hint: Number of classes (i.e in CVAT taks) + 1 for background + value: '81' + visibility: private + + - name: hyperparameters + displayName: Hyperparameters + visibility: public + type: textarea.textarea + value: |- + stage-1-epochs=1 # Epochs for network heads + stage-2-epochs=2 # Epochs for finetune layers + stage-3-epochs=3 # Epochs for all layers + hint: "Please refer to our documentation for more information on parameters. Number of classes will be automatically populated if you had 'sys-num-classes' parameter in a workflow." + + - name: dump-format + value: cvat_coco + displayName: CVAT dump format + visibility: public + + - name: tf-image + visibility: public + value: tensorflow/tensorflow:1.13.1-py3 + type: select.select + displayName: Select TensorFlow image + hint: Select the GPU image if you are running on a GPU node pool + options: + - name: 'TensorFlow 1.13.1 CPU Image' + value: 'tensorflow/tensorflow:1.13.1-py3' + - name: 'TensorFlow 1.13.1 GPU Image' + value: 'tensorflow/tensorflow:1.13.1-gpu-py3' + + - displayName: Node pool + hint: Name of node pool or group to run this workflow task + type: select.select + visibility: public + name: sys-node-pool + value: Standard_D4s_v3 + required: true + options: + - name: 'CPU: 2, RAM: 8GB' + value: Standard_D2s_v3 + - name: 'CPU: 4, RAM: 16GB' + value: Standard_D4s_v3 + - name: 'GPU: 1xK80, CPU: 6, RAM: 56GB' + value: Standard_NC6 +templates: + - name: main + dag: + tasks: + - name: train-model + template: tensorflow + # Uncomment the lines below if you want to send Slack notifications + # - arguments: + 
# artifacts: + # - from: '{{tasks.train-model.outputs.artifacts.sys-metrics}}' + # name: metrics + # parameters: + # - name: status + # value: '{{tasks.train-model.status}}' + # dependencies: + # - train-model + # name: notify-in-slack + # template: slack-notify-success + - name: tensorflow + container: + args: + - | + apt-get update \ + && apt-get install -y git wget libglib2.0-0 libsm6 libxext6 libxrender-dev \ + && pip install -r requirements.txt \ + && pip install boto3 pyyaml google-cloud-storage \ + && git clone https://github.com/waleedka/coco \ + && cd coco/PythonAPI \ + && python setup.py build_ext install \ + && rm -rf build \ + && cd ../../ \ + && wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5 \ + && python setup.py install && ls \ + && python samples/coco/cvat.py train --dataset=/mnt/data/datasets \ + --model=workflow_maskrcnn \ + --extras="{{workflow.parameters.hyperparameters}}" \ + --ref_model_path="{{workflow.parameters.cvat-finetune-checkpoint}}" \ + --num_classes="{{workflow.parameters.cvat-num-classes}}" \ + && cd /mnt/src/ \ + && python prepare_dataset.py /mnt/data/datasets/annotations/instances_default.json + command: + - sh + - -c + image: '{{workflow.parameters.tf-image}}' + volumeMounts: + - mountPath: /mnt/data + name: data + - mountPath: /mnt/output + name: output + workingDir: /mnt/src + nodeSelector: + beta.kubernetes.io/instance-type: '{{workflow.parameters.sys-node-pool}}' + sidecars: + - name: tensorboard + image: tensorflow/tensorflow:2.3.0 + command: [sh, -c] + env: + - name: ONEPANEL_INTERACTIVE_SIDECAR + value: 'true' + args: ["tensorboard --logdir /mnt/output/"] + ports: + - containerPort: 6006 + name: tensorboard + inputs: + artifacts: + - name: data + path: /mnt/data/datasets/ + {{.ArtifactRepositoryType}}: + key: '{{workflow.namespace}}/{{workflow.parameters.cvat-annotation-path}}' + - git: + repo: '{{workflow.parameters.source}}' + revision: "no-boto" + name: src + path: /mnt/src + 
outputs: + artifacts: + - name: model + optional: true + path: /mnt/output + {{.ArtifactRepositoryType}}: + key: '{{workflow.namespace}}/{{workflow.parameters.cvat-output-path}}/{{workflow.name}}' +# Uncomment the lines below if you want to send Slack notifications +#- container: +# args: +# - SLACK_USERNAME=Onepanel SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}" +# SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd +# SLACK_MESSAGE=$(cat /tmp/metrics.json)} ./slack-notify +# command: +# - sh +# - -c +# image: technosophos/slack-notify +# inputs: +# artifacts: +# - name: metrics +# optional: true +# path: /tmp/metrics.json +# parameters: +# - name: status +# name: slack-notify-success +volumeClaimTemplates: + - metadata: + creationTimestamp: null + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi + - metadata: + creationTimestamp: null + name: output + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi \ No newline at end of file diff --git a/db/data/20201115134934_tfod.yaml b/db/yaml/tfod/20201115134934.yaml similarity index 100% rename from db/data/20201115134934_tfod.yaml rename to db/yaml/tfod/20201115134934.yaml diff --git a/db/data/20201130130433_tfod.yaml b/db/yaml/tfod/20201130130433.yaml similarity index 100% rename from db/data/20201130130433_tfod.yaml rename to db/yaml/tfod/20201130130433.yaml diff --git a/db/yaml/tfod/20201208155115.yaml b/db/yaml/tfod/20201208155115.yaml new file mode 100644 index 0000000..f252d6f --- /dev/null +++ b/db/yaml/tfod/20201208155115.yaml @@ -0,0 +1,223 @@ +entrypoint: main +arguments: + parameters: + - name: source + value: https://github.com/tensorflow/models.git + displayName: Model source code + type: hidden + visibility: private + + - name: trainingsource + value: https://github.com/onepanelio/cvat-training.git + type: hidden + visibility: private + + - name: revision + value: v1.13.0 + type: hidden + 
visibility: private + + - name: cvat-annotation-path + value: annotation-dump/sample_dataset + displayName: Dataset path + hint: Path to annotated data in default object storage (i.e S3). In CVAT, this parameter will be pre-populated. + visibility: private + + - name: cvat-output-path + value: workflow-data/output/sample_output + hint: Path to store output artifacts in default object storage (i.e s3). In CVAT, this parameter will be pre-populated. + displayName: Workflow output path + visibility: private + + - name: cvat-model + value: frcnn-res50-coco + displayName: Model + hint: TF Detection API's model to use for training. + type: select.select + visibility: public + options: + - name: 'Faster RCNN-ResNet 101-COCO' + value: frcnn-res101-coco + - name: 'Faster RCNN-ResNet 101-Low Proposal-COCO' + value: frcnn-res101-low + - name: 'Faster RCNN-ResNet 50-COCO' + value: frcnn-res50-coco + - name: 'Faster RCNN-NAS-COCO' + value: frcnn-nas-coco + - name: 'SSD MobileNet V1-COCO' + value: ssd-mobilenet-v1-coco2 + - name: 'SSD MobileNet V2-COCO' + value: ssd-mobilenet-v2-coco + - name: 'SSDLite MobileNet-COCO' + value: ssdlite-mobilenet-coco + + - name: hyperparameters + value: |- + num-steps=10000 + displayName: Hyperparameters + visibility: public + type: textarea.textarea + hint: "Please refer to our documentation for more information on parameters. Number of classes will be automatically populated if you had 'sys-num-classes' parameter in a workflow." + + - name: cvat-finetune-checkpoint + value: '' + hint: Select the last fine-tune checkpoint for this model. It may take up to 5 minutes for a recent checkpoint show here. Leave empty if this is the first time you're training this model. 
+ displayName: Checkpoint path + visibility: public + + - name: cvat-num-classes + value: '81' + hint: Number of classes + displayName: Number of classes + visibility: private + + - name: tf-image + value: tensorflow/tensorflow:1.13.1-py3 + type: select.select + displayName: Select TensorFlow image + visibility: public + hint: Select the GPU image if you are running on a GPU node pool + options: + - name: 'TensorFlow 1.13.1 CPU Image' + value: 'tensorflow/tensorflow:1.13.1-py3' + - name: 'TensorFlow 1.13.1 GPU Image' + value: 'tensorflow/tensorflow:1.13.1-gpu-py3' + + - displayName: Node pool + hint: Name of node pool or group to run this workflow task + type: select.select + name: sys-node-pool + value: Standard_D4s_v3 + visibility: public + required: true + options: + - name: 'CPU: 2, RAM: 8GB' + value: Standard_D2s_v3 + - name: 'CPU: 4, RAM: 16GB' + value: Standard_D4s_v3 + - name: 'GPU: 1xK80, CPU: 6, RAM: 56GB' + value: Standard_NC6 + - name: dump-format + value: cvat_tfrecord + visibility: public +templates: + - name: main + dag: + tasks: + - name: train-model + template: tensorflow + # Uncomment the lines below if you want to send Slack notifications + # - arguments: + # artifacts: + # - from: '{{tasks.train-model.outputs.artifacts.sys-metrics}}' + # name: metrics + # parameters: + # - name: status + # value: '{{tasks.train-model.status}}' + # dependencies: + # - train-model + # name: notify-in-slack + # template: slack-notify-success + - name: tensorflow + container: + args: + - | + apt-get update && \ + apt-get install -y python3-pip git wget unzip libglib2.0-0 libsm6 libxext6 libxrender-dev && \ + pip install pillow lxml Cython contextlib2 jupyter matplotlib numpy scipy boto3 pycocotools pyyaml google-cloud-storage && \ + cd /mnt/src/tf/research && \ + export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim && \ + cd /mnt/src/train && \ + python convert_workflow.py \ + --extras="{{workflow.parameters.hyperparameters}}" \ + 
--model="{{workflow.parameters.cvat-model}}" \ + --num_classes="{{workflow.parameters.cvat-num-classes}}" \ + --sys_finetune_checkpoint={{workflow.parameters.cvat-finetune-checkpoint}} + command: + - sh + - -c + image: '{{workflow.parameters.tf-image}}' + volumeMounts: + - mountPath: /mnt/data + name: data + - mountPath: /mnt/output + name: output + workingDir: /mnt/src + nodeSelector: + beta.kubernetes.io/instance-type: '{{workflow.parameters.sys-node-pool}}' + sidecars: + - name: tensorboard + image: tensorflow/tensorflow:2.3.0 + command: [sh, -c] + env: + - name: ONEPANEL_INTERACTIVE_SIDECAR + value: 'true' + args: ["tensorboard --logdir /mnt/output/"] + ports: + - containerPort: 6006 + name: tensorboard + inputs: + artifacts: + - name: data + path: /mnt/data/datasets/ + {{.ArtifactRepositoryType}}: + key: '{{workflow.namespace}}/{{workflow.parameters.cvat-annotation-path}}' + - name: models + path: /mnt/data/models/ + optional: true + {{.ArtifactRepositoryType}}: + key: '{{workflow.parameters.cvat-finetune-checkpoint}}' + - git: + repo: '{{workflow.parameters.source}}' + revision: '{{workflow.parameters.revision}}' + name: src + path: /mnt/src/tf + - git: + repo: '{{workflow.parameters.trainingsource}}' + revision: 'optional-artifacts' + name: tsrc + path: /mnt/src/train + outputs: + artifacts: + - name: model + optional: true + path: /mnt/output + {{.ArtifactRepositoryType}}: + key: '{{workflow.namespace}}/{{workflow.parameters.cvat-output-path}}/{{workflow.name}}' +# Uncomment the lines below if you want to send Slack notifications +#- container: +# args: +# - SLACK_USERNAME=Onepanel SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}" +# SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd +# SLACK_MESSAGE=$(cat /tmp/metrics.json)} ./slack-notify +# command: +# - sh +# - -c +# image: technosophos/slack-notify +# inputs: +# artifacts: +# - name: metrics +# optional: true +# path: /tmp/metrics.json +# parameters: +# - name: 
status +# name: slack-notify-success +volumeClaimTemplates: + - metadata: + creationTimestamp: null + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi + - metadata: + creationTimestamp: null + name: output + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 200Gi \ No newline at end of file diff --git a/db/data/vscode_20200929144301.yaml b/db/yaml/vscode/vscode_20200929144301.yaml similarity index 100% rename from db/data/vscode_20200929144301.yaml rename to db/yaml/vscode/vscode_20200929144301.yaml diff --git a/db/data/vscode_20201028145443.yaml b/db/yaml/vscode/vscode_20201028145443.yaml similarity index 100% rename from db/data/vscode_20201028145443.yaml rename to db/yaml/vscode/vscode_20201028145443.yaml diff --git a/pkg/workflow_execution.go b/pkg/workflow_execution.go index 26a6339..f1734dc 100644 --- a/pkg/workflow_execution.go +++ b/pkg/workflow_execution.go @@ -53,6 +53,32 @@ var ( workflowTemplateVersionLabelKey = "onepanel.io/workflow-template-version" ) +// envVarValueInSidecars returns true if any of the sidecars contain an environment variable with the input name and value +// false otherwise +func envVarValueInSidecars(sidecars []wfv1.UserContainer, name, value string) bool { + for _, s := range sidecars { + for _, e := range s.Env { + if e.Name == name && e.Value == value { + return true + } + } + } + + return false +} + +// hasEnvVarValue returns true if any of the env vars have the given name and value +// false otherwise +func hasEnvVarValue(envVars []corev1.EnvVar, name, value string) bool { + for _, e := range envVars { + if e.Name == name && e.Value == value { + return true + } + } + + return false +} + func typeWorkflow(wf *wfv1.Workflow) (workflow *WorkflowExecution) { manifest, err := json.Marshal(wf) if err != nil { @@ -306,15 +332,13 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts if template.Metadata.Annotations == nil { 
template.Metadata.Annotations = make(map[string]string) } - template.Metadata.Annotations["sidecar.istio.io/inject"] = "false" + //For workflows with accessible sidecars, we need istio //Istio does not prevent the main container from stopping - for _, s := range template.Sidecars { - if s.TTY == true { - template.Metadata.Annotations["sidecar.istio.io/inject"] = "true" - //Only need one instance to require istio injection - break - } + if envVarValueInSidecars(template.Sidecars, "ONEPANEL_INTERACTIVE_SIDECAR", "true") { + template.Metadata.Annotations["sidecar.istio.io/inject"] = "true" + } else { + template.Metadata.Annotations["sidecar.istio.io/inject"] = "false" } if template.Container != nil { @@ -490,11 +514,13 @@ func (c *Client) injectAccessForSidecars(namespace string, wf *wfv1.Workflow) ([ for tIdx, t := range wf.Spec.Templates { //Inject services, virtual routes for si, s := range t.Sidecars { - //If TTY is true, sidecar needs to be accessible by HTTP + //If ONEPANEL_INTERACTIVE_SIDECAR is true, sidecar needs to be accessible by HTTP //Otherwise, we skip the sidecar - if s.TTY != true { + hasInjectIstio := hasEnvVarValue(s.Env, "ONEPANEL_INTERACTIVE_SIDECAR", "true") + if !hasInjectIstio { continue } + if len(s.Ports) == 0 { msg := fmt.Sprintf("sidecar %s must have at least one port.", s.Name) return nil, util.NewUserError(codes.InvalidArgument, msg)