Files
onepanel/db/go/20200824101019_update_tf_object_detection_training.go

282 lines
8.2 KiB
Go

package migration
import (
"database/sql"
v1 "github.com/onepanelio/core/pkg"
"github.com/pressly/goose"
)
const tensorflowObjectDetectionTraining2 = `arguments:
parameters:
- name: source
value: https://github.com/tensorflow/models.git
displayName: Model source code
type: hidden
visibility: private
- name: trainingsource
value: https://github.com/onepanelio/cvat-training.git
type: hidden
visibility: private
- name: revision
value: v1.13.0
type: hidden
visibility: private
- name: cvat-annotation-path
value: annotation-dump/sample_dataset
displayName: Dataset path
hint: Path to annotated data in default object storage (i.e S3). In CVAT, this parameter will be pre-populated.
visibility: private
- name: cvat-output-path
value: workflow-data/output/sample_output
hint: Path to store output artifacts in default object storage (i.e s3). In CVAT, this parameter will be pre-populated.
displayName: Workflow output path
visibility: private
- name: cvat-model
value: frcnn-res50-coco
displayName: Model
hint: TF Detection API's model to use for training.
type: select.select
visibility: public
options:
- name: 'Faster RCNN-ResNet 101-COCO'
value: frcnn-res101-coco
- name: 'Faster RCNN-ResNet 101-Low Proposal-COCO'
value: frcnn-res101-low
- name: 'Faster RCNN-ResNet 50-COCO'
value: frcnn-res50-coco
- name: 'Faster RCNN-NAS-COCO'
value: frcnn-nas-coco
- name: 'SSD MobileNet V1-COCO'
value: ssd-mobilenet-v1-coco2
- name: 'SSD MobileNet V2-COCO'
value: ssd-mobilenet-v2-coco
- name: 'SSDLite MobileNet-COCO'
value: ssdlite-mobilenet-coco
- name: hyperparameters
value: |-
num-steps=10000
displayName: Hyperparameters
visibility: public
type: textarea.textarea
hint: "Please refer to our <a href='https://docs.onepanel.ai/docs/getting-started/use-cases/computervision/annotation/cvat/cvat_annotation_model#arguments-optional' target='_blank'>documentation</a> for more information on parameters. Number of classes will be automatically populated if you had 'sys-num-classes' parameter in a workflow."
- name: cvat-finetune-checkpoint
value: ''
hint: Select the last fine-tune checkpoint for this model. It may take up to 5 minutes for a recent checkpoint show here. Leave empty if this is the first time you're training this model.
displayName: Checkpoint path
visibility: public
- name: cvat-num-classes
value: '81'
hint: Number of classes
displayName: Number of classes
visibility: private
- name: tf-image
value: tensorflow/tensorflow:1.13.1-py3
type: select.select
displayName: Select TensorFlow image
visibility: public
hint: Select the GPU image if you are running on a GPU node pool
options:
- name: 'TensorFlow 1.13.1 CPU Image'
value: 'tensorflow/tensorflow:1.13.1-py3'
- name: 'TensorFlow 1.13.1 GPU Image'
value: 'tensorflow/tensorflow:1.13.1-gpu-py3'
- displayName: Node pool
hint: Name of node pool or group to run this workflow task
type: select.select
name: sys-node-pool
value: Standard_D4s_v3
visibility: public
required: true
options:
- name: 'CPU: 2, RAM: 8GB'
value: Standard_D2s_v3
- name: 'CPU: 4, RAM: 16GB'
value: Standard_D4s_v3
- name: 'GPU: 1xK80, CPU: 6, RAM: 56GB'
value: Standard_NC6
- name: dump-format
value: cvat_tfrecord
visibility: public
entrypoint: main
templates:
- dag:
tasks:
- name: train-model
template: tensorflow
# Uncomment the lines below if you want to send Slack notifications
# - arguments:
# artifacts:
# - from: '{{tasks.train-model.outputs.artifacts.sys-metrics}}'
# name: metrics
# parameters:
# - name: status
# value: '{{tasks.train-model.status}}'
# dependencies:
# - train-model
# name: notify-in-slack
# template: slack-notify-success
name: main
- container:
args:
- |
apt-get update && \
apt-get install -y python3-pip git wget unzip libglib2.0-0 libsm6 libxext6 libxrender-dev && \
pip install pillow lxml Cython contextlib2 jupyter matplotlib numpy scipy boto3 pycocotools pyyaml google-cloud-storage && \
cd /mnt/src/tf/research && \
export PYTHONPATH=$PYTHONPATH:` + "`pwd`:`pwd`" + `/slim && \
cd /mnt/src/train && \
python convert_workflow.py \
--extras="{{workflow.parameters.hyperparameters}}" \
--model="{{workflow.parameters.cvat-model}}" \
--num_classes="{{workflow.parameters.cvat-num-classes}}" \
--sys_finetune_checkpoint={{workflow.parameters.cvat-finetune-checkpoint}}
command:
- sh
- -c
image: '{{workflow.parameters.tf-image}}'
volumeMounts:
- mountPath: /mnt/data
name: data
- mountPath: /mnt/output
name: output
workingDir: /mnt/src
nodeSelector:
beta.kubernetes.io/instance-type: '{{workflow.parameters.sys-node-pool}}'
inputs:
artifacts:
- name: data
path: /mnt/data/datasets/
{{.ArtifactRepositoryType}}:
key: '{{workflow.namespace}}/{{workflow.parameters.cvat-annotation-path}}'
- name: models
path: /mnt/data/models/
optional: true
{{.ArtifactRepositoryType}}:
key: '{{workflow.namespace}}/{{workflow.parameters.cvat-finetune-checkpoint}}'
- git:
repo: '{{workflow.parameters.source}}'
revision: '{{workflow.parameters.revision}}'
name: src
path: /mnt/src/tf
- git:
repo: '{{workflow.parameters.trainingsource}}'
revision: 'optional-artifacts'
name: tsrc
path: /mnt/src/train
name: tensorflow
outputs:
artifacts:
- name: model
optional: true
path: /mnt/output
{{.ArtifactRepositoryType}}:
key: '{{workflow.namespace}}/{{workflow.parameters.cvat-output-path}}/{{workflow.name}}'
# Uncomment the lines below if you want to send Slack notifications
#- container:
# args:
# - SLACK_USERNAME=Onepanel SLACK_TITLE="{{workflow.name}} {{inputs.parameters.status}}"
# SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd
# SLACK_MESSAGE=$(cat /tmp/metrics.json)} ./slack-notify
# command:
# - sh
# - -c
# image: technosophos/slack-notify
# inputs:
# artifacts:
# - name: metrics
# optional: true
# path: /tmp/metrics.json
# parameters:
# - name: status
# name: slack-notify-success
volumeClaimTemplates:
- metadata:
creationTimestamp: null
name: data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 200Gi
- metadata:
creationTimestamp: null
name: output
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 200Gi`
func initialize20200824101019() {
if _, ok := initializedMigrations[20200824101019]; !ok {
goose.AddMigration(Up20200824101019, Down20200824101019)
initializedMigrations[20200824101019] = true
}
}
// Up20200824101019 updates the tf object detection training workflow template
func Up20200824101019(tx *sql.Tx) error {
// This code is executed when the migration is applied.
client, err := getClient()
if err != nil {
return err
}
defer client.DB.Close()
migrationsRan, err := getRanSQLMigrations(client)
if err != nil {
return err
}
if _, ok := migrationsRan[20200824101019]; ok {
return nil
}
namespaces, err := client.ListOnepanelEnabledNamespaces()
if err != nil {
return err
}
// Update tfObjectDetectionTraining
workflowTemplate := &v1.WorkflowTemplate{
Name: tensorflowObjectDetectionWorkflowTemplateName,
Manifest: tensorflowObjectDetectionTraining2,
Labels: map[string]string{
"used-by": "cvat",
},
}
if err := workflowTemplate.GenerateUID(workflowTemplate.Name); err != nil {
return err
}
for _, namespace := range namespaces {
err = ReplaceArtifactRepositoryType(client, namespace, workflowTemplate, nil)
if err != nil {
return err
}
if _, err := client.CreateWorkflowTemplateVersion(namespace.Name, workflowTemplate); err != nil {
return err
}
}
return nil
}
// Down20200824101019 does nothing
func Down20200824101019(tx *sql.Tx) error {
// This code is executed when the migration is rolled back.
return nil
}