Compare commits

...

23 Commits

Author SHA1 Message Date
Andrey Melnikov
9f05ab150a Merge pull request #913 from Vafilor/feat/add.deep.learning.desktop
feat: added deep learning desktop workspace
2021-04-14 11:01:56 -07:00
Andrey Melnikov
81de77d88b feat: added description to deep learning workspace 2021-04-14 10:58:22 -07:00
Andrey Melnikov
ea47eaf49d feat: added deep learning desktop workspace 2021-04-14 10:22:24 -07:00
Andrey Melnikov
1b2d5623b4 Merge pull request #908 from Vafilor/fix/release.command.repo
fix: updated repo in generate release notes as the name has changed
2021-04-01 16:31:42 -07:00
Andrey Melnikov
86895a9dfe Update README.md 2021-04-01 14:44:37 -07:00
Andrey Melnikov
ec94a13cd9 fix: updated repo in generate release notes as the name has changed 2021-04-01 10:53:26 -07:00
Andrey Melnikov
22836c85e1 Merge pull request #907 from Vafilor/feat/pns.updates
fix: don't use port 80 for host port
2021-04-01 10:29:23 -07:00
Andrey Melnikov
a3ab4a86b0 fix: don't use port 80 for host port so it doesn't take it 2021-04-01 09:08:36 -07:00
Andrey Melnikov
b6ef84a0aa Merge pull request #906 from Vafilor/feat/pns.updates
fix: wrong onepanel/dl version
2021-03-31 19:21:58 -07:00
Andrey Melnikov
9f513dda9b fix: wrong onepanel/dl version 2021-03-31 18:59:21 -07:00
Andrey Melnikov
1bb3e7506d Merge pull request #905 from Vafilor/feat/pns.updates
fix: bug with remove hyperparam tuning migration
2021-03-30 15:13:02 -07:00
Andrey Melnikov
0f19e4d618 fix: bug with remove hyperparam tuning migration 2021-03-30 14:02:08 -07:00
Rush Tehrani
6c251761f5 Merge pull request #904 from Vafilor/feat/pns.updates
feat: Update code to work better with PNS executor
2021-03-30 12:47:54 -07:00
Andrey Melnikov
2cad065778 chore: codacy fixes 2021-03-29 16:23:31 -07:00
Andrey Melnikov
2fe0a239c5 feat: remove hyperparameter tuning workflow if there are no workflow executions ran by it 2021-03-29 16:18:41 -07:00
Andrey Melnikov
8287e178b5 fix: wrong onepanel/dl version for updated jupyterlab template 2021-03-29 12:35:11 -07:00
Andrey Melnikov
b869f2eb22 Merge branch 'feat/pns.updates' of github.com:Vafilor/core into feat/pns.updates 2021-03-29 12:09:54 -07:00
Andrey Melnikov
c4893ed0d7 feat: updated migrations and updated code to better work with pns executor based on Long Nguyen's suggestions. 2021-03-29 12:08:38 -07:00
Andrey Melnikov
4882671b52 feat: updated migrations and updated code to better work with pns executor based on Long Nguyen's suggestions. 2021-03-29 12:03:12 -07:00
Andrey Melnikov
a2009de7b1 Merge pull request #903 from Vafilor/fix/contributing.commands
fix: bash missing character in contributing guide
2021-03-26 13:06:22 -07:00
Andrey Melnikov
75680ee621 Merge pull request #902 from Vafilor/fix/contributing.commands
fix: windows sections of contribution guide
2021-03-26 12:59:57 -07:00
Andrey Melnikov
948f61da13 Merge pull request #892 from lnguyen/master
fix: fixes stability issue with pns executor
2021-03-26 12:09:17 -07:00
Long Nguyen
50dd0b9264 fixes stability issue with pns executor 2021-03-05 10:20:50 -05:00
19 changed files with 1114 additions and 25 deletions

View File

@@ -1,6 +1,6 @@
<img width="200px" src="img/logo.png">
![build](https://img.shields.io/github/workflow/status/onepanelio/core/Publish%20dev%20docker%20image/master?color=01579b)
![build](https://img.shields.io/github/workflow/status/onepanelio/onepanel/Publish%20dev%20docker%20image/master?color=01579b)
![code](https://img.shields.io/codacy/grade/d060fc4d1ac64b85b78f85c691ead86a?color=01579b)
[![release](https://img.shields.io/github/v/release/onepanelio/core?color=01579b)](https://github.com/onepanelio/core/releases)
[![sdk](https://img.shields.io/pypi/v/onepanel-sdk?color=01579b&label=sdk)](https://pypi.org/project/onepanel-sdk/)

View File

@@ -52,7 +52,7 @@ See https://docs.onepanel.ai
` + "```" + `
# Download the binary
curl -sLO https://github.com/onepanelio/core/releases/download/v%s/opctl-linux-amd64
curl -sLO https://github.com/onepanelio/onepanel/releases/download/v%s/opctl-linux-amd64
# Make binary executable
chmod +x opctl-linux-amd64
@@ -68,7 +68,7 @@ opctl version
` + "```" + `
# Download the binary
curl -sLO https://github.com/onepanelio/core/releases/download/v%s/opctl-macos-amd64
curl -sLO https://github.com/onepanelio/onepanel/releases/download/v%s/opctl-macos-amd64
# Make binary executable
chmod +x opctl-macos-amd64
@@ -82,7 +82,7 @@ opctl version
## Windows
Download the [attached executable](https://github.com/onepanelio/core/releases/download/v%s/opctl-windows-amd64.exe), rename it to "opctl" and move it to a folder that is in your PATH environment variable.
Download the [attached executable](https://github.com/onepanelio/onepanel/releases/download/v%s/opctl-windows-amd64.exe), rename it to "opctl" and move it to a folder that is in your PATH environment variable.
`
var repositories = []string{

View File

@@ -0,0 +1,58 @@
package migration
import (
"database/sql"
"github.com/pressly/goose"
"path/filepath"
)
// initialize20210323175655 registers the 20210323175655 migration with goose,
// guarding against duplicate registration via the initializedMigrations map.
func initialize20210323175655() {
	if initializedMigrations[20210323175655] {
		return
	}
	goose.AddMigration(Up20210323175655, Down20210323175655)
	initializedMigrations[20210323175655] = true
}
// Up20210323175655 update workflows to support new PNS mode
func Up20210323175655(tx *sql.Tx) error {
// This code is executed when the migration is applied.
if err := updateWorkflowTemplateManifest(
filepath.Join("workflows", "pytorch-mnist-training", "20210323175655.yaml"),
pytorchWorkflowTemplateName,
map[string]string{
"created-by": "system",
"framework": "pytorch",
}); err != nil {
return err
}
return updateWorkflowTemplateManifest(
filepath.Join("workflows", "tensorflow-mnist-training", "20210323175655.yaml"),
tensorflowWorkflowTemplateName,
map[string]string{
"created-by": "system",
"framework": "tensorflow",
})
}
// Down20210323175655 reverts updating workflows to support PNS
//
// It restores the previous (20210118175809) manifests for the TensorFlow and
// PyTorch MNIST training workflow templates, re-applying the system labels.
func Down20210323175655(tx *sql.Tx) error {
	// This code is executed when the migration is rolled back.
	// Restore the TensorFlow template first; abort the rollback on failure.
	if err := updateWorkflowTemplateManifest(
		filepath.Join("workflows", "tensorflow-mnist-training", "20210118175809.yaml"),
		tensorflowWorkflowTemplateName,
		map[string]string{
			"created-by": "system",
			"framework":  "tensorflow",
		}); err != nil {
		return err
	}

	// Then restore the PyTorch template.
	return updateWorkflowTemplateManifest(
		filepath.Join("workflows", "pytorch-mnist-training", "20210118175809.yaml"),
		pytorchWorkflowTemplateName,
		map[string]string{
			"created-by": "system",
			"framework":  "pytorch",
		})
}

View File

@@ -0,0 +1,55 @@
package migration
import (
"database/sql"
"github.com/pressly/goose"
"path/filepath"
)
// initialize20210329171739 registers the 20210329171739 migration with goose
// exactly once per process.
func initialize20210329171739() {
	if initializedMigrations[20210329171739] {
		return
	}
	goose.AddMigration(Up20210329171739, Down20210329171739)
	initializedMigrations[20210329171739] = true
}
// Up20210329171739 updates workspaces to use new images
func Up20210329171739(tx *sql.Tx) error {
// This code is executed when the migration is applied.
if err := updateWorkspaceTemplateManifest(
filepath.Join("workspaces", "cvat", "20210323175655.yaml"),
cvatTemplateName); err != nil {
return err
}
if err := updateWorkspaceTemplateManifest(
filepath.Join("workspaces", "jupyterlab", "20210323175655.yaml"),
jupyterLabTemplateName); err != nil {
return err
}
return updateWorkspaceTemplateManifest(
filepath.Join("workspaces", "vscode", "20210323175655.yaml"),
vscodeWorkspaceTemplateName)
}
// Down20210329171739 rolls back image updates for workspaces
//
// It restores the previous (20210224180017) manifests for the CVAT, JupyterLab
// and VS Code workspace templates, stopping at the first failure.
func Down20210329171739(tx *sql.Tx) error {
	// This code is executed when the migration is rolled back.
	if err := updateWorkspaceTemplateManifest(
		filepath.Join("workspaces", "cvat", "20210224180017.yaml"),
		cvatTemplateName); err != nil {
		return err
	}

	if err := updateWorkspaceTemplateManifest(
		filepath.Join("workspaces", "jupyterlab", "20210224180017.yaml"),
		jupyterLabTemplateName); err != nil {
		return err
	}

	return updateWorkspaceTemplateManifest(
		filepath.Join("workspaces", "vscode", "20210224180017.yaml"),
		vscodeWorkspaceTemplateName)
}

View File

@@ -0,0 +1,109 @@
package migration
import (
"database/sql"
uid2 "github.com/onepanelio/core/pkg/util/uid"
"github.com/pressly/goose"
"path/filepath"
)
// initialize20210329194731 registers the 20210329194731 migration with goose,
// skipping registration when it has already been performed.
func initialize20210329194731() {
	if initializedMigrations[20210329194731] {
		return
	}
	goose.AddMigration(Up20210329194731, Down20210329194731)
	initializedMigrations[20210329194731] = true
}
// Up20210329194731 removes the hyperparameter-tuning workflow if there are no executions
//
// For every Onepanel-enabled namespace the template is archived only when it
// exists and has neither workflow executions nor cron workflows associated
// with it, so no user data is discarded.
func Up20210329194731(tx *sql.Tx) error {
	// This code is executed when the migration is applied.
	client, err := getClient()
	if err != nil {
		return err
	}
	defer client.DB.Close()

	namespaces, err := client.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	// The template UID is derived from its display name, capped at 30 characters.
	uid, err := uid2.GenerateUID(hyperparameterTuningTemplateName, 30)
	if err != nil {
		return err
	}

	for _, namespace := range namespaces {
		workflowTemplate, err := client.GetWorkflowTemplateRaw(namespace.Name, uid)
		if err != nil {
			return err
		}
		// Template not present in this namespace; nothing to archive.
		if workflowTemplate == nil {
			continue
		}

		workflowExecutionsCount, err := client.CountWorkflowExecutionsForWorkflowTemplate(workflowTemplate.ID)
		if err != nil {
			return err
		}

		cronWorkflowsCount, err := client.CountCronWorkflows(namespace.Name, uid)
		if err != nil {
			return err
		}

		// Archive the template if we have no resources associated with it
		if workflowExecutionsCount == 0 && cronWorkflowsCount == 0 {
			if _, err := client.ArchiveWorkflowTemplate(namespace.Name, uid); err != nil {
				return err
			}
		}
	}

	return nil
}
// Down20210329194731 returns the hyperparameter-tuning workflow if it was deleted
//
// For every Onepanel-enabled namespace, the template is recreated from the
// 20210118175809 manifest only when it no longer exists there.
func Down20210329194731(tx *sql.Tx) error {
	// This code is executed when the migration is rolled back.
	client, err := getClient()
	if err != nil {
		return err
	}
	defer client.DB.Close()

	namespaces, err := client.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	// Use the shared template-name constant so the generated UID matches the
	// one archived by Up20210329194731; the previous hard-coded
	// "hyperparameter-tuning" literal risked diverging from it.
	uid, err := uid2.GenerateUID(hyperparameterTuningTemplateName, 30)
	if err != nil {
		return err
	}

	for _, namespace := range namespaces {
		workflowTemplate, err := client.GetWorkflowTemplateRaw(namespace.Name, uid)
		if err != nil {
			return err
		}
		// Template still exists in this namespace; nothing to recreate.
		if workflowTemplate != nil {
			continue
		}

		err = createWorkflowTemplate(
			filepath.Join("workflows", "hyperparameter-tuning", "20210118175809.yaml"),
			hyperparameterTuningTemplateName,
			map[string]string{
				"framework":  "tensorflow",
				"tuner":      "TPE",
				"created-by": "system",
			},
		)
		if err != nil {
			return err
		}
	}
	return nil
}

View File

@@ -0,0 +1,31 @@
package migration
import (
"database/sql"
"github.com/pressly/goose"
"path/filepath"
)
// deepLearningDesktopTemplateName is the display name of the Deep Learning
// Desktop (VNC) workspace template created by migration 20210414165510.
var deepLearningDesktopTemplateName = "Deep Learning Desktop"
// initialize20210414165510 registers the 20210414165510 migration with goose,
// using initializedMigrations to guarantee at-most-once registration.
func initialize20210414165510() {
	if initializedMigrations[20210414165510] {
		return
	}
	goose.AddMigration(Up20210414165510, Down20210414165510)
	initializedMigrations[20210414165510] = true
}
// Up20210414165510 creates the Deep Learning Desktop Workspace Template
//
// The template is created in every Onepanel-enabled namespace from the
// 20210414165510 VNC manifest.
func Up20210414165510(tx *sql.Tx) error {
	// This code is executed when the migration is applied.
	manifestPath := filepath.Join("workspaces", "vnc", "20210414165510.yaml")
	description := "Deep learning desktop with VNC"
	return createWorkspaceTemplate(manifestPath, deepLearningDesktopTemplateName, description)
}
// Down20210414165510 removes the Deep Learning Desktop Workspace Template
//
// The template is archived rather than deleted; archiving fails if any
// running workspaces still use it.
func Down20210414165510(tx *sql.Tx) error {
	// This code is executed when the migration is rolled back.
	return archiveWorkspaceTemplate(deepLearningDesktopTemplateName)
}

View File

@@ -91,6 +91,10 @@ func Initialize() {
initialize20210129142057()
initialize20210129152427()
initialize20210224180017()
initialize20210323175655()
initialize20210329171739()
initialize20210329194731()
initialize20210414165510()
if err := client.DB.Close(); err != nil {
log.Printf("[error] closing db %v", err)

View File

@@ -1,10 +1,92 @@
package migration
import (
"fmt"
v1 "github.com/onepanelio/core/pkg"
uid2 "github.com/onepanelio/core/pkg/util/uid"
)
// createWorkspaceTemplate creates the workspace template named templateName in
// every Onepanel-enabled namespace, using the manifest stored in the data file
// at filename and the given human-readable description.
func createWorkspaceTemplate(filename, templateName, description string) error {
	client, err := getClient()
	if err != nil {
		return err
	}
	defer client.DB.Close()

	manifest, err := readDataFile(filename)
	if err != nil {
		return err
	}

	namespaces, err := client.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	// A single UID, derived from the template name, is shared by all namespaces.
	templateUID, err := uid2.GenerateUID(templateName, 30)
	if err != nil {
		return err
	}

	for _, namespace := range namespaces {
		template := &v1.WorkspaceTemplate{
			UID:         templateUID,
			Name:        templateName,
			Manifest:    manifest,
			Description: description,
		}
		// Adjust the manifest's artifact repository type to match the namespace
		// configuration before persisting the template.
		if err := ReplaceArtifactRepositoryType(client, namespace, nil, template); err != nil {
			return err
		}
		if _, err := client.CreateWorkspaceTemplate(namespace.Name, template); err != nil {
			return err
		}
	}

	return nil
}
// archiveWorkspaceTemplate archives the workspace template identified by
// templateName in every Onepanel-enabled namespace. It refuses to archive a
// template that still has running workspaces using it.
func archiveWorkspaceTemplate(templateName string) error {
	client, err := getClient()
	if err != nil {
		return err
	}
	defer client.DB.Close()

	namespaces, err := client.ListOnepanelEnabledNamespaces()
	if err != nil {
		return err
	}

	uid, err := uid2.GenerateUID(templateName, 30)
	if err != nil {
		return err
	}

	for _, namespace := range namespaces {
		hasRunning, err := client.WorkspaceTemplateHasRunningWorkspaces(namespace.Name, uid)
		if err != nil {
			// Wrap (rather than discard) the underlying error so callers can
			// diagnose why the running-workspace check failed.
			return fmt.Errorf("unable to check for running workspaces: %w", err)
		}
		if hasRunning {
			return fmt.Errorf("unable to archive workspace template. There are running workspaces that use it")
		}

		if _, err := client.ArchiveWorkspaceTemplate(namespace.Name, uid); err != nil {
			return err
		}
	}

	return nil
}
// updateWorkspaceTemplateManifest will update the workspace template given by {{templateName}} with the contents
// given by {{filename}}
// It will do so for all namespaces.

View File

@@ -103,7 +103,7 @@ templates:
command:
- sh
- -c
image: onepanel/dl:0.17.0
image: onepanel/dl:v0.20.0
volumeMounts:
- mountPath: /mnt/data
name: processed-data
@@ -112,7 +112,7 @@ templates:
workingDir: /mnt/src
sidecars:
- name: tensorboard
image: onepanel/dl:0.17.0
image: onepanel/dl:v0.20.0
command: [ sh, -c ]
env:
- name: ONEPANEL_INTERACTIVE_SIDECAR
@@ -155,7 +155,7 @@ templates:
command:
- sh
- -c
image: onepanel/dl:0.17.0
image: onepanel/dl:v0.20.0
volumeMounts:
- mountPath: /mnt/data
name: data

View File

@@ -0,0 +1,207 @@
# source: https://github.com/onepanelio/templates/blob/master/workflows/pytorch-mnist-training/
arguments:
parameters:
- name: epochs
value: '10'
- displayName: Node pool
hint: Name of node pool or group to run this workflow task
type: select.nodepool
name: sys-node-pool
value: {{.DefaultNodePoolOption}}
visibility: public
required: true
entrypoint: main
templates:
- name: main
dag:
tasks:
- name: train-model
template: train-model
- name: train-model
# Indicates that we want to push files in /mnt/output to object storage
outputs:
artifacts:
- name: output
path: /mnt/output
optional: true
script:
image: onepanel/dl:v0.20.0
command:
- python
- '-u'
source: |
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.tensorboard import SummaryWriter
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 3, 1)
self.conv2 = nn.Conv2d(32, 64, 3, 1)
self.dropout1 = nn.Dropout(0.25)
self.dropout2 = nn.Dropout(0.5)
self.fc1 = nn.Linear(9216, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.conv2(x)
x = F.relu(x)
x = F.max_pool2d(x, 2)
x = self.dropout1(x)
x = torch.flatten(x, 1)
x = self.fc1(x)
x = F.relu(x)
x = self.dropout2(x)
x = self.fc2(x)
output = F.log_softmax(x, dim=1)
return output
def train(model, device, train_loader, optimizer, epoch, batch_size, writer):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % 10 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
writer.add_scalar('training loss', loss.item(), epoch)
def test(model, device, test_loader, epoch, writer):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
loss = test_loss / len(test_loader.dataset)
accuracy = correct / len(test_loader.dataset)
print('\nTest set: Average loss: {}, Accuracy: {}\n'.format(
loss, accuracy))
# Store metrics for this task
metrics = [
{'name': 'accuracy', 'value': accuracy},
{'name': 'loss', 'value': loss}
]
with open('/mnt/tmp/sys-metrics.json', 'w') as f:
json.dump(metrics, f)
def main(params):
writer = SummaryWriter(log_dir='/mnt/output/tensorboard')
use_cuda = torch.cuda.is_available()
torch.manual_seed(params['seed'])
device = torch.device('cuda' if use_cuda else 'cpu')
train_kwargs = {'batch_size': params['batch_size']}
test_kwargs = {'batch_size': params['test_batch_size']}
if use_cuda:
cuda_kwargs = {'num_workers': 1,
'pin_memory': True,
'shuffle': True}
train_kwargs.update(cuda_kwargs)
test_kwargs.update(cuda_kwargs)
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
dataset1 = datasets.MNIST('/mnt/data', train=True, download=True,
transform=transform)
dataset2 = datasets.MNIST('/mnt/data', train=False,
transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=params['lr'])
scheduler = StepLR(optimizer, step_size=1, gamma=params['gamma'])
for epoch in range(1, params['epochs'] + 1):
train(model, device, train_loader, optimizer, epoch, params['batch_size'], writer)
test(model, device, test_loader, epoch, writer)
scheduler.step()
# Save model
torch.save(model.state_dict(), '/mnt/output/model.pt')
writer.close()
if __name__ == '__main__':
params = {
'seed': 1,
'batch_size': 64,
'test_batch_size': 1000,
'epochs': {{workflow.parameters.epochs}},
'lr': 0.001,
'gamma': 0.7,
}
main(params)
volumeMounts:
# TensorBoard sidecar will automatically mount these volumes
# The `data` volume is mounted for saving datasets
# The `output` volume is mounted to save model output and share TensorBoard logs
- name: data
mountPath: /mnt/data
- name: output
mountPath: /mnt/output
nodeSelector:
{{.NodePoolLabel}}: '{{workflow.parameters.sys-node-pool}}'
sidecars:
- name: tensorboard
image: onepanel/dl:v0.20.0
command:
- sh
- '-c'
env:
- name: ONEPANEL_INTERACTIVE_SIDECAR
value: 'true'
args:
# Read logs from /mnt/output - this directory is auto-mounted from volumeMounts
- tensorboard --logdir /mnt/output/tensorboard
ports:
- containerPort: 6006
name: tensorboard
volumeClaimTemplates:
# Provision volumes for storing data and output
- metadata:
name: data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 2Gi
- metadata:
name: output
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 2Gi

View File

@@ -0,0 +1,118 @@
# source: https://github.com/onepanelio/templates/blob/master/workflows/tensorflow-mnist-training/
arguments:
parameters:
- name: epochs
value: '10'
- displayName: Node pool
hint: Name of node pool or group to run this workflow task
type: select.nodepool
name: sys-node-pool
value: {{.DefaultNodePoolOption}}
visibility: public
required: true
entrypoint: main
templates:
- name: main
dag:
tasks:
- name: train-model
template: train-model
- name: train-model
# Indicates that we want to push files in /mnt/output to object storage
outputs:
artifacts:
- name: output
path: /mnt/output
optional: true
script:
image: onepanel/dl:v0.20.0
command:
- python
- '-u'
source: |
import json
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(filters=32, kernel_size=5, activation='relu'),
tf.keras.layers.MaxPool2D(pool_size=2),
tf.keras.layers.Conv2D(filters=64, kernel_size=5, activation='relu'),
tf.keras.layers.MaxPool2D(pool_size=2),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(units=124, activation='relu'),
tf.keras.layers.Dropout(rate=0.75),
tf.keras.layers.Dense(units=10, activation='softmax')
])
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# Write TensorBoard logs to /mnt/output
log_dir = '/mnt/output/tensorboard/'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model.fit(x=x_train,
y=y_train,
epochs={{workflow.parameters.epochs}},
validation_data=(x_test, y_test),
callbacks=[tensorboard_callback])
# Store metrics for this task
loss, accuracy = model.evaluate(x_test, y_test)
metrics = [
{'name': 'accuracy', 'value': accuracy},
{'name': 'loss', 'value': loss}
]
with open('/mnt/tmp/sys-metrics.json', 'w') as f:
json.dump(metrics, f)
# Save model
model.save('/mnt/output/model.h5')
volumeMounts:
# TensorBoard sidecar will automatically mount these volumes
# The `data` volume is mounted to support Keras datasets
# The `output` volume is mounted to save model output and share TensorBoard logs
- name: data
mountPath: /home/root/.keras/datasets
- name: output
mountPath: /mnt/output
nodeSelector:
{{.NodePoolLabel}}: '{{workflow.parameters.sys-node-pool}}'
sidecars:
- name: tensorboard
image: onepanel/dl:v0.20.0
command:
- sh
- '-c'
env:
- name: ONEPANEL_INTERACTIVE_SIDECAR
value: 'true'
args:
# Read logs from /mnt/output - this directory is auto-mounted from volumeMounts
- tensorboard --logdir /mnt/output/tensorboard
ports:
- containerPort: 6006
name: tensorboard
volumeClaimTemplates:
# Provision volumes for storing data and output
- metadata:
name: data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 2Gi
- metadata:
name: output
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 2Gi

View File

@@ -0,0 +1,134 @@
containers:
- name: cvat-db
image: postgres:10-alpine
env:
- name: POSTGRES_USER
value: root
- name: POSTGRES_DB
value: cvat
- name: POSTGRES_HOST_AUTH_METHOD
value: trust
- name: PGDATA
value: /var/lib/psql/data
ports:
- containerPort: 5432
name: tcp
volumeMounts:
- name: db
mountPath: /var/lib/psql
- name: cvat-redis
image: redis:4.0-alpine
ports:
- containerPort: 6379
name: tcp
- name: cvat
image: onepanel/cvat:v0.19.0_cvat.1.0.0
env:
- name: DJANGO_MODWSGI_EXTRA_ARGS
value: ""
- name: ALLOWED_HOSTS
value: '*'
- name: CVAT_REDIS_HOST
value: localhost
- name: CVAT_POSTGRES_HOST
value: localhost
- name: CVAT_SHARE_URL
value: /cvat/data
- name: CVAT_SHARE_DIR
value: /share
- name: CVAT_DATA_DIR
value: /cvat/data
- name: CVAT_MEDIA_DATA_DIR
value: /cvat/data/data
- name: CVAT_KEYS_DIR
value: /cvat/data/keys
- name: CVAT_MODELS_DIR
value: /cvat/data/models
- name: CVAT_LOGS_DIR
value: /cvat/logs
- name: CVAT_ANNOTATIONS_OBJECT_STORAGE_PREFIX
value: 'artifacts/$(ONEPANEL_RESOURCE_NAMESPACE)/annotations/'
- name: CVAT_ONEPANEL_WORKFLOWS_LABEL
value: 'key=used-by,value=cvat'
- name: NVIDIA_VISIBLE_DEVICES
value: all
- name: NVIDIA_DRIVER_CAPABILITIES
value: compute,utility
- name: NVIDIA_REQUIRE_CUDA
value: "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411"
- name: ONEPANEL_MAIN_CONTAINER
value: 'true'
ports:
- containerPort: 8080
name: http
volumeMounts:
- name: cvat-data
mountPath: /cvat
- name: share
mountPath: /share
- name: sys-namespace-config
mountPath: /etc/onepanel
readOnly: true
- name: cvat-ui
image: onepanel/cvat-ui:v0.19.0_cvat.1.0.0
ports:
- containerPort: 80
name: http
- name: sys-filesyncer
image: onepanel/filesyncer:v0.20.0
imagePullPolicy: Always
args:
- server
- -server-prefix=/sys/filesyncer
volumeMounts:
- name: share
mountPath: /share
- name: sys-namespace-config
mountPath: /etc/onepanel
readOnly: true
ports:
- name: cvat-ui
port: 80
protocol: TCP
targetPort: 80
- name: cvat
port: 8080
protocol: TCP
targetPort: 8080
- name: fs
port: 8888
protocol: TCP
targetPort: 8888
routes:
- match:
- uri:
prefix: /sys/filesyncer
route:
- destination:
port:
number: 8888
- match:
- uri:
regex: /api/.*|/git/.*|/tensorflow/.*|/onepanelio/.*|/tracking/.*|/auto_annotation/.*|/analytics/.*|/static/.*|/admin/.*|/documentation/.*|/dextr/.*|/reid/.*
- queryParams:
id:
regex: \d+.*
route:
- destination:
port:
number: 8080
- match:
- uri:
prefix: /
route:
- destination:
port:
number: 80
volumeClaimTemplates:
- metadata:
name: db
spec:
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 20Gi

View File

@@ -0,0 +1,101 @@
containers:
- name: jupyterlab
image: onepanel/dl:v0.20.0
command: ["/bin/bash", "-c", "pip install onepanel-sdk && start.sh LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 jupyter lab --LabApp.token='' --LabApp.allow_remote_access=True --LabApp.allow_origin=\"*\" --LabApp.disable_check_xsrf=True --LabApp.trust_xheaders=True --LabApp.base_url=/ --LabApp.tornado_settings='{\"headers\":{\"Content-Security-Policy\":\"frame-ancestors * 'self'\"}}' --notebook-dir='/data' --allow-root"]
workingDir: /data
env:
- name: tornado
value: "'{'headers':{'Content-Security-Policy':\"frame-ancestors\ *\ 'self'\"}}'"
- name: TENSORBOARD_PROXY_URL
value: '//$(ONEPANEL_RESOURCE_UID)--$(ONEPANEL_RESOURCE_NAMESPACE).$(ONEPANEL_DOMAIN)/tensorboard'
- name: ONEPANEL_MAIN_CONTAINER
value: 'true'
ports:
- containerPort: 8888
name: jupyterlab
- containerPort: 6006
name: tensorboard
- containerPort: 8080
name: nni
volumeMounts:
- name: data
mountPath: /data
lifecycle:
postStart:
exec:
command:
- /bin/sh
- -c
- >
condayml="/data/.environment.yml";
jupytertxt="/data/.jupexported.txt";
if [ -f "$condayml" ]; then conda env update -f $condayml; fi;
if [ -f "$jupytertxt" ]; then cat $jupytertxt | xargs -n 1 jupyter labextension install --no-build && jupyter lab build --minimize=False; fi;
preStop:
exec:
command:
- /bin/sh
- -c
- >
conda env export > /data/.environment.yml -n base;
jupyter labextension list 1>/dev/null 2> /data/.jup.txt;
cat /data/.jup.txt | sed -n '2,$p' | awk 'sub(/v/,"@", $2){print $1$2}' > /data/.jupexported.txt;
- name: sys-filesyncer
image: onepanel/filesyncer:v0.20.0
imagePullPolicy: Always
args:
- server
- -host=localhost:8889
- -server-prefix=/sys/filesyncer
volumeMounts:
- name: data
mountPath: /data
- name: sys-namespace-config
mountPath: /etc/onepanel
readOnly: true
ports:
- name: jupyterlab
port: 80
protocol: TCP
targetPort: 8888
- name: tensorboard
port: 6006
protocol: TCP
targetPort: 6006
- name: nni
port: 8080
protocol: TCP
targetPort: 8080
- name: fs
port: 8889
protocol: TCP
targetPort: 8889
routes:
- match:
- uri:
prefix: /sys/filesyncer
route:
- destination:
port:
number: 8889
- match:
- uri:
prefix: /tensorboard
route:
- destination:
port:
number: 6006
- match:
- uri:
prefix: /nni
route:
- destination:
port:
number: 8080
- match:
- uri:
prefix: /
route:
- destination:
port:
number: 80

View File

@@ -0,0 +1,57 @@
arguments:
  parameters:
    # parameter screen-resolution allows users to select screen resolution
    - name: screen-resolution
      value: 1680x1050
      type: select.select
      displayName: Screen Resolution
      options:
        - name: 1280x1024
          value: 1280x1024
        - name: 1680x1050
          value: 1680x1050
        - name: 2880x1800
          value: 2880x1800
containers:
  - name: ubuntu
    image: onepanel/vnc:dl-vnc
    env:
      - name: VNC_PASSWORDLESS
        # Quoted: Kubernetes EnvVar values must be strings; a bare `true`
        # parses as a YAML boolean and fails manifest unmarshalling.
        value: 'true'
      - name: VNC_RESOLUTION
        value: '{{workflow.parameters.screen-resolution}}'
    ports:
      - containerPort: 6901
        name: vnc
    volumeMounts:
      - name: data
        mountPath: /data
ports:
  - name: vnc
    port: 80
    protocol: TCP
    targetPort: 6901
routes:
  - match:
      - uri:
          prefix: /
    route:
      - destination:
          port:
            number: 80
# DAG Workflow to be executed once a Workspace action completes (optional)
#postExecutionWorkflow:
#  entrypoint: main
#  templates:
#    - name: main
#      dag:
#        tasks:
#          - name: slack-notify
#            template: slack-notify
#    - name: slack-notify
#      container:
#        image: technosophos/slack-notify
#        args:
#          - SLACK_USERNAME=onepanel SLACK_TITLE="Your workspace is ready" SLACK_ICON=https://www.gravatar.com/avatar/5c4478592fe00878f62f0027be59c1bd SLACK_MESSAGE="Your workspace is now running" ./slack-notify
#        command:
#          - sh

View File

@@ -0,0 +1,68 @@
# VS Code (code-server) workspace template.
containers:
  - name: vscode
    image: onepanel/vscode:v0.20.0_code-server.3.9.1
    # Install the Onepanel SDK at startup, then launch code-server without auth;
    # access control is presumably handled upstream by the platform — TODO confirm.
    command: ["/bin/bash", "-c", "pip install onepanel-sdk && /usr/bin/entrypoint.sh --bind-addr 0.0.0.0:8080 --auth none ."]
    env:
      - name: ONEPANEL_MAIN_CONTAINER
        value: 'true'
    ports:
      - containerPort: 8080
        name: vscode
    volumeMounts:
      - name: data
        mountPath: /data
    lifecycle:
      # On start: restore the saved conda environment and any previously
      # exported VS Code extensions from the persistent /data volume.
      postStart:
        exec:
          command:
            - /bin/sh
            - -c
            - >
              condayml="/data/.environment.yml";
              vscodetxt="/data/.vscode-extensions.txt";
              if [ -f "$condayml" ]; then conda env update -f $condayml; fi;
              if [ -f "$vscodetxt" ]; then cat $vscodetxt | xargs -n 1 code-server --install-extension; fi;
      # On stop: export the conda environment and extension list so the
      # postStart hook can restore them next time.
      preStop:
        exec:
          command:
            - /bin/sh
            - -c
            - >
              conda env export > /data/.environment.yml -n base;
              code-server --list-extensions | tail -n +2 > /data/.vscode-extensions.txt;
  - name: sys-filesyncer
    image: onepanel/filesyncer:v0.20.0
    imagePullPolicy: Always
    args:
      - server
      - -server-prefix=/sys/filesyncer
    volumeMounts:
      - name: data
        mountPath: /data
      - name: sys-namespace-config
        mountPath: /etc/onepanel
        readOnly: true
ports:
  - name: vscode
    port: 8080
    protocol: TCP
    targetPort: 8080
  - name: fs
    port: 8888
    protocol: TCP
    targetPort: 8888
routes:
  # Route the file-syncer prefix to its own port; everything else goes to code-server.
  - match:
      - uri:
          prefix: /sys/filesyncer
    route:
      - destination:
          port:
            number: 8888
  - match:
      - uri:
          prefix: /
    route:
      - destination:
          port:
            number: 8080

15
main.go
View File

@@ -4,6 +4,14 @@ import (
"context"
"flag"
"fmt"
migrations "github.com/onepanelio/core/db/go"
"github.com/pressly/goose"
"math"
"net"
"net/http"
"path/filepath"
"strings"
"github.com/gorilla/handlers"
grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
grpc_logrus "github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus"
@@ -11,12 +19,10 @@ import (
"github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
"github.com/jmoiron/sqlx"
api "github.com/onepanelio/core/api/gen"
migrations "github.com/onepanelio/core/db/go"
v1 "github.com/onepanelio/core/pkg"
"github.com/onepanelio/core/pkg/util/env"
"github.com/onepanelio/core/server"
"github.com/onepanelio/core/server/auth"
"github.com/pressly/goose"
log "github.com/sirupsen/logrus"
"github.com/tmc/grpc-websocket-proxy/wsproxy"
"google.golang.org/grpc"
@@ -29,11 +35,6 @@ import (
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"math"
"net"
"net/http"
"path/filepath"
"strings"
)
var (

View File

@@ -2,11 +2,19 @@ package v1
import (
"bufio"
"cloud.google.com/go/storage"
"database/sql"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"cloud.google.com/go/storage"
sq "github.com/Masterminds/squirrel"
"github.com/argoproj/argo/persist/sqldb"
"github.com/argoproj/argo/workflow/hydrator"
@@ -19,17 +27,10 @@ import (
uid2 "github.com/onepanelio/core/pkg/util/uid"
"golang.org/x/net/context"
"gopkg.in/yaml.v2"
"io"
"io/ioutil"
networking "istio.io/api/networking/v1alpha3"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/watch"
"net/http"
"regexp"
yaml2 "sigs.k8s.io/yaml"
"strconv"
"strings"
"time"
wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1"
"github.com/argoproj/argo/workflow/common"
@@ -307,7 +308,6 @@ func injectEnvironmentVariables(container *corev1.Container, systemConfig System
func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts *WorkflowExecutionOptions) (err error) {
if opts.PodGCStrategy == nil {
if wf.Spec.PodGC == nil {
//TODO - Load this data from onepanel config-map or secret
podGCStrategy := env.Get("ARGO_POD_GC_STRATEGY", "OnPodCompletion")
strategy := PodGCStrategy(podGCStrategy)
wf.Spec.PodGC = &wfv1.PodGC{
@@ -334,6 +334,23 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts
Medium: corev1.StorageMediumMemory,
},
},
}, corev1.Volume{ // Artifacts out
Name: "tmp",
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
},
},
})
// Create artifacts out volume
wf.Spec.Volumes = append(wf.Spec.Volumes, corev1.Volume{
Name: "out",
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
},
},
})
systemConfig, err := c.GetSystemConfig()
@@ -367,6 +384,12 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts
Name: "sys-dshm",
MountPath: "/dev/shm",
})
template.Container.VolumeMounts = append(template.Container.VolumeMounts, corev1.VolumeMount{
Name: "tmp",
MountPath: "/mnt/tmp",
})
err = c.injectHostPortAndResourcesToContainer(template, opts, systemConfig)
if err != nil {
return err
@@ -386,7 +409,7 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts
// Always add output artifacts for metrics but make them optional
template.Outputs.Artifacts = append(template.Outputs.Artifacts, wfv1.Artifact{
Name: "sys-metrics",
Path: "/tmp/sys-metrics.json",
Path: "/mnt/tmp/sys-metrics.json",
Optional: true,
Archive: &wfv1.ArchiveStrategy{
None: &wfv1.NoneStrategy{},
@@ -2424,3 +2447,17 @@ func (c *Client) ListWorkflowExecutionsField(namespace, field string) (value []s
return
}
// CountWorkflowExecutionsForWorkflowTemplate returns the number of workflow
// executions associated with the workflow template identified by its ID.
func (c *Client) CountWorkflowExecutionsForWorkflowTemplate(workflowTemplateID uint64) (count int, err error) {
	// Walk executions -> template versions -> templates so the count covers
	// every version of the template, not just the latest one.
	query := sb.Select("COUNT(*)").
		From("workflow_executions we").
		Join("workflow_template_versions wtv ON we.workflow_template_version_id = wtv.id").
		Join("workflow_templates wt ON wtv.workflow_template_id = wt.id").
		Where(sq.Eq{"wt.id": workflowTemplateID})

	err = query.RunWith(c.DB).
		QueryRow().
		Scan(&count)

	return
}

View File

@@ -402,6 +402,21 @@ func (c *Client) getWorkflowTemplateById(id uint64) (workflowTemplate *WorkflowT
return
}
// getWorkflowTemplateRaw loads the workflow template row matching (namespace, uid)
// without loading any version, argo workflow, or label data.
func (c *Client) getWorkflowTemplateRaw(namespace, uid string) (workflowTemplate *WorkflowTemplate, err error) {
	workflowTemplate = &WorkflowTemplate{}

	filter := sq.Eq{
		"namespace": namespace,
		"uid":       uid,
	}

	query := sb.Select(getWorkflowTemplateColumns("wt", "")...).
		From("workflow_templates wt").
		Where(filter)

	err = c.DB.Getx(workflowTemplate, query)

	return
}
// getWorkflowTemplate gets the workflow template given the input data.
// It also loads the argo workflow and labels data.
// If version is <= 0, the latest workflow template is fetched.
@@ -737,6 +752,16 @@ func (c *Client) UpdateWorkflowTemplateVersion(wtv *WorkflowTemplateVersion) err
return updateWorkflowTemplateVersionDB(c.DB, wtv)
}
// GetWorkflowTemplateRaw returns the WorkflowTemplate without any version information.
// A missing row is not treated as an error: (nil, nil) is returned when no template
// matches the given namespace and uid.
//
// NOTE(review): matching on the error string is fragile; prefer
// errors.Is(err, sql.ErrNoRows) if database/sql and errors are (or become)
// imported in this file — confirm the driver surfaces that sentinel.
func (c *Client) GetWorkflowTemplateRaw(namespace, uid string) (workflowTemplate *WorkflowTemplate, err error) {
	workflowTemplate, err = c.getWorkflowTemplateRaw(namespace, uid)
	if err == nil {
		return
	}

	// Translate "row not found" into an empty result with no error.
	if strings.Contains(err.Error(), "sql: no rows in result set") {
		return nil, nil
	}

	return
}
// GetWorkflowTemplate returns a WorkflowTemplate with data loaded from various sources
// If version is 0, it returns the latest version data.
//
@@ -905,6 +930,8 @@ func (c *Client) getLatestWorkflowTemplate(namespace, uid string) (*WorkflowTemp
return c.getWorkflowTemplate(namespace, uid, 0) //version=0 means latest
}
// ArchiveWorkflowTemplate will mark the workflow template identified by the (namespace, uid) as archived
// and will archive or delete resources where appropriate.
func (c *Client) ArchiveWorkflowTemplate(namespace, uid string) (archived bool, err error) {
workflowTemplate, err := c.getLatestWorkflowTemplate(namespace, uid)
if err != nil {

View File

@@ -392,7 +392,7 @@ func generateNodeCaptureContainer(workspace *Workspace, config SystemConfig) map
"ports": []interface{}{
map[string]interface{}{
"name": "node-capturer",
"hostPort": 80,
"hostPort": 49151,
"containerPort": 80,
},
},