Compare commits

..

22 Commits

Author SHA1 Message Date
Aleksandr Melnikov
03f8f47664 Merge pull request #642 from Vafilor/fix/jupyterlab.migrations
feat: added convenience method to update workspace template manifest
2020-10-06 10:47:18 -07:00
Andrey Melnikov
c85496d216 update: added method specifically to update the manifest of a workspace template and modified recent migration to use it.
This fixes an issue where the jupyterlab migration wiped out the old description.
2020-10-06 10:28:03 -07:00
Rush Tehrani
5f6415548d Merge pull request #640 from aleksandrmelnikov/fix/core.637-dedicated.nodes.via.hostport
fix: Fixing issues with using hostPort. Removed prior logic that still relied on running nodes.
2020-10-05 15:40:26 -07:00
Aleksandr Melnikov
c641c17a8c Updating code that generates an extra container for a workspace.
- Renamed function to make it clearer what it's doing with the extra container
- Added documentation for the function
- Removed listing nodes code, since we only care if the workspace has
a nodeSelector set.
2020-10-05 14:17:00 -07:00
Aleksandr Melnikov
83a2543b13 Updating function name to reflect what it's doing. 2020-10-05 14:04:32 -07:00
Aleksandr Melnikov
e8dae0f2e9 Since we're no longer relying on running nodes, we don't need logic
relating to them.
- We can just check if a nodeSelector is set on the template.
2020-10-05 13:59:14 -07:00
Rush Tehrani
b85bf4d688 Merge pull request #638 from aleksandrmelnikov/fix/core.637-dedicated.nodes.via.hostport
fix: Replace resource requests and limits with hostPort, as a means of grabbing dedicated nodes.
2020-10-05 12:17:45 -07:00
Aleksandr Melnikov
7fe0ab2654 Tweaking names so it's more clear why they are there. 2020-10-05 11:51:04 -07:00
Aleksandr Melnikov
dfa6eb2fe6 Removing function that's no longer used. 2020-10-05 11:46:19 -07:00
Aleksandr Melnikov
cc2c51ace5 Removing resource requests and limits.
- Using hostPort on the node as a way to require dedicated nodes
for workspaces and workflows.
2020-10-05 11:46:01 -07:00
Andrey Melnikov
897462ede7 Merge pull request #636 from aleksandrmelnikov/fix/if.no.running.node.don't.try.to.set.resources
fix: Do not try to calculate resource requests and limits if the node is not running.
2020-10-02 16:25:04 -07:00
Aleksandr Melnikov
4e3c24fd89 Merge pull request #630 from Vafilor/fix/workspace.start.resource.requirements
fix: added resource requirements to start workspace
2020-10-02 16:17:15 -07:00
Aleksandr Melnikov
276e105f20 Adding documentation for function call.
- Refactoring the parameter names so they are actually usefully named.
2020-10-02 15:53:29 -07:00
Aleksandr Melnikov
656026ac84 Refactored workflow_execution resource calculation to use the new
function, to put the logic into the same place.
2020-10-02 13:08:48 -07:00
Aleksandr Melnikov
95bea11e43 Refactored the resource calculation piece into its own function.
Added code logic to check the cpu and memory generated by the node.
- If they are empty, then skip trying to add an extra container
for workspace.
2020-10-02 13:08:29 -07:00
Andrey Melnikov
c6f65510d8 Merge pull request #635 from Vafilor/fix/remove.dependabot.from.release.notes
fix: remove dependabot[bot] from release notes
2020-10-01 22:55:31 -07:00
Andrey Melnikov
d6e279dde5 fix: remove dependabot[bot] from release notes 2020-10-01 22:52:08 -07:00
Andrey Melnikov
e99b0e943d Merge pull request #632 from onepanelio/fix/tags
fix: build docker image on tag push
2020-10-01 22:44:58 -07:00
Andrey Melnikov
22a7c31f1d fix: testing tag push 2020-10-01 22:31:26 -07:00
Andrey Melnikov
b6c0f24170 fix: added resource requirements to start workspace 2020-10-01 21:21:57 -07:00
Andrey Melnikov
9c04ee066d Merge pull request #629 from Vafilor/fix/migrations
fix: issue where more than one namespace caused migration issues
2020-10-01 13:41:35 -07:00
Andrey Melnikov
29c3e808e1 fix: issue where more than one namespace caused migration issues.
The `version` of a WorkspaceTemplate is set in an iteration, so we need to move the declaration inside the loop so the version is always reset to 0.
2020-10-01 13:29:50 -07:00
7 changed files with 93 additions and 191 deletions

29
.github/workflows/push_tag.yaml vendored Normal file
View File

@@ -0,0 +1,29 @@
# Workflow: on a tag push, publish the onepanel/core Docker image tagged with the
# git tag name, then post a notification to Slack.
name: Publish docker image on tag push
on:
push:
tags:
# Tag names are matched against the '*' pattern.
- '*'
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
# Later steps read GIT_TAG_NAME from the environment; presumably this action
# exports it — confirm against the action's documentation.
- uses: olegtarasov/get-tag@v2
id: tagName
- name: Publish to Registry
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: onepanel/core
# Registry credentials come from repository secrets.
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_TOKEN }}
# The image tag is the git tag name.
tags: "${{ env.GIT_TAG_NAME }}"
- name: Set Slack Message
# Stores the message text in SLACK_MESSAGE for the next step.
# NOTE(review): the `::set-env` workflow command has since been disabled by GitHub;
# writing to $GITHUB_ENV is the documented replacement — confirm before reusing this workflow.
run: echo "::set-env name=SLACK_MESSAGE::Tag $GIT_TAG_NAME. Docker Tag onepanel/core:$GIT_TAG_NAME"
- name: Notify Slack Channels
uses: rtCamp/action-slack-notify@v2.0.0
env:
SLACK_CHANNEL: dev
SLACK_ICON: https://avatars1.githubusercontent.com/u/30390575?s=48&v=4
SLACK_TITLE: New Core Version
SLACK_USERNAME: opBot
# Webhook URL comes from repository secrets.
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

View File

@@ -155,6 +155,10 @@ func printMarkDown(issues []*issue, version *string) {
fmt.Println("# Contributors")
contributors := make([]user, 0)
for _, contributor := range contributorsMap {
// Sorry, no bots.
if contributor.Login == "dependabot[bot]" {
continue
}
contributors = append(contributors, contributor)
}
sort.Slice(contributors, func(i, j int) bool { return contributors[i].ContributionsCount > contributors[j].ContributionsCount })

View File

@@ -105,13 +105,14 @@ func Up20200821162630(tx *sql.Tx) error {
if err != nil {
return err
}
workspaceTemplate := &v1.WorkspaceTemplate{
UID: uid,
Name: jupyterLabTemplateName,
Manifest: jupyterWorkspaceTemplate2,
}
for _, namespace := range namespaces {
workspaceTemplate := &v1.WorkspaceTemplate{
UID: uid,
Name: jupyterLabTemplateName,
Manifest: jupyterWorkspaceTemplate2,
}
if _, err := client.UpdateWorkspaceTemplate(namespace.Name, workspaceTemplate); err != nil {
return err
}

View File

@@ -2,7 +2,6 @@ package migration
import (
"database/sql"
v1 "github.com/onepanelio/core/pkg"
uid2 "github.com/onepanelio/core/pkg/util/uid"
"github.com/pressly/goose"
)
@@ -102,14 +101,8 @@ func Up20200929153931(tx *sql.Tx) error {
if err != nil {
return err
}
workspaceTemplate := &v1.WorkspaceTemplate{
UID: uid,
Name: jupyterLabTemplateName,
Manifest: jupyterWorkspaceTemplate3,
}
for _, namespace := range namespaces {
if _, err := client.UpdateWorkspaceTemplate(namespace.Name, workspaceTemplate); err != nil {
if _, err := client.UpdateWorkspaceTemplateManifest(namespace.Name, uid, jupyterWorkspaceTemplate3); err != nil {
return err
}
}
@@ -144,14 +137,9 @@ func Down20200929153931(tx *sql.Tx) error {
if err != nil {
return err
}
workspaceTemplate := &v1.WorkspaceTemplate{
UID: uid,
Name: jupyterLabTemplateName,
Manifest: jupyterWorkspaceTemplate2,
}
for _, namespace := range namespaces {
if _, err := client.UpdateWorkspaceTemplate(namespace.Name, workspaceTemplate); err != nil {
if _, err := client.UpdateWorkspaceTemplateManifest(namespace.Name, uid, jupyterWorkspaceTemplate2); err != nil {
return err
}
}

View File

@@ -18,7 +18,6 @@ import (
"gopkg.in/yaml.v2"
"io"
"io/ioutil"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/watch"
"net/http"
"strconv"
@@ -197,93 +196,21 @@ func injectArtifactRepositoryConfig(artifact *wfv1.Artifact, namespaceConfig *Na
}
}
// injectContainerResourceQuotas adds resource requests and limits if they exist
// Code grabs the resource request information from the nodeSelector, compared against running nodes.
// If the running node is not present, no resource information is retrieved.
func (c *Client) injectContainerResourceQuotas(wf *wfv1.Workflow, template *wfv1.Template, systemConfig SystemConfig) error {
// injectHostPortToContainer adds a hostPort to the template container, if a nodeSelector is present.
// Kubernetes will ensure that multiple containers with the same hostPort do not share the same node.
func (c *Client) injectHostPortToContainer(template *wfv1.Template) error {
if template.NodeSelector == nil {
return nil
}
supportedNodePoolLabels := []string{"beta.kubernetes.io/instance-type", "node.kubernetes.io/instance-type"}
nodePoolLabel := ""
var value string
for k, v := range template.NodeSelector {
for _, supportedNodePoolLabel := range supportedNodePoolLabels {
if k == supportedNodePoolLabel {
nodePoolLabel = k
value = v
break
}
}
ports := []corev1.ContainerPort{
{Name: "node-capturer", HostPort: 80, ContainerPort: 80},
}
if value == "" {
return nil
if template.Container != nil {
template.Container.Ports = ports
}
if strings.Contains(value, "{{workflow.") {
parts := strings.Split(strings.Replace(value, "}}", "", -1), ".")
paramName := parts[len(parts)-1]
for _, param := range wf.Spec.Arguments.Parameters {
if param.Name == paramName && param.Value != nil {
value = *param.Value
break
}
}
}
runningNodes, err := c.Interface.CoreV1().Nodes().List(ListOptions{})
if err != nil {
return err
}
var cpu string
var memory string
var gpu int64
gpuManufacturer := ""
for _, node := range runningNodes.Items {
if node.Labels[nodePoolLabel] == value {
cpuInt := node.Status.Allocatable.Cpu().MilliValue()
cpu = strconv.FormatFloat(float64(cpuInt)*.9, 'f', 0, 64) + "m"
memoryInt := node.Status.Allocatable.Memory().MilliValue()
kiBase := 1024.0
ninetyPerc := float64(memoryInt) * .9
toKi := ninetyPerc / kiBase / kiBase
memory = strconv.FormatFloat(toKi, 'f', 0, 64) + "Ki"
//Check for Nvidia
gpuQuantity := node.Status.Allocatable["nvidia.com/gpu"]
if gpuQuantity.IsZero() == false {
gpu = gpuQuantity.Value()
gpuManufacturer = "nvidia.com/gpu"
}
//Check for AMD
//Source: https://github.com/RadeonOpenCompute/k8s-device-plugin/blob/master/example/pod/alexnet-gpu.yaml
gpuQuantity = node.Status.Allocatable["amd.com/gpu"]
if gpuQuantity.IsZero() == false {
gpu = gpuQuantity.Value()
gpuManufacturer = "amd.com/gpu"
}
}
}
if cpu != "" && memory != "" {
resourceList := corev1.ResourceRequirements{
Limits: nil,
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse(cpu),
corev1.ResourceMemory: resource.MustParse(memory),
},
}
if gpu > 0 {
stringGpu := strconv.FormatInt(gpu, 10)
resourceList.Limits = make(map[corev1.ResourceName]resource.Quantity)
resourceList.Limits[corev1.ResourceName(gpuManufacturer)] = resource.MustParse(stringGpu)
}
if template.Container != nil {
template.Container.Resources = resourceList
}
if template.Script != nil {
template.Script.Container.Resources = resourceList
}
if template.Script != nil {
template.Script.Container.Ports = ports
}
return nil
}
@@ -355,7 +282,7 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts
Name: "sys-dshm",
MountPath: "/dev/shm",
})
err = c.injectContainerResourceQuotas(wf, template, systemConfig)
err = c.injectHostPortToContainer(template)
if err != nil {
return err
}
@@ -363,7 +290,7 @@ func (c *Client) injectAutomatedFields(namespace string, wf *wfv1.Workflow, opts
}
if template.Script != nil {
err = c.injectContainerResourceQuotas(wf, template, systemConfig)
err = c.injectHostPortToContainer(template)
if err != nil {
return err
}

View File

@@ -15,7 +15,6 @@ import (
"github.com/onepanelio/core/pkg/util/request"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
"strconv"
"strings"
"time"
)
@@ -286,32 +285,15 @@ func (c *Client) addResourceRequestsAndLimitsToWorkspaceTemplate(t wfv1.Template
if !ok {
return nil, errors.New("unable to type check statefulset manifest")
}
//Get node selected
labelKey := "sys-node-pool-label"
labelKeyVal := ""
for _, parameter := range argoTemplate.Spec.Arguments.Parameters {
if parameter.Name == labelKey {
labelKeyVal = *parameter.Value
extraContainer := generateExtraContainerWithHostPortToSequesterNode()
if extraContainer != nil {
containers, ok := templateSpec["containers"].([]interface{})
if !ok {
return nil, errors.New("unable to type check statefulset manifest")
}
}
nodePoolKey := "sys-node-pool"
nodePoolVal := ""
for _, parameter := range workspace.Parameters {
if parameter.Name == nodePoolKey {
nodePoolVal = *parameter.Value
}
templateSpec["containers"] = append([]interface{}{extraContainer}, containers...)
}
extraContainer, err := generateExtraContainerWithResources(c, labelKeyVal, nodePoolVal)
if err != nil {
return nil, err
}
containers, ok := templateSpec["containers"].([]interface{})
if !ok {
return nil, errors.New("unable to type check statefulset manifest")
}
templateSpec["containers"] = append([]interface{}{extraContainer}, containers...)
resultManifest, err := yaml.Marshal(statefulSet)
if err != nil {
return nil, err
@@ -319,78 +301,25 @@ func (c *Client) addResourceRequestsAndLimitsToWorkspaceTemplate(t wfv1.Template
return resultManifest, nil
}
// generateExtraContainerWithResources will add an extra container to a workspace.
// The extra container will have the calculated resource request for the node selected by the workspace.
// generateExtraContainerWithHostPortToSequesterNode will add an extra container to a workspace.
// The extra container has a hostPort set. Kubernetes will ensure the hostPort does not conflict
// between containers, scheduling a new node as needed.
// The container will sleep once started, and generally consume negligible resources.
//
// The node that was selected has to be already running, in order to get the resource request correct.
func generateExtraContainerWithResources(c *Client, labelKeyVal string, nodePoolVal string) (map[string]interface{}, error) {
runningNodes, err := c.Interface.CoreV1().Nodes().List(ListOptions{})
if err != nil {
return nil, err
}
var cpu string
var memory string
var gpu int64
gpuManufacturer := ""
for _, node := range runningNodes.Items {
if node.Labels[labelKeyVal] == nodePoolVal {
cpuInt := node.Status.Allocatable.Cpu().MilliValue()
cpu = strconv.FormatFloat(float64(cpuInt)*.9, 'f', 0, 64) + "m"
memoryInt := node.Status.Allocatable.Memory().MilliValue()
kiBase := 1024.0
ninetyPerc := float64(memoryInt) * .9
toKi := ninetyPerc / kiBase / kiBase
memory = strconv.FormatFloat(toKi, 'f', 0, 64) + "Ki"
//Check for Nvidia
gpuQuantity := node.Status.Allocatable["nvidia.com/gpu"]
if gpuQuantity.IsZero() == false {
gpu = gpuQuantity.Value()
gpuManufacturer = "nvidia.com/gpu"
}
//Check for AMD
//Source: https://github.com/RadeonOpenCompute/k8s-device-plugin/blob/master/example/pod/alexnet-gpu.yaml
gpuQuantity = node.Status.Allocatable["amd.com/gpu"]
if gpuQuantity.IsZero() == false {
gpu = gpuQuantity.Value()
gpuManufacturer = "amd.com/gpu"
}
}
}
func generateExtraContainerWithHostPortToSequesterNode() map[string]interface{} {
extraContainer := map[string]interface{}{
"image": "alpine:latest",
"name": "resource-requester",
"name": "node-capturer",
"command": []interface{}{"/bin/sh"},
"args": []interface{}{"-c", "while :; do sleep 2073600; done"},
"resources": map[string]interface{}{
"requests": map[string]interface{}{
"cpu": cpu,
"memory": memory,
"ports": []interface{}{
map[string]interface{}{
"name": "node-capturer",
"hostPort": 80,
"containerPort": 80,
},
"limits": map[string]interface{}{},
},
}
if gpu > 0 {
res, ok := extraContainer["resources"].(map[string]interface{})
if !ok {
return nil, errors.New("unable to type check extraContainer")
}
reqs, ok := res["requests"].(map[string]interface{})
if !ok {
return nil, errors.New("unable to type check extraContainer")
}
reqs[gpuManufacturer] = gpu
limits, ok := res["limits"].(map[string]interface{})
if !ok {
return nil, errors.New("unable to type check extraContainer")
}
limits[gpuManufacturer] = gpu
}
return extraContainer, err
return extraContainer
}
// startWorkspace starts a workspace and related resources. It assumes a DB record already exists
@@ -441,6 +370,17 @@ func (c *Client) startWorkspace(namespace string, parameters []byte, workspace *
}
}
templates := argoTemplate.Spec.Templates
for i, t := range templates {
if t.Name == WorkspaceStatefulSetResource {
resultManifest, err := c.addResourceRequestsAndLimitsToWorkspaceTemplate(t, argoTemplate, workspace)
if err != nil {
return nil, err
}
templates[i].Resource.Manifest = string(resultManifest)
}
}
_, err = c.CreateWorkflowExecution(namespace, &WorkflowExecution{
Parameters: workspace.Parameters,
}, workflowTemplate)

View File

@@ -1158,6 +1158,19 @@ func (c *Client) UpdateWorkspaceTemplate(namespace string, workspaceTemplate *Wo
return workspaceTemplate, nil
}
// UpdateWorkspaceTemplateManifest creates a new version of the workspace template identified by uid
// in namespace, where the only difference from the existing template is the manifest.
// The existing template is loaded through GetWorkspaceTemplate (requested with version 0), its UID and
// Manifest are overwritten, and the result is handed to UpdateWorkspaceTemplate.
// NOTE(review): version 0 presumably selects the latest version — confirm against GetWorkspaceTemplate.
// Any error from the lookup is returned as-is with a nil template.
func (c *Client) UpdateWorkspaceTemplateManifest(namespace, uid string, manifest string) (*WorkspaceTemplate, error) {
	current, loadErr := c.GetWorkspaceTemplate(namespace, uid, 0)
	if loadErr != nil {
		return nil, loadErr
	}

	// Only the manifest is meant to change; every other loaded field is passed through untouched.
	current.UID, current.Manifest = uid, manifest

	return c.UpdateWorkspaceTemplate(namespace, current)
}
// ListWorkspaceTemplates returns a list of workspace templates that are not archived, sorted by most recent created first
func (c *Client) ListWorkspaceTemplates(namespace string, request *request.Request) (workspaceTemplates []*WorkspaceTemplate, err error) {
sb := c.workspaceTemplatesSelectBuilder(namespace).