• Stars
    star
    125
  • Rank 284,672 (Top 6 %)
  • Language
    Go
  • License
    Apache License 2.0
  • Created almost 6 years ago
  • Updated about 4 years ago

Reviews

There are no reviews yet. Be the first to send feedback to the community and the maintainers!

Repository Details

Kubernetes cluster simulator for evaluating schedulers.

Kubernetes cluster simulator

GoDoc Build Status Coverage Status

Kubernetes cluster simulator for evaluating schedulers.

Usage

See example directory.

// 1. Create a KubeSim with a pod queue and a scheduler.
queue := queue.NewPriorityQueue()
sched := buildScheduler() // see below
kubesim := kubesim.NewKubeSimFromConfigPathOrDie(configPath, queue, sched)

// 2. Register one or more pod submitters to KubeSim.
numOfSubmittingPods := 8
kubesim.AddSubmitter(newMySubmitter(numOfSubmittingPods))

// 3. Run the main loop of KubeSim.
//    In each execution of the loop, KubeSim
//      1) stores pods submitted from the registered submitters to its queue,
//      2) invokes scheduler with pending pods and cluster state,
//      3) emits cluster metrics to designated location(s) if enabled
//      4) progresses the simulated clock
if err := kubesim.Run(ctx); err != nil && errors.Cause(err) != context.Canceled {
    log.L.Fatal(err)
}

// buildScheduler assembles the scheduler passed to KubeSim in the usage example:
// a GenericScheduler with preemption enabled, one extender, one predicate and
// two prioritizers.
func buildScheduler() scheduler.Scheduler {
    // 1. Create a generic scheduler that mimics a kube-scheduler.
    sched := scheduler.NewGenericScheduler( /* preemption enabled */ true)

    // 2. Register extender(s)
    //    filterExtender and prioritizeExtender are user-supplied functions (not shown here).
    sched.AddExtender(
        scheduler.Extender{
            Name:             "MyExtender",
            Filter:           filterExtender,
            Prioritize:       prioritizeExtender,
            Weight:           1,
            NodeCacheCapable: true,
        },
    )

    // 3. Register plugin(s)
    // Predicate
    sched.AddPredicate("GeneralPredicates", predicates.GeneralPredicates)
    // Prioritizer
    sched.AddPrioritizer(priorities.PriorityConfig{
        Name:   "BalancedResourceAllocation",
        Map:    priorities.BalancedResourceAllocationMap,
        Reduce: nil,
        Weight: 1,
    })
    sched.AddPrioritizer(priorities.PriorityConfig{
        Name:   "LeastRequested",
        Map:    priorities.LeastRequestedPriorityMap,
        Reduce: nil,
        Weight: 1,
    })

    // NewGenericScheduler returns a value; the scheduler is handed out by pointer.
    return &sched
}

Pod submitter interface

See pkg/submitter/submitter.go.

// Submitter defines the submitter interface.
// A submitter is registered to the simulated cluster and reports what it wants to do with pods by
// returning submitter events (see Event and its implementations).
type Submitter interface {
	// Submit submits pods to a simulated cluster.
	// The return value is a list of submitter events.
	// Submitters are called serially in the same order that they are registered to the simulated
	// cluster.
	// This method must never block.
	// NOTE(review): clock is presumably the current simulated time, and nodeLister and metrics the
	// current cluster state; confirm against the caller.
    Submit(clock clock.Clock, nodeLister algorithm.NodeLister, metrics metrics.Metrics) ([]Event, error)
}

// Event defines the interface of a submitter event.
// Submit can return any type in a list that implements this interface.
type Event interface {
	// IsSubmitterEvent marks the implementing type as a submitter event.
	// NOTE(review): how the returned bool is used is not visible here; confirm against callers.
	IsSubmitterEvent() bool
}

// SubmitEvent represents an event of submitting a pod to a cluster.
// Pod is the pod to be submitted.
type SubmitEvent struct {
	Pod *v1.Pod
}

// DeleteEvent represents an event of deleting a pod from a cluster.
// The pod to delete is identified by PodName and PodNamespace.
type DeleteEvent struct {
	PodName      string
	PodNamespace string
}

// UpdateEvent represents an event of updating the manifest of a pending pod.
// The pending pod is identified by PodName and PodNamespace.
// NOTE(review): NewPod is presumably the manifest that replaces the current one; confirm against
// the code that consumes this event.
type UpdateEvent struct {
	PodName      string
	PodNamespace string
	NewPod       *v1.Pod
}

// TerminateSubmitterEvent represents an event of terminating the submission process.
// It carries no data.
type TerminateSubmitterEvent struct {
}

kube-scheduler-compatible scheduler interface

See pkg/scheduler/generic_scheduler.go and pkg/scheduler/extender.go.

k8s-cluster-simulator provides GenericScheduler, which follows the behavior of kube-scheduler's genericScheduler. GenericScheduler makes a scheduling decision for each given pod, one by one, using predicates and prioritizers.

The interfaces of predicates and prioritizers are similar to those of kube-scheduler.

// NewGenericScheduler creates a new GenericScheduler.
// The returned scheduler has an initialized, empty predicate map, so AddPredicate can be called on
// it right away. preemptionEnabled is stored as the scheduler's preemption setting.
func NewGenericScheduler(preemptionEnabled bool) GenericScheduler {
	return GenericScheduler{
		predicates:        map[string]predicates.FitPredicate{},
		preemptionEnabled: preemptionEnabled,
	}
}

// AddExtender adds an extender to this GenericScheduler.
// Extenders are appended, so they are kept in the order in which they were added.
func (sched *GenericScheduler) AddExtender(extender Extender) {
	sched.extenders = append(sched.extenders, extender)
}

// AddPredicate adds a predicate plugin to this GenericScheduler.
// Predicates are keyed by name: adding a predicate under a name that is already registered
// replaces the earlier one.
func (sched *GenericScheduler) AddPredicate(name string, predicate predicates.FitPredicate) {
	// Writes into the predicates map, which NewGenericScheduler initializes.
	sched.predicates[name] = predicate
}

// AddPrioritizer adds a prioritizer plugin to this GenericScheduler.
// Prioritizers are appended, so they are kept in the order in which they were added.
func (sched *GenericScheduler) AddPrioritizer(prioritizer priorities.PriorityConfig) {
	sched.prioritizers = append(sched.prioritizers, prioritizer)
}

// Extender represents a scheduler extender.
type Extender struct {
	// Name identifies this Extender.
	Name string

	// Filter filters out the nodes that cannot run the given pod in api.ExtenderArgs.
	// This function can be nil.
	Filter func(api.ExtenderArgs) api.ExtenderFilterResult

	// Prioritize ranks each node that has passed the filtering stage.
	// The weighted scores are summed up and the total score is used for the node selection.
	// NOTE(review): Weight is presumably the factor applied to the scores returned by Prioritize
	// before they are summed; confirm against the scheduler implementation.
	Prioritize func(api.ExtenderArgs) api.HostPriorityList
	Weight     int

	// NodeCacheCapable specifies that this Extender is capable of caching node information, so the
	// scheduler should only send minimal information about the eligible nodes assuming that the
	// extender already cached full details of all nodes in the cluster.
	// Specifically, ExtenderArgs.NodeNames is populated iff NodeCacheCapable == true, and
	// ExtenderArgs.Nodes.Items is populated iff NodeCacheCapable == false.
	NodeCacheCapable bool

	// Ignorable specifies whether the extender is ignorable (i.e. the scheduler process should not
	// fail when this extender returns an error).
	Ignorable bool
}

Lowest-level scheduler interface

See pkg/scheduler/scheduler.go.

k8s-cluster-simulator also supports the lowest-level scheduler interface, which makes scheduling decisions for (a subset of) pending pods and running pods, given the cluster state at a given clock time.

// Scheduler defines the lowest-level scheduler interface.
type Scheduler interface {
	// Schedule makes scheduling decisions for (a subset of) pending pods and running pods.
	// The return value is a list of scheduling events.
	// This method must never block.
	// NOTE(review): podQueue presumably holds the pending pods, and nodeLister and nodeInfoMap
	// describe the cluster state at the time given by clock; confirm against the caller.
	Schedule(
		clock clock.Clock,
		podQueue queue.PodQueue,
		nodeLister algorithm.NodeLister,
		nodeInfoMap map[string]*nodeinfo.NodeInfo) ([]Event, error)
}

// Event defines the interface of a scheduling event.
// Schedule can return any type in a list that implements this interface.
type Event interface {
	// IsSchedulerEvent marks the implementing type as a scheduling event.
	// NOTE(review): how the returned bool is used is not visible here; confirm against callers.
	IsSchedulerEvent() bool
}

// BindEvent represents an event of deciding the binding of a pod to a node.
// Pod is the pod being bound.
// NOTE(review): ScheduleResult presumably carries the node chosen for the pod; confirm against
// core.ScheduleResult.
type BindEvent struct {
	Pod            *v1.Pod
	ScheduleResult core.ScheduleResult
}

// DeleteEvent represents an event of deleting a bound pod on a node.
// The pod is identified by PodNamespace and PodName; NodeName names the node it is bound to.
type DeleteEvent struct {
	PodNamespace string
	PodName      string
	NodeName     string
}

How to specify the resource usage of each pod

Embed a YAML document in the annotations field of the pod manifest, e.g.:

# The value of simSpec is itself a YAML document (a list of execution phases) embedded as a
# literal block scalar, so its lines must be indented deeper than the simSpec key.
metadata:
  name: nginx-sim
  annotations:
    simSpec: |
      - seconds: 5        # an execution phase of this pod
        resourceUsage:    # resource usage (not request, nor limit)
          cpu: 1
          memory: 2Gi
          nvidia.com/gpu: 0
      - seconds: 10       # another phase that follows the previous one
        resourceUsage:
          cpu: 2
          memory: 4Gi
          nvidia.com/gpu: 1

Supported v1.Pod fields

These fields are populated or used by the simulator.

v1.Pod{
    ObjectMeta: metav1.ObjectMeta{
        UID,                // populated when this pod is submitted to the simulator
        CreationTimestamp,  // populated when this pod is submitted to the simulator
        DeletionTimestamp,  // populated when a deletion event for this pod has been accepted by the simulator
    },
    Spec: v1.PodSpec {
        NodeName,                       // populated when the cluster binds this pod to a node
        TerminationGracePeriodSeconds,  // read when this pod is deleted
        Priority,                       // read by PriorityQueue to sort pods,
                                        // and read when the scheduler tries to schedule this pod
    },
    Status: v1.PodStatus{
        Phase,              // populated by the simulator. Pending -> Running -> Succeeded xor Failed
        Conditions,         // populated by the simulator
        Reason,             // populated by the simulator
        Message,            // populated by the simulator
        StartTime,          // populated by the simulator when this pod has started its execution
        ContainerStatuses,  // populated by the simulator
    },
}

Supported v1.Node fields

These fields are populated and used by the simulator.

v1.Node{
    TypeMeta: metav1.TypeMeta{
        Kind:       "Node",
        APIVersion: "v1",
    },
    ObjectMeta: // determined by the config
    Spec:       // determined by the config
    Status: v1.NodeStatus{
        Capacity:                           // Determined by the config
        Allocatable:                        // Same as Capacity
        Conditions:  []v1.NodeCondition{    // populated by the simulator
            {
                Type:               v1.NodeReady,
                Status:             v1.ConditionTrue,
                LastHeartbeatTime:  // clock,
                LastTransitionTime: // clock,
                Reason:             "KubeletReady",
                Message:            "kubelet is posting ready status",
            },
            {
                Type:               v1.NodeOutOfDisk,
                Status:             v1.ConditionFalse,
                LastHeartbeatTime:  // clock,
                LastTransitionTime: // clock,
                Reason:             "KubeletHasSufficientDisk",
                Message:            "kubelet has sufficient disk space available",
            },
            {
                Type:               v1.NodeMemoryPressure,
                Status:             v1.ConditionFalse,
                LastHeartbeatTime:  // clock,
                LastTransitionTime: // clock,
                Reason:             "KubeletHasSufficientMemory",
                Message:            "kubelet has sufficient memory available",
            },
            {
                Type:               v1.NodeDiskPressure,
                Status:             v1.ConditionFalse,
                LastHeartbeatTime:  // clock,
                LastTransitionTime: // clock,
                Reason:             "KubeletHasNoDiskPressure",
                Message:            "kubelet has no disk pressure",
            },
            {
                Type:               v1.NodePIDPressure,
                Status:             v1.ConditionFalse,
                LastHeartbeatTime:  // clock,
                LastTransitionTime: // clock,
                Reason:             "KubeletHasSufficientPID",
                Message:            "kubelet has sufficient PID available",
            },
        },
    },
}

Related projects

The design and implementation of this project are inherently inspired by Kubernetes, which is licensed under Apache-2.0. Moreover, functions in the following files were obtained from the Kubernetes project and modified so that they would be compatible with k8s-cluster-simulator. Please see each file for more detail.

More Repositories

1

sngan_projection

GANs with spectral normalization and projection discriminator
Python
1,079
star
2

chainer-gan-lib

Chainer implementation of recent GAN variants
Python
407
star
3

xfeat

Flexible Feature Engineering & Exploration Library using GPUs and Optuna.
Python
365
star
4

chainer-gogh

Python
302
star
5

menoh

Menoh: fast DNN inference library with multiple programming language support
C++
279
star
6

pfhedge

PyTorch-based framework for Deep Hedging
Python
249
star
7

contextual_augmentation

Contextual augmentation, a text data augmentation using a bidirectional language model.
Python
193
star
8

distilled-feature-fields

Python
178
star
9

nips17-adversarial-attack

Submission to Kaggle NIPS'17 competition on adversarial examples (non-targeted adversarial attack track)
Python
146
star
10

meta-tasnet

A PyTorch implementation of Meta-TasNet from "Meta-learning Extractors for Music Source Separation"
Python
135
star
11

FSCS

Fast Soft Color Segmentation
Python
134
star
12

chainer-pix2pix

chainer implementation of pix2pix
Python
131
star
13

chainer-compiler

Experimental toolchain to compile and run Chainer models
Python
112
star
14

graph-nvp

GraphNVP: An Invertible Flow Model for Generating Molecular Graphs
Python
91
star
15

autogbt-alt

An experimental Python package that reimplements AutoGBT using LightGBM and Optuna.
Python
82
star
16

chainer-stylegan

Chainer implementation of Style-based Generator
Python
79
star
17

tgan

The implementation of Temporal Generative Adversarial Nets with Singular Value Clipping
Python
78
star
18

tgan2

The official implementation of "Train Sparsely, Generate Densely: Memory-efficient Unsupervised Training of High-resolution Temporal GAN"
Python
76
star
19

deep-table

Python
76
star
20

git-ghost

Synchronize your working directory efficiently to a remote place without committing the changes.
Go
73
star
21

chainer-graph-cnn

Chainer implementation of 'Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering' (https://arxiv.org/abs/1606.09375)
Python
67
star
22

surface-aligned-nerf

Python
65
star
23

bayesgrad

BayesGrad: Explaining Predictions of Graph Convolutional Networks
Jupyter Notebook
62
star
24

japanese-lm-fin-harness

Japanese Language Model Financial Evaluation Harness
Shell
62
star
25

torch-dftd

pytorch implementation of dftd2 & dftd3
Python
60
star
26

meta-fuse-csi-plugin

A CSI plugin for All FUSE implementations
Go
59
star
27

alertmanager-to-github

This receives webhook requests from Alertmanager and creates GitHub issues.
Go
50
star
28

kaggle-lyft-motion-prediction-4th-place-solution

Kaggle Lyft Motion Prediction for Autonomous Vehicles 4th place solution
Python
48
star
29

einconv

Python
44
star
30

chainer-segnet

SegNet implementation & experiments in Chainer
Python
42
star
31

TabCSDI

A code for the NeurIPS 2022 Table Representation Learning Workshop paper: "Diffusion models for missing value imputation in tabular data"
Python
41
star
32

gcp-workload-identity-federation-webhook

This webhook is for mutating pods that will require GCP Workload Identity Federation access from Kubernetes Cluster.
Go
39
star
33

chainer-trt

Chainer x TensorRT
C++
34
star
34

hyperbolic_wrapped_distribution

Python
32
star
35

picking-instruction

PFN Picking Instructions for Commodities Dataset (PFN-PIC) including images, bounding boxes and text instructions.
31
star
36

pftaskqueue

pftaskqueue: Lightweight task queue tool
Go
30
star
37

multi-stage-blended-diffusion

Python
30
star
38

NoTransactionBandNetwork

Minimal implementation and experiments of "No-Transaction Band Network: A Neural Network Architecture for Efficient Deep Hedging".
Jupyter Notebook
30
star
39

capg

Implementation of clipped action policy gradient (CAPG) with PPO and TRPO
Python
29
star
40

charge_transfer_nnp

Graph neural network potential with charge transfer
Python
28
star
41

node-operation-controller

Kubernetes controller for automated Node operations
Go
26
star
42

menoh-ruby

Ruby binding for Menoh DNN inference library
C
26
star
43

optuna-book

Jupyter Notebook
25
star
44

chainer-ADDA

Adversarial Discriminative Domain Adaptation in Chainer
Python
24
star
45

RJT-RL

RJT-RL: De novo molecular design using a Reversible Junction Tree and Reinforcement Learning
Python
23
star
46

superpixel-align

Official implementation of "Minimizing Supervision for Free-space Segmentation" paper
Jupyter Notebook
23
star
47

label-efficient-brain-tumor-segmentation

Python
21
star
48

chainer-disentanglement-lib

Unsupervised Disentanglement Representation Learning in Chainer
Python
21
star
49

vat_nmt

Implementation of "Effective Adversarial Regularization for Neural Machine Translation", ACL 2019
Python
21
star
50

allreduce-proto

A prototype implementation of AllReduce collective communication routine.
C++
20
star
51

Chainer-DeepFill

Python
19
star
52

pfneumonia

Repo for RSNA pneumonia open-source
Python
18
star
53

chainer-LSGAN

Least Squares Generative Adversarial Network implemented in Chainer
Python
18
star
54

KDD-Cup-AutoML-5

KDD Cup 2019 AutoML Track 5th solution
Python
18
star
55

step-wise-chemical-synthesis-prediction

A GGNN-GWM based step-wise framework for Chemical Synthesis Prediction
Python
17
star
56

ATPG4SV

A prototype of Concolic Testing engine for SystemVerilog, developed as part of PFN summer internship 2018.
OCaml
16
star
57

menoh-sharp

C# binding for Menoh DNN inference library
C#
15
star
58

BMI219-2017-ProteinFolding

UCSF BMI219 Deep Learning (2017), Coding example (Prediction of protein folding with RNN and CNN)
Python
15
star
59

go-menoh

Golang binding for Menoh DNN inference library
Go
14
star
60

hierarchical-molecular-learning

Implementation of "Semi-supervised learning of hierarchical representations of molecules using neural message passing" (arXiv:1711.10168)
Python
14
star
61

kaggle-alaska2-3rd-place-solution

3rd place solution for ALASKA2 Image Steganalysis on Kaggle
Python
13
star
62

menoh-rs

Rust binding for Menoh
Rust
13
star
63

menoh-haskell

Haskell binding for Menoh DNN inference library
Jupyter Notebook
12
star
64

chainer-differentiable-mpc

Differentiable MPC in Chainer, developed as part of PFN summer internship 2019.
Python
12
star
65

asdf-clusterctl

clusterctl plugin for the asdf version manager
Shell
12
star
66

GenerRNA

Python
11
star
67

Deep_visuo-tactile_learning_ICRA2019

11
star
68

menoh-java

Building a Deep Neural Network (DNN) application in Java
Java
11
star
69

treewidth-prediction

Prediction of Treewidth using Graph Neural Network, developed as part of PFN summer internship 2019.
Jupyter Notebook
10
star
70

pml

A ML-like programming language with type-based probabilistic behavior specification, developed as part of PFN summer internship 2018.
C++
10
star
71

optuna-hands-on

Jupyter Notebook
10
star
72

kaggle-hpa-2021-7th-place-solution

7th place solution of Human Protein Atlas - Single Cell Classification on Kaggle
Python
9
star
73

chainer-ev3

Jupyter Notebook
9
star
74

chainer-robotcar-text

8
star
75

tabret

Python
8
star
76

rp-safe-rl

Python
8
star
77

pfmt-bench-fin-ja

pfmt-bench-fin-ja: Preferred Multi-turn Benchmark for Finance in Japanese
Python
8
star
78

batch-metaheuristics

Python
7
star
79

limited-gp

C++
6
star
80

recompute

Python
6
star
81

piekd

This is the official implementation of Periodic Intra-Ensemble Knowledge Distillation (PIEKD).
Python
6
star
82

differentiable-ray-sampling

Jupyter Notebook
6
star
83

BMI219-2017-DeepQSAR

UCSF BMI219 Deep Learning (2017), Coding example (QSAR with Deep multitask learning)
Python
5
star
84

chainer-capsnet

CapsNet implemented in Chainer
Python
5
star
85

node-menoh

NodeJS binding for Menoh DNN inference library
JavaScript
5
star
86

chainer-formulanet

Chainer implementation of FormulaNet
Python
5
star
87

plamo-examples

5
star
88

pocket_detection

Pocket detection
Python
5
star
89

ssdrl

Python
4
star
90

cg-transfer

Python
4
star
91

Finance_data_augmentation_ICAIF2022

Jupyter Notebook
3
star
92

echainer

Elastic Chainer prototype
Python
3
star
93

robust_estimation

repository for robust estimation research
Python
3
star
94

Invisible_marker_IROS2020

3
star
95

ex_matgl

MatGL-based neural network potential that computes excited state energies and forces
Python
3
star
96

timesfm_fin

Python
3
star
97

nms-comp

Neural Multi-scale Compression
Python
2
star
98

transport-control-socket

C++
2
star
99

unsupervised_segmental_empirical_ODM

Python
2
star
100

homebrew-git-ghost

Ruby
2
star