Karpenter
사전 요구 사항
진행 전 Spot Role에 대한 설정을 먼저 해주세요.
NodeGroup Role
const nodeGroupRoleName = "karpenter-ng-role";
const nodeGroupRole = new aws.iam.Role(
	nodeGroupRoleName,
	{
		namePrefix: `${nodeGroupRoleName}-`,
		assumeRolePolicy: {
			Version: "2012-10-17",
			Statement: [
				{
					Effect: "Allow",
					Principal: {
						Service: "ec2.amazonaws.com",
					},
					Action: "sts:AssumeRole",
				},
			],
		},
		tags: {
			Name: nodeGroupRoleName,
			"loliot.net/stack": variable.stackName,
		},
	},
	{ protect: true },
);
const nodeGroupInstanceProfileName = "karpenter-ng-instacne-profile";
const nodeGroupInstanceProfile = new aws.iam.InstanceProfile(
	nodeGroupInstanceProfileName,
	{
		namePrefix: `${nodeGroupInstanceProfileName}-`,
		role: nodeGroupRole.name,
		tags: {
			Name: nodeGroupInstanceProfileName,
			"loliot.net/stack": variable.stackName,
		},
	},
	{ protect: true },
);
const nodeGroupPolicyARNs = {
	"0": "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
	"1": "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
	"2": "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
	"3": "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore", // Karpenter
};
const nodeGroupRpas = Object.entries(nodeGroupPolicyARNs).map(
	([i, arn]) =>
		new aws.iam.RolePolicyAttachment(
			`karpenter-ng-rpa-${i}`,
			{
				policyArn: arn,
				role: nodeGroupRole.name,
			},
			{ protect: true },
		),
);
Karpenter Controller Role
const controllerRoleName = "karpenter-controller-role";
const controllerRole = new aws.iam.Role(
	controllerRoleName,
	{
		namePrefix: `${controllerRoleName}-`,
		assumeRolePolicy: {
			Version: "2012-10-17",
			Statement: [
				{
					Effect: "Allow",
					Principal: {
						Federated: variable.eks.core.eks.apply((eks) => eks.oidcProvider.arn),
					},
					Condition: {
						StringEquals: variable.eks.core.eks.apply((eks) => ({
							[`${eks.oidcProvider.url}:aud`]: "sts.amazonaws.com",
							[`${eks.oidcProvider.url}:sub`]:
								// system:serviceaccount:<namespace>:<serviceAccount>
								"system:serviceaccount:kube-system:karpenter",
						})),
					},
					Action: "sts:AssumeRoleWithWebIdentity",
				},
			],
		},
		tags: {
			Name: controllerRoleName,
			"loliot.net/stack": variable.stackName,
		},
	},
	{ protect: true },
);
const controllerPolicyName = "karpenter-controller-policy";
const controllerPolicy = new aws.iam.Policy(
	controllerPolicyName,
	{
		namePrefix: `${controllerPolicyName}-`,
		policy: {
			Version: "2012-10-17",
			Statement: [
				{
					Sid: "AllowScopedEC2InstanceActions",
					Effect: "Allow",
					Resource: [
						`arn:aws:ec2:${variable.awsRegion}::image/*`,
						`arn:aws:ec2:${variable.awsRegion}::snapshot/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:security-group/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:subnet/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:launch-template/*`,
					],
					Action: ["ec2:RunInstances", "ec2:CreateFleet"],
				},
				{
					Sid: "AllowScopedEC2InstanceActionsWithTags",
					Effect: "Allow",
					Resource: [
						`arn:aws:ec2:${variable.awsRegion}:*:fleet/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:instance/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:volume/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:network-interface/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:launch-template/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:spot-instances-request/*`,
					],
					Action: ["ec2:RunInstances", "ec2:CreateFleet", "ec2:CreateLaunchTemplate"],
					Condition: {
						StringEquals: {
							[`aws:RequestTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
						},
						StringLike: {
							"aws:RequestTag/karpenter.sh/nodepool": "*",
						},
					},
				},
				{
					Sid: "AllowScopedResourceCreationTagging",
					Effect: "Allow",
					Resource: [
						`arn:aws:ec2:${variable.awsRegion}:*:fleet/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:instance/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:volume/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:network-interface/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:launch-template/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:spot-instances-request/*`,
					],
					Action: "ec2:CreateTags",
					Condition: {
						StringEquals: {
							[`aws:RequestTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
							"ec2:CreateAction": ["RunInstances", "CreateFleet", "CreateLaunchTemplate"],
						},
						StringLike: {
							"aws:RequestTag/karpenter.sh/nodepool": "*",
						},
					},
				},
				{
					Sid: "AllowScopedResourceTagging",
					Effect: "Allow",
					Resource: `arn:aws:ec2:${variable.awsRegion}:*:instance/*`,
					Action: "ec2:CreateTags",
					Condition: {
						StringEquals: {
							[`aws:ResourceTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
						},
						StringLike: {
							"aws:ResourceTag/karpenter.sh/nodepool": "*",
						},
						"ForAllValues:StringEquals": {
							"aws:TagKeys": ["karpenter.sh/nodeclaim", "Name"],
						},
					},
				},
				{
					Sid: "AllowScopedDeletion",
					Effect: "Allow",
					Resource: [
						`arn:aws:ec2:${variable.awsRegion}:*:instance/*`,
						`arn:aws:ec2:${variable.awsRegion}:*:launch-template/*`,
					],
					Action: ["ec2:TerminateInstances", "ec2:DeleteLaunchTemplate"],
					Condition: {
						StringEquals: {
							[`aws:ResourceTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
						},
						StringLike: {
							"aws:ResourceTag/karpenter.sh/nodepool": "*",
						},
					},
				},
				{
					Sid: "AllowRegionalReadActions",
					Effect: "Allow",
					Resource: "*",
					Action: [
						"ec2:DescribeAvailabilityZones",
						"ec2:DescribeImages",
						"ec2:DescribeInstances",
						"ec2:DescribeInstanceTypeOfferings",
						"ec2:DescribeInstanceTypes",
						"ec2:DescribeLaunchTemplates",
						"ec2:DescribeSecurityGroups",
						"ec2:DescribeSpotPriceHistory",
						"ec2:DescribeSubnets",
					],
					Condition: {
						StringEquals: {
							"aws:RequestedRegion": `${variable.awsRegion}`,
						},
					},
				},
				{
					Sid: "AllowSSMReadActions",
					Effect: "Allow",
					Resource: `arn:aws:ssm:${variable.awsRegion}::parameter/aws/service/*`,
					Action: "ssm:GetParameter",
				},
				{
					Sid: "AllowPricingReadActions",
					Effect: "Allow",
					Resource: "*",
					Action: "pricing:GetProducts",
				},
				{
					Sid: "AllowPassingInstanceRole",
					Effect: "Allow",
					Resource: nodeGroupRole.arn,
					Action: "iam:PassRole",
					Condition: {
						StringEquals: {
							"iam:PassedToService": "ec2.amazonaws.com",
						},
					},
				},
				{
					Sid: "AllowScopedInstanceProfileCreationActions",
					Effect: "Allow",
					Resource: "*",
					Action: "iam:CreateInstanceProfile",
					Condition: {
						StringEquals: {
							[`aws:RequestTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
							"aws:RequestTag/topology.kubernetes.io/region": `${variable.awsRegion}`,
						},
						StringLike: {
							"aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*",
						},
					},
				},
				{
					Sid: "AllowScopedInstanceProfileTagActions",
					Effect: "Allow",
					Resource: "*",
					Action: "iam:TagInstanceProfile",
					Condition: {
						StringEquals: {
							[`aws:ResourceTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
							"aws:ResourceTag/topology.kubernetes.io/region": `${variable.awsRegion}`,
							[`aws:RequestTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
							"aws:RequestTag/topology.kubernetes.io/region": `${variable.awsRegion}`,
						},
						StringLike: {
							"aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*",
							"aws:RequestTag/karpenter.k8s.aws/ec2nodeclass": "*",
						},
					},
				},
				{
					Sid: "AllowScopedInstanceProfileActions",
					Effect: "Allow",
					Resource: "*",
					Action: ["iam:AddRoleToInstanceProfile", "iam:RemoveRoleFromInstanceProfile", "iam:DeleteInstanceProfile"],
					Condition: {
						StringEquals: {
							[`aws:ResourceTag/kubernetes.io/cluster/${variable.eksClusterName}`]: "owned",
							"aws:ResourceTag/topology.kubernetes.io/region": `${variable.awsRegion}`,
						},
						StringLike: {
							"aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass": "*",
						},
					},
				},
				{
					Sid: "AllowInstanceProfileReadActions",
					Effect: "Allow",
					Resource: "*",
					Action: "iam:GetInstanceProfile",
				},
				{
					Sid: "AllowAPIServerEndpointDiscovery",
					Effect: "Allow",
					Resource: `arn:aws:eks:${variable.awsRegion}:${variable.awsAccountID}:cluster/${variable.eksClusterName}`,
					Action: "eks:DescribeCluster",
				},
			],
		},
		tags: {
			Name: controllerPolicyName,
			"hits.ai/stack": variable.stackName,
		},
	},
	{ protect: true },
);
new aws.iam.RolePolicyAttachment(
	"karpenter-controller-rpa-0",
	{
		policyArn: controllerPolicy.arn,
		role: controllerRole.name,
	},
	{ protect: true },
);
설치
경고
설치 전에 CoreDNS가 작동하는 지 확인하시기 바랍니다.
helm upgrade -i karpenter-crd oci://public.ecr.aws/karpenter/karpenter-crd \
    --version v0.33.0 \
    --history-max 5 \
    -n kube-system
helm show values oci://public.ecr.aws/karpenter/karpenter \
    --version v0.33.0 \
    > karpenter-values.yaml
karpenter-values.yaml
additionalLabels: {}
serviceAccount:
  annotations:
    eks.amazonaws.com/role-arn: arn:aws:iam::<accountId>:role/<controllerRoleName>
replicas: 1
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: karpenter.sh/nodepool
              operator: DoesNotExist
tolerations:
  - operator: Exists
controller:
  resources:
    requests:
      cpu: 50m
      memory: 512Mi
    limits:
      memory: 512Mi
webhook:
  # kubernetes 1.25 미만은 true로 설정해야 합니다.
  enabled: false
# ConfigMap: karpenter-global-settings
settings:
  # Pending Pod가 새로 생성되면 batch를 만들려고 시도함
  # 새 Pending Pod가 생기고 batchIdleDuration 시간 만큼 대기
  # 그 안에 새로운 Pending Pod가 생기면 batch에 포함 시키고 다시 batchIdleDuration 시간 대기
  # 이 과정이 반복되다가 마지막 Pending Pod가 생긴 후 batchIdleDuration을 넘기거나
  # 첫 Pending Pod가 생긴 후 batchMaxDuration을 넘기면 batch만들기 종료
  # https://github.com/aws/karpenter/blob/main/pkg/controllers/provisioning/batcher.go
  batchMaxDuration: 20s
  batchIdleDuration: 5s
  clusterName: <clusterName>
  # Rebalance Recommendation/ Spot Interrupt 이벤트를 받을 SQS Queue
  # 설정하지 않으면 해당이벤트를 처리하지 않습니다.
  interruptionQueue: <interruptionQueueName>
helm template karpenter oci://public.ecr.aws/karpenter/karpenter \
    --version v0.33.0 \
    -n kube-system \
    -f karpenter-values.yaml \
    > karpenter.yaml
helm upgrade -i karpenter oci://public.ecr.aws/karpenter/karpenter \
    --version v0.33.0 \
    --history-max 5 \
    -n kube-system \
    -f karpenter-values.yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: aws-auth
  namespace: kube-system
data:
  mapRoles: |
    - rolearn: arn:aws:iam::<accountId>:role/karpenter-ng-role-xxxxx
      username: system:node:{{EC2PrivateDNSName}}
      groups:
        - system:bootstrappers
        - system:nodes