# File viewer banner: YAML, 162 lines, 4.8 KiB
# Include{groups}
|
|
# Three portal entries (open, chat, playground). Each one pulls in the
# portalLink include and then sets its own `path`.
# NOTE(review): `path` is nested under its portal key here because three
# sibling `path` keys would be duplicates; confirm its depth matches what the
# portalLink include emits.
portals:
  # Portal for the "/docs" path.
  open:
# Include{portalLink}
    path: '/docs'
  # Portal for the "/demos/chat" path.
  chat:
# Include{portalLink}
    path: '/demos/chat'
  # Portal for the "/demos/playground" path.
  playground:
# Include{portalLink}
    path: '/demos/playground'
|
|
# Top-level key for the question entries: its content comes from the
# `# Include{...}` directives and the `- variable:` items that follow.
questions:
|
|
# Include{global}
|
|
# Include{credentials}
|
|
# Include{workload}
|
|
# Include{workloadDeployment}
|
|
# Include{replicas1}
|
|
# Include{podSpec}
|
|
# Include{containerMain}
|
|
|
|
# String enum with two choices: `image` (shown as "CPU", the default) and
# `gpuImage` (shown as "GPU").
# NOTE(review): absolute indentation is not recoverable from this view; shift
# the entry to the depth expected after the preceding include.
- variable: imageSelector
  label: Select Image
  schema:
    type: string
    default: image
    enum:
      - {value: image, description: CPU}
      - {value: gpuImage, description: GPU}
|
|
|
|
# Include{containerBasic}
|
|
# Include{containerAdvanced}
|
|
|
|
# Dict-typed question in the "App Configuration" group (additional_attrs is
# enabled). It carries five attributes: device, model, quant_type,
# max_batch_size and disable_batching.
# NOTE(review): absolute indentation is not recoverable from this view; shift
# the entry to the depth expected after the preceding include.
- variable: fabulinus
  group: App Configuration
  label: Fabulinus Configuration
  schema:
    additional_attrs: true
    type: dict
    attrs:
      # String enum: cpu (default) or gpu.
      - variable: device
        label: Device
        description: The device to use.
        schema:
          type: string
          default: cpu
          enum:
            - {value: cpu, description: CPU}
            - {value: gpu, description: GPU}

      # Required string; defaults to google/flan-t5-small.
      - variable: model
        label: Model
        description: The model name to use.
        schema:
          type: string
          default: 'google/flan-t5-small'
          required: true

      # String enum of five quantization types; defaults to int8.
      - variable: quant_type
        label: Quant Type
        description: The quantization type to use.
        schema:
          type: string
          default: int8
          enum:
            - {value: int8, description: Int8}
            - {value: float16, description: Float16}
            - {value: bfloat16, description: Bfloat16}
            - {value: int8_float16, description: Int8_float16}
            - {value: int8_bfloat16, description: Int8_bfloat16}

      # Integer, minimum 1, default 32.
      - variable: max_batch_size
        label: Max Batch Size
        description: The maximum batch size the model can use.
        schema:
          type: int
          default: 32
          min: 1

      # Boolean, default true.
      # NOTE(review): the description says "allow batching" while the key is
      # `disable_batching`; the wording looks inverted. Confirm the intended
      # meaning before changing the user-facing text.
      - variable: disable_batching
        label: Disable Batching
        description: Whether to allow batching or not. Set to False if not using the /generate end point.
        schema:
          type: boolean
          default: true
|
|
|
|
# Include{containerConfig}
|
|
# Include{podOptions}
|
|
# Include{serviceRoot}
|
|
# Include{serviceMain}
|
|
# Include{serviceSelectorLoadBalancer}
|
|
# Include{serviceSelectorExtras}
|
|
# Dict-typed entry for the main service (additional_attrs enabled). Its only
# attribute visible here is `port`: a required integer defaulting to 10687.
# NOTE(review): absolute indentation is not recoverable from this view; shift
# the entry to the depth expected after the preceding include.
- variable: main
  label: 'Main Service Port Configuration'
  schema:
    additional_attrs: true
    type: dict
    attrs:
      - variable: port
        label: 'Port'
        description: 'This port exposes the container port on the service'
        schema:
          type: int
          default: 10687
          required: true
|
|
# Include{externalInterfaces}
|
|
|
|
# Include{serviceList}
|
|
# Include{persistenceRoot}
|
|
# Dict-typed storage entry named `models` (additional_attrs enabled). Its
# `attrs` list has no inline items; the directive that follows is expected to
# supply them.
# NOTE(review): absolute indentation is not recoverable from this view; shift
# the entry to the depth the persistenceBasic include expects.
- variable: models
  label: 'App Models Storage'
  description: 'Stores the Application Models.'
  schema:
    additional_attrs: true
    type: dict
    attrs:
# Include{persistenceBasic}
|
|
# Include{persistenceList}
|
|
# Include{ingressRoot}
|
|
# Dict-typed entry for the main ingress (additional_attrs enabled). Its
# `attrs` list has no inline items; the directive that follows is expected to
# supply them.
# NOTE(review): absolute indentation is not recoverable from this view; shift
# the entry to the depth the ingressDefault include expects.
- variable: main
  label: 'Main Ingress'
  schema:
    additional_attrs: true
    type: dict
    attrs:
# Include{ingressDefault}
|
|
# Include{ingressAdvanced}
|
|
# Include{ingressList}
|
|
# Include{securityContextRoot}
|
|
# Integer field for the user ID the application runs as; defaults to 0.
# NOTE(review): UID 0 is conventionally root; confirm the workload needs it.
- variable: runAsUser
  label: 'runAsUser'
  description: 'The UserID of the user running the application'
  schema:
    type: int
    default: 0
|
|
# Integer field for the group ID the application runs as; defaults to 0.
# NOTE(review): GID 0 is conventionally the root group; confirm the workload
# needs it.
- variable: runAsGroup
  label: 'runAsGroup'
  description: 'The groupID of the user running the application'
  schema:
    type: int
    default: 0
|
|
# Include{securityContextContainer}
|
|
# Include{securityContextAdvanced}
|
|
# Include{securityContextPod}
|
|
# Integer field for the group that owns the storage; defaults to 568.
- variable: fsGroup
  label: 'fsGroup'
  description: 'The group that should own ALL storage.'
  schema:
    type: int
    default: 568
|
|
# Include{resources}
|
|
# Include{advanced}
|
|
# Include{addons}
|
|
# Include{codeserver}
|
|
# Include{netshoot}
|
|
# Include{vpn}
|
|
# Include{documentation}
|