# Include{groups}
# Form definition for the Fabulinus app: portal links plus the "questions"
# list. Comment lines made of "Include" followed by a name in braces look like
# build-time placeholders and are kept verbatim, one per line.
# NOTE(review): entries that follow a placeholder are indented to continue the
# list the included snippet presumably leaves open; the exact depth cannot be
# verified from this file alone - confirm against the include templates.
portals:
  open:
# Include{portalLink}
    path: "/docs"
  chat:
# Include{portalLink}
    path: "/demos/chat"
  playground:
# Include{portalLink}
    path: "/demos/playground"
questions:
# Include{global}
# Include{workload}
# Include{workloadDeployment}
# Include{replicas1}
# Include{podSpec}
# Include{containerMain}
                    # Offers two image choices, labelled CPU and GPU.
                    - variable: imageSelector
                      label: Select Image
                      schema:
                        type: string
                        default: image
                        enum:
                          - value: image
                            description: CPU
                          - value: gpuImage
                            description: GPU
# Include{containerBasic}
# Include{containerAdvanced}

  # App-specific settings shown under the "App Configuration" group.
  - variable: fabulinus
    group: App Configuration
    label: Fabulinus Configuration
    schema:
      additional_attrs: true
      type: dict
      attrs:
        - variable: device
          label: Device
          description: The device to use.
          schema:
            type: string
            default: cpu
            enum:
              - value: cpu
                description: CPU
              - value: gpu
                description: GPU
        - variable: model
          label: Model
          description: The model name to use.
          schema:
            type: string
            default: "google/flan-t5-small"
            required: true
        - variable: quant_type
          label: Quant Type
          description: The quantization type to use.
          schema:
            type: string
            default: int8
            enum:
              - value: int8
                description: Int8
              - value: float16
                description: Float16
              - value: bfloat16
                description: Bfloat16
              - value: int8_float16
                description: Int8_float16
              - value: int8_bfloat16
                description: Int8_bfloat16
        - variable: max_batch_size
          label: Max Batch Size
          description: The maximum batch size the model can use.
          schema:
            type: int
            default: 32
            min: 1
        # NOTE(review): the label says "Disable" while the description says
        # "allow"; the two read as opposites - confirm the intended meaning.
        - variable: disable_batching
          label: Disable Batching
          description: Whether to allow batching or not. Set to False if not using the /generate end point.
          schema:
            type: boolean
            default: true
# Include{containerConfig}
# Include{podOptions}
# Include{serviceRoot}
# Include{serviceMain}
# Include{serviceSelectorLoadBalancer}
# Include{serviceSelectorExtras}
                    - variable: main
                      label: "Main Service Port Configuration"
                      schema:
                        additional_attrs: true
                        type: dict
                        attrs:
                          - variable: port
                            label: "Port"
                            description: "This port exposes the container port on the service"
                            schema:
                              type: int
                              default: 10687
                              required: true
# Include{serviceExpertRoot}
# Include{serviceExpert}
# Include{serviceList}
# Include{persistenceRoot}
              - variable: models
                label: "App Models Storage"
                description: "Stores the Application Models."
                schema:
                  additional_attrs: true
                  type: dict
                  attrs:
# Include{persistenceBasic}
# Include{persistenceList}
# Include{ingressRoot}
              - variable: main
                label: "Main Ingress"
                schema:
                  additional_attrs: true
                  type: dict
                  attrs:
# Include{ingressDefault}
# Include{ingressAdvanced}
# Include{ingressList}
# Include{securityContextRoot}
              # NOTE(review): both IDs default to 0 (root on Unix-like
              # systems) - presumably intentional; confirm.
              - variable: runAsUser
                label: "runAsUser"
                description: "The UserID of the user running the application"
                schema:
                  type: int
                  default: 0
              - variable: runAsGroup
                label: "runAsGroup"
                description: "The groupID of the user running the application"
                schema:
                  type: int
                  default: 0
# Include{securityContextContainer}
# Include{securityContextAdvanced}
# Include{securityContextPod}
              - variable: fsGroup
                label: "fsGroup"
                description: "The group that should own ALL storage."
                schema:
                  type: int
                  default: 568
# Include{resources}
# Include{advanced}
# Include{addons}
# Include{codeserver}
# Include{netshoot}
# Include{vpn}
# Include{documentation}