mirror of
https://github.com/1Panel-dev/1Panel.git
synced 2025-12-17 21:08:25 +08:00
feat: support tensorrt LLM speedup (#10905)
This commit is contained in:
parent
da5668ac27
commit
3b73de32ad
18 changed files with 149 additions and 19 deletions
|
|
@ -14,6 +14,8 @@ type TensorRTLLMCreate struct {
|
||||||
ModelDir string `json:"modelDir" validate:"required"`
|
ModelDir string `json:"modelDir" validate:"required"`
|
||||||
Image string `json:"image" validate:"required"`
|
Image string `json:"image" validate:"required"`
|
||||||
Command string `json:"command" validate:"required"`
|
Command string `json:"command" validate:"required"`
|
||||||
|
ModelType string `json:"modelType"`
|
||||||
|
ModelSpeedup bool `json:"modelSpeedup"`
|
||||||
DockerConfig
|
DockerConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@ type TensorRTLLM struct {
|
||||||
DockerCompose string `json:"dockerCompose"`
|
DockerCompose string `json:"dockerCompose"`
|
||||||
ContainerName string `json:"containerName"`
|
ContainerName string `json:"containerName"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
//Port int `json:"port"`
|
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
Env string `json:"env"`
|
Env string `json:"env"`
|
||||||
TaskID string `json:"taskID"`
|
TaskID string `json:"taskID"`
|
||||||
|
ModelType string `json:"modelType"`
|
||||||
|
ModelSpeedup bool `json:"modelSpeedup"`
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package service
|
package service
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/dto/request"
|
"github.com/1Panel-dev/1Panel/agent/app/dto/request"
|
||||||
"github.com/1Panel-dev/1Panel/agent/app/dto/response"
|
"github.com/1Panel-dev/1Panel/agent/app/dto/response"
|
||||||
|
|
@ -15,7 +16,9 @@ import (
|
||||||
"github.com/1Panel-dev/1Panel/agent/utils/files"
|
"github.com/1Panel-dev/1Panel/agent/utils/files"
|
||||||
"github.com/subosito/gotenv"
|
"github.com/subosito/gotenv"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
@ -52,7 +55,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
|
||||||
serverDTO.ModelDir = envs["MODEL_PATH"]
|
serverDTO.ModelDir = envs["MODEL_PATH"]
|
||||||
serverDTO.Dir = path.Join(global.Dir.TensorRTLLMDir, item.Name)
|
serverDTO.Dir = path.Join(global.Dir.TensorRTLLMDir, item.Name)
|
||||||
serverDTO.Image = envs["IMAGE"]
|
serverDTO.Image = envs["IMAGE"]
|
||||||
serverDTO.Command = envs["COMMAND"]
|
serverDTO.Command = getCommand(item.Env)
|
||||||
|
|
||||||
for k, v := range envs {
|
for k, v := range envs {
|
||||||
if strings.Contains(k, "CONTAINER_PORT") || strings.Contains(k, "HOST_PORT") {
|
if strings.Contains(k, "CONTAINER_PORT") || strings.Contains(k, "HOST_PORT") {
|
||||||
|
|
@ -94,7 +97,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultVolumes = map[string]string{
|
var defaultVolumes = map[string]string{
|
||||||
"${MODEL_PATH}": "/models",
|
"${MODEL_PATH}": "${MODEL_PATH}",
|
||||||
}
|
}
|
||||||
for _, volume := range volumes {
|
for _, volume := range volumes {
|
||||||
exist := false
|
exist := false
|
||||||
|
|
@ -227,14 +230,21 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
tensorrtLLMDir := path.Join(global.Dir.TensorRTLLMDir, create.Name)
|
tensorrtLLMDir := path.Join(global.Dir.TensorRTLLMDir, create.Name)
|
||||||
filesOP := files.NewFileOp()
|
filesOp := files.NewFileOp()
|
||||||
if !filesOP.Stat(tensorrtLLMDir) {
|
if !filesOp.Stat(tensorrtLLMDir) {
|
||||||
_ = filesOP.CreateDir(tensorrtLLMDir, 0644)
|
_ = filesOp.CreateDir(tensorrtLLMDir, 0644)
|
||||||
|
}
|
||||||
|
if create.ModelSpeedup {
|
||||||
|
if err := handleModelArchive(create.ModelType, create.ModelDir); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tensorrtLLM := &model.TensorRTLLM{
|
tensorrtLLM := &model.TensorRTLLM{
|
||||||
Name: create.Name,
|
Name: create.Name,
|
||||||
ContainerName: create.ContainerName,
|
ContainerName: create.ContainerName,
|
||||||
Status: constant.StatusStarting,
|
Status: constant.StatusStarting,
|
||||||
|
ModelType: create.ModelType,
|
||||||
|
ModelSpeedup: create.ModelSpeedup,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := handleLLMParams(tensorrtLLM, create); err != nil {
|
if err := handleLLMParams(tensorrtLLM, create); err != nil {
|
||||||
|
|
@ -247,7 +257,7 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
dockerComposePath := path.Join(llmDir, "docker-compose.yml")
|
dockerComposePath := path.Join(llmDir, "docker-compose.yml")
|
||||||
if err := filesOP.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
|
if err := filesOp.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
tensorrtLLM.Status = constant.StatusStarting
|
tensorrtLLM.Status = constant.StatusStarting
|
||||||
|
|
@ -269,8 +279,10 @@ func (t TensorRTLLMService) Update(req request.TensorRTLLMUpdate) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tensorrtLLM.ModelType = req.ModelType
|
||||||
|
tensorrtLLM.ModelSpeedup = req.ModelSpeedup
|
||||||
tensorrtLLM.ContainerName = req.ContainerName
|
tensorrtLLM.ContainerName = req.ContainerName
|
||||||
|
|
||||||
if err := handleLLMParams(tensorrtLLM, req.TensorRTLLMCreate); err != nil {
|
if err := handleLLMParams(tensorrtLLM, req.TensorRTLLMCreate); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
@ -381,3 +393,51 @@ func syncTensorRTLLMContainerStatus(tensorrtLLM *model.TensorRTLLM) error {
|
||||||
}
|
}
|
||||||
return tensorrtLLMRepo.Save(tensorrtLLM)
|
return tensorrtLLMRepo.Save(tensorrtLLM)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// findModelArchive looks in /home/models for a regular (non-directory) entry
// whose name starts with "FusionXplay_<modelType>_Accelerator" and ends with
// ".tar.gz", and returns the full path of the first such entry in the order
// os.ReadDir yields them.
//
// It returns an error when the directory cannot be read or when no entry
// matches; in both cases the returned path is empty.
func findModelArchive(modelType string) (string, error) {
	const baseDir = "/home/models"
	wantPrefix := fmt.Sprintf("FusionXplay_%s_Accelerator", modelType)

	dirEntries, readErr := os.ReadDir(baseDir)
	if readErr != nil {
		return "", fmt.Errorf("failed to read %s: %w", baseDir, readErr)
	}

	for i := range dirEntries {
		candidate := dirEntries[i]
		fileName := candidate.Name()
		// Only plain entries named like the accelerator tarball qualify.
		isArchive := !candidate.IsDir() &&
			strings.HasPrefix(fileName, wantPrefix) &&
			strings.HasSuffix(fileName, ".tar.gz")
		if !isArchive {
			continue
		}
		return filepath.Join(baseDir, fileName), nil
	}

	return "", errors.New(fmt.Sprintf("no FusionXplay_%s_Accelerator*.tar.gz found in /home/models", modelType))
}
|
||||||
|
|
||||||
|
func handleModelArchive(modelType string, modelDir string) error {
|
||||||
|
filePath, err := findModelArchive(modelType)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fileOp := files.NewFileOp()
|
||||||
|
if err = fileOp.TarGzExtractPro(filePath, modelDir, ""); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err = fileOp.ChmodR(path.Join(modelDir, "fusionxpark_accelerator"), 0755, false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCommand scans envStr line by line and returns the text following
// "COMMAND=" on the first line that, once surrounding whitespace is trimmed,
// starts with that key. The value is returned verbatim (no unquoting or
// variable expansion). An empty string is returned when no such line exists.
func getCommand(envStr string) string {
	const key = "COMMAND="
	for _, raw := range strings.Split(envStr, "\n") {
		trimmed := strings.TrimSpace(raw)
		if !strings.HasPrefix(trimmed, key) {
			continue
		}
		return trimmed[len(key):]
	}
	return ""
}
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ services:
|
||||||
networks:
|
networks:
|
||||||
- 1panel-network
|
- 1panel-network
|
||||||
volumes:
|
volumes:
|
||||||
- ${MODEL_PATH}:/models
|
- ${MODEL_PATH}:${MODEL_PATH}
|
||||||
ipc: host
|
ipc: host
|
||||||
ulimits:
|
ulimits:
|
||||||
memlock:
|
memlock:
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ func InitAgentDB() {
|
||||||
migrations.UpdateMonitorInterval,
|
migrations.UpdateMonitorInterval,
|
||||||
migrations.AddMonitorProcess,
|
migrations.AddMonitorProcess,
|
||||||
migrations.UpdateCronJob,
|
migrations.UpdateCronJob,
|
||||||
|
migrations.UpdateTensorrtLLM,
|
||||||
})
|
})
|
||||||
if err := m.Migrate(); err != nil {
|
if err := m.Migrate(); err != nil {
|
||||||
global.LOG.Error(err)
|
global.LOG.Error(err)
|
||||||
|
|
|
||||||
|
|
@ -673,3 +673,10 @@ var UpdateCronJob = &gormigrate.Migration{
|
||||||
return tx.AutoMigrate(&model.Cronjob{})
|
return tx.AutoMigrate(&model.Cronjob{})
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UpdateTensorrtLLM is the gormigrate migration with ID
// "20251110-update-tensorrt-llm"; its Migrate step runs AutoMigrate on
// model.TensorRTLLM.
var UpdateTensorrtLLM = &gormigrate.Migration{
|
||||||
|
ID: "20251110-update-tensorrt-llm",
|
||||||
|
Migrate: func(tx *gorm.DB) error {
|
||||||
|
return tx.AutoMigrate(&model.TensorRTLLM{})
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -746,6 +746,8 @@ const message = {
|
||||||
'After /models in the startup command, the model name needs to be completed; if external access is required, set the port in the command to be the same as the application port',
|
'After /models in the startup command, the model name needs to be completed; if external access is required, set the port in the command to be the same as the application port',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Due to the large image size, it is recommended to manually download the image to the server before installation',
|
'Due to the large image size, it is recommended to manually download the image to the server before installation',
|
||||||
|
modelSpeedup: 'Enable model acceleration',
|
||||||
|
modelType: 'Model type',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -747,6 +747,8 @@ const message = {
|
||||||
'Después de /models en el comando de inicio, se debe completar el nombre del modelo; si se requiere acceso externo, configure el puerto en el comando para que sea el mismo que el puerto de la aplicación',
|
'Después de /models en el comando de inicio, se debe completar el nombre del modelo; si se requiere acceso externo, configure el puerto en el comando para que sea el mismo que el puerto de la aplicación',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Debido al gran tamaño de la imagen, se recomienda descargar manualmente la imagen al servidor antes de la instalación',
|
'Debido al gran tamaño de la imagen, se recomienda descargar manualmente la imagen al servidor antes de la instalación',
|
||||||
|
modelSpeedup: 'Habilitar aceleración de modelo',
|
||||||
|
modelType: 'Tipo de modelo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -734,6 +734,8 @@ const message = {
|
||||||
'起動コマンドの /models の後にはモデル名を補完する必要があります;外部アクセスが必要な場合は、コマンド内のポートをアプリケーションポートと同じに設定してください',
|
'起動コマンドの /models の後にはモデル名を補完する必要があります;外部アクセスが必要な場合は、コマンド内のポートをアプリケーションポートと同じに設定してください',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'イメージサイズが大きいため、インストール前にサーバーにイメージを手動でダウンロードすることをお勧めします',
|
'イメージサイズが大きいため、インストール前にサーバーにイメージを手動でダウンロードすることをお勧めします',
|
||||||
|
modelSpeedup: 'モデル加速を有効化',
|
||||||
|
modelType: 'モデルタイプ',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -729,6 +729,8 @@ const message = {
|
||||||
commandHelper:
|
commandHelper:
|
||||||
'시작 명령의 /models 뒤에는 모델 이름을 완성해야 합니다; 외부 액세스가 필요한 경우 명령의 포트를 애플리케이션 포트와 동일하게 설정하세요',
|
'시작 명령의 /models 뒤에는 모델 이름을 완성해야 합니다; 외부 액세스가 필요한 경우 명령의 포트를 애플리케이션 포트와 동일하게 설정하세요',
|
||||||
imageAlert: '이미지 크기가 크므로 설치 전에 서버에 이미지를 수동으로 다운로드하는 것이 좋습니다',
|
imageAlert: '이미지 크기가 크므로 설치 전에 서버에 이미지를 수동으로 다운로드하는 것이 좋습니다',
|
||||||
|
modelSpeedup: '모델 가속 활성화',
|
||||||
|
modelType: '모델 유형',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -747,6 +747,8 @@ const message = {
|
||||||
'Selepas /models dalam arahan permulaan, nama model perlu dilengkapkan; jika akses luar diperlukan, tetapkan port dalam arahan sama dengan port aplikasi',
|
'Selepas /models dalam arahan permulaan, nama model perlu dilengkapkan; jika akses luar diperlukan, tetapkan port dalam arahan sama dengan port aplikasi',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Disebabkan saiz imej yang besar, disyorkan untuk memuat turun imej secara manual ke pelayan sebelum pemasangan',
|
'Disebabkan saiz imej yang besar, disyorkan untuk memuat turun imej secara manual ke pelayan sebelum pemasangan',
|
||||||
|
modelSpeedup: 'Dayakan pecutan model',
|
||||||
|
modelType: 'Jenis model',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -743,6 +743,8 @@ const message = {
|
||||||
'Após /models no comando de inicialização, o nome do modelo precisa ser completado; se for necessário acesso externo, defina a porta no comando para ser a mesma que a porta do aplicativo',
|
'Após /models no comando de inicialização, o nome do modelo precisa ser completado; se for necessário acesso externo, defina a porta no comando para ser a mesma que a porta do aplicativo',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Devido ao grande tamanho da imagem, recomenda-se baixar manualmente a imagem para o servidor antes da instalação',
|
'Devido ao grande tamanho da imagem, recomenda-se baixar manualmente a imagem para o servidor antes da instalação',
|
||||||
|
modelSpeedup: 'Ativar aceleração de modelo',
|
||||||
|
modelType: 'Tipo de modelo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -741,6 +741,8 @@ const message = {
|
||||||
'После /models в команде запуска необходимо указать имя модели; если требуется внешний доступ, установите порт в команде таким же, как порт приложения',
|
'После /models в команде запуска необходимо указать имя модели; если требуется внешний доступ, установите порт в команде таким же, как порт приложения',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Из-за большого размера образа рекомендуется вручную загрузить образ на сервер перед установкой',
|
'Из-за большого размера образа рекомендуется вручную загрузить образ на сервер перед установкой',
|
||||||
|
modelSpeedup: 'Включить ускорение модели',
|
||||||
|
modelType: 'Тип модели',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -755,6 +755,8 @@ const message = {
|
||||||
'Başlatma komutundaki /models sonrasında model adı tamamlanmalıdır; harici erişim gerekiyorsa, komuttaki bağlantı noktasını uygulama bağlantı noktasıyla aynı olacak şekilde ayarlayın',
|
'Başlatma komutundaki /models sonrasında model adı tamamlanmalıdır; harici erişim gerekiyorsa, komuttaki bağlantı noktasını uygulama bağlantı noktasıyla aynı olacak şekilde ayarlayın',
|
||||||
imageAlert:
|
imageAlert:
|
||||||
'Görüntü boyutu büyük olduğundan, kurulumdan önce görüntüyü sunucuya manuel olarak indirmeniz önerilir',
|
'Görüntü boyutu büyük olduğundan, kurulumdan önce görüntüyü sunucuya manuel olarak indirmeniz önerilir',
|
||||||
|
modelSpeedup: 'Model hızlandırmayı etkinleştir',
|
||||||
|
modelType: 'Model türü',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -716,6 +716,8 @@ const message = {
|
||||||
modelDir: '模型目錄',
|
modelDir: '模型目錄',
|
||||||
commandHelper: '啟動指令中的 /models 後需補全模型名稱;若需外部訪問,請將指令中的埠設定為與應用埠相同',
|
commandHelper: '啟動指令中的 /models 後需補全模型名稱;若需外部訪問,請將指令中的埠設定為與應用埠相同',
|
||||||
imageAlert: '由於鏡像較大,建議先手動將鏡像下載到伺服器後再進行安裝',
|
imageAlert: '由於鏡像較大,建議先手動將鏡像下載到伺服器後再進行安裝',
|
||||||
|
modelSpeedup: '啟用模型加速',
|
||||||
|
modelType: '模型類型',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -714,8 +714,10 @@ const message = {
|
||||||
tensorRT: {
|
tensorRT: {
|
||||||
llm: 'TensorRT LLM',
|
llm: 'TensorRT LLM',
|
||||||
modelDir: '模型目录',
|
modelDir: '模型目录',
|
||||||
commandHelper: '启动命令中的 /models 后需补全模型名称;若需外部访问,请将命令中的端口设置为与应用端口相同',
|
commandHelper: '若需外部访问,请将命令中的端口设置为与应用端口相同',
|
||||||
imageAlert: '由于镜像较大,建议先手动将镜像下载到服务器后再进行安装',
|
imageAlert: '由于镜像较大,建议先手动将镜像下载到服务器后再进行安装',
|
||||||
|
modelSpeedup: '启用模型加速',
|
||||||
|
modelType: '模型类型',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
container: {
|
container: {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,20 @@
|
||||||
<el-form-item :label="$t('app.version')" prop="version">
|
<el-form-item :label="$t('app.version')" prop="version">
|
||||||
<el-input v-model.trim="tensorRTLLM.version" />
|
<el-input v-model.trim="tensorRTLLM.version" />
|
||||||
</el-form-item>
|
</el-form-item>
|
||||||
|
<div v-if="isFxplay">
|
||||||
|
<el-form-item :label="$t('aiTools.tensorRT.modelSpeedup')" prop="modelSpeedup">
|
||||||
|
<el-switch v-model="tensorRTLLM.modelSpeedup" @change="changeModelSpeedup"></el-switch>
|
||||||
|
</el-form-item>
|
||||||
|
<el-form-item
|
||||||
|
:label="$t('aiTools.tensorRT.modelType')"
|
||||||
|
prop="modelType"
|
||||||
|
v-if="tensorRTLLM.modelSpeedup"
|
||||||
|
>
|
||||||
|
<el-select v-model="tensorRTLLM.modelType">
|
||||||
|
<el-option label="Qwen3" value="Qwen3" />
|
||||||
|
</el-select>
|
||||||
|
</el-form-item>
|
||||||
|
</div>
|
||||||
<el-form-item :label="$t('aiTools.tensorRT.modelDir')" prop="modelDir">
|
<el-form-item :label="$t('aiTools.tensorRT.modelDir')" prop="modelDir">
|
||||||
<el-input v-model="tensorRTLLM.modelDir">
|
<el-input v-model="tensorRTLLM.modelDir">
|
||||||
<template #prepend>
|
<template #prepend>
|
||||||
|
|
@ -52,6 +66,8 @@ import i18n from '@/lang';
|
||||||
import { ElForm, FormInstance } from 'element-plus';
|
import { ElForm, FormInstance } from 'element-plus';
|
||||||
import { createTensorRTLLM, updateTensorRTLLM } from '@/api/modules/ai';
|
import { createTensorRTLLM, updateTensorRTLLM } from '@/api/modules/ai';
|
||||||
import { MsgSuccess } from '@/utils/message';
|
import { MsgSuccess } from '@/utils/message';
|
||||||
|
import { useGlobalStore } from '@/composables/useGlobalStore';
|
||||||
|
const { isFxplay } = useGlobalStore();
|
||||||
|
|
||||||
const loading = ref(false);
|
const loading = ref(false);
|
||||||
const mode = ref('create');
|
const mode = ref('create');
|
||||||
|
|
@ -63,11 +79,13 @@ const newTensorRTLLM = () => {
|
||||||
version: '1.2.0rc0',
|
version: '1.2.0rc0',
|
||||||
modelDir: '',
|
modelDir: '',
|
||||||
image: 'nvcr.io/nvidia/tensorrt-llm/release',
|
image: 'nvcr.io/nvidia/tensorrt-llm/release',
|
||||||
command: 'bash -c "trtllm-serve /models/ --host 0.0.0.0 --port 8000"',
|
command: 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"',
|
||||||
exposedPorts: [],
|
exposedPorts: [],
|
||||||
environments: [],
|
environments: [],
|
||||||
extraHosts: [],
|
extraHosts: [],
|
||||||
volumes: [],
|
volumes: [],
|
||||||
|
modelSpeedup: false,
|
||||||
|
modelType: 'Qwen3',
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
const modelDirRef = ref();
|
const modelDirRef = ref();
|
||||||
|
|
@ -106,6 +124,15 @@ const getModelDir = (path: string) => {
|
||||||
tensorRTLLM.value.modelDir = path;
|
tensorRTLLM.value.modelDir = path;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Resets the start command to the default that matches the current state of
// the model-speedup switch. The strings are single-quoted on purpose so that
// ${MODEL_PATH} stays a literal placeholder and is not interpolated here.
const changeModelSpeedup = () => {
    const acceleratedCommand =
        'bash -c "${MODEL_PATH}/fusionxpark_accelerator --model_path ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
    const standardCommand = 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
    tensorRTLLM.value.command = tensorRTLLM.value.modelSpeedup ? acceleratedCommand : standardCommand;
};
|
||||||
|
|
||||||
const rules = reactive({
|
const rules = reactive({
|
||||||
name: [Rules.requiredInput],
|
name: [Rules.requiredInput],
|
||||||
version: [Rules.requiredInput],
|
version: [Rules.requiredInput],
|
||||||
|
|
@ -113,6 +140,7 @@ const rules = reactive({
|
||||||
containerName: [Rules.requiredInput],
|
containerName: [Rules.requiredInput],
|
||||||
image: [Rules.requiredInput],
|
image: [Rules.requiredInput],
|
||||||
command: [Rules.requiredInput],
|
command: [Rules.requiredInput],
|
||||||
|
modelType: [Rules.requiredSelect],
|
||||||
});
|
});
|
||||||
|
|
||||||
const formRef = ref<FormInstance>();
|
const formRef = ref<FormInstance>();
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,9 @@
|
||||||
<el-tab-pane :label="$t('runtime.environment')">
|
<el-tab-pane :label="$t('runtime.environment')">
|
||||||
<Environment :environments="runtime.environments" />
|
<Environment :environments="runtime.environments" />
|
||||||
</el-tab-pane>
|
</el-tab-pane>
|
||||||
<el-tab-pane :label="$t('container.mount')"><Volumes :volumes="runtime.volumes" /></el-tab-pane>
|
<el-tab-pane :label="$t('container.mount')">
|
||||||
|
<Volumes :volumes="runtime.volumes" />
|
||||||
|
</el-tab-pane>
|
||||||
<el-tab-pane :label="$t('runtime.extraHosts')">
|
<el-tab-pane :label="$t('runtime.extraHosts')">
|
||||||
<ExtraHosts :extraHosts="runtime.extraHosts" />
|
<ExtraHosts :extraHosts="runtime.extraHosts" />
|
||||||
</el-tab-pane>
|
</el-tab-pane>
|
||||||
|
|
@ -18,17 +20,26 @@ import PortConfig from '@/views/website/runtime/components/port/index.vue';
|
||||||
import Environment from '@/views/website/runtime/components/environment/index.vue';
|
import Environment from '@/views/website/runtime/components/environment/index.vue';
|
||||||
import Volumes from '@/views/website/runtime/components/volume/index.vue';
|
import Volumes from '@/views/website/runtime/components/volume/index.vue';
|
||||||
import ExtraHosts from '@/views/website/runtime/components/extra_hosts/index.vue';
|
import ExtraHosts from '@/views/website/runtime/components/extra_hosts/index.vue';
|
||||||
|
|
||||||
import { Runtime } from '@/api/interface/runtime';
|
|
||||||
import { useVModel } from '@vueuse/core';
|
import { useVModel } from '@vueuse/core';
|
||||||
|
import type { PropType } from 'vue';
|
||||||
|
|
||||||
|
// Shape of the object this component accepts as its `modelValue` prop: the
// four configuration lists (exposed ports, environments, volumes, extra hosts).
interface RuntimeConfigFields {
|
||||||
|
exposedPorts: any[];
|
||||||
|
environments: any[];
|
||||||
|
volumes: any[];
|
||||||
|
extraHosts: any[];
|
||||||
|
}
|
||||||
|
|
||||||
const props = defineProps({
|
const props = defineProps({
|
||||||
modelValue: {
|
modelValue: {
|
||||||
type: Object as PropType<Runtime.Runtime>,
|
type: Object as PropType<RuntimeConfigFields>,
|
||||||
required: true,
|
required: true,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
const emit = defineEmits(['update:modelValue']);
|
const emit = defineEmits<{
|
||||||
|
'update:modelValue': [value: RuntimeConfigFields];
|
||||||
|
}>();
|
||||||
|
|
||||||
const runtime = useVModel(props, 'modelValue', emit);
|
const runtime = useVModel(props, 'modelValue', emit);
|
||||||
</script>
|
</script>
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue