feat: support tensorrt LLM speedup (#10905)

This commit is contained in:
CityFun 2025-11-10 16:40:37 +08:00 committed by GitHub
parent da5668ac27
commit 3b73de32ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 149 additions and 19 deletions

View file

@ -14,6 +14,8 @@ type TensorRTLLMCreate struct {
ModelDir string `json:"modelDir" validate:"required"`
Image string `json:"image" validate:"required"`
Command string `json:"command" validate:"required"`
ModelType string `json:"modelType"`
ModelSpeedup bool `json:"modelSpeedup"`
DockerConfig
}

View file

@ -6,8 +6,9 @@ type TensorRTLLM struct {
DockerCompose string `json:"dockerCompose"`
ContainerName string `json:"containerName"`
Message string `json:"message"`
//Port int `json:"port"`
Status string `json:"status"`
Env string `json:"env"`
TaskID string `json:"taskID"`
ModelType string `json:"modelType"`
ModelSpeedup bool `json:"modelSpeedup"`
}

View file

@ -1,6 +1,7 @@
package service
import (
"errors"
"fmt"
"github.com/1Panel-dev/1Panel/agent/app/dto/request"
"github.com/1Panel-dev/1Panel/agent/app/dto/response"
@ -15,7 +16,9 @@ import (
"github.com/1Panel-dev/1Panel/agent/utils/files"
"github.com/subosito/gotenv"
"gopkg.in/yaml.v3"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
@ -52,7 +55,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
serverDTO.ModelDir = envs["MODEL_PATH"]
serverDTO.Dir = path.Join(global.Dir.TensorRTLLMDir, item.Name)
serverDTO.Image = envs["IMAGE"]
serverDTO.Command = envs["COMMAND"]
serverDTO.Command = getCommand(item.Env)
for k, v := range envs {
if strings.Contains(k, "CONTAINER_PORT") || strings.Contains(k, "HOST_PORT") {
@ -94,7 +97,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
}
var defaultVolumes = map[string]string{
"${MODEL_PATH}": "/models",
"${MODEL_PATH}": "${MODEL_PATH}",
}
for _, volume := range volumes {
exist := false
@ -227,14 +230,21 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
}
tensorrtLLMDir := path.Join(global.Dir.TensorRTLLMDir, create.Name)
filesOP := files.NewFileOp()
if !filesOP.Stat(tensorrtLLMDir) {
_ = filesOP.CreateDir(tensorrtLLMDir, 0644)
filesOp := files.NewFileOp()
if !filesOp.Stat(tensorrtLLMDir) {
_ = filesOp.CreateDir(tensorrtLLMDir, 0644)
}
if create.ModelSpeedup {
if err := handleModelArchive(create.ModelType, create.ModelDir); err != nil {
return err
}
}
tensorrtLLM := &model.TensorRTLLM{
Name: create.Name,
ContainerName: create.ContainerName,
Status: constant.StatusStarting,
ModelType: create.ModelType,
ModelSpeedup: create.ModelSpeedup,
}
if err := handleLLMParams(tensorrtLLM, create); err != nil {
@ -247,7 +257,7 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
return err
}
dockerComposePath := path.Join(llmDir, "docker-compose.yml")
if err := filesOP.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
if err := filesOp.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
return err
}
tensorrtLLM.Status = constant.StatusStarting
@ -269,8 +279,10 @@ func (t TensorRTLLMService) Update(req request.TensorRTLLMUpdate) error {
return err
}
}
tensorrtLLM.ModelType = req.ModelType
tensorrtLLM.ModelSpeedup = req.ModelSpeedup
tensorrtLLM.ContainerName = req.ContainerName
if err := handleLLMParams(tensorrtLLM, req.TensorRTLLMCreate); err != nil {
return err
}
@ -381,3 +393,51 @@ func syncTensorRTLLMContainerStatus(tensorrtLLM *model.TensorRTLLM) error {
}
return tensorrtLLMRepo.Save(tensorrtLLM)
}
// findModelArchive locates the FusionXplay accelerator archive for the given
// model type. It scans /home/models for a regular file named
// "FusionXplay_<modelType>_Accelerator*.tar.gz" and returns its full path.
// An error is returned when the directory cannot be read or no matching
// archive exists.
func findModelArchive(modelType string) (string, error) {
const baseDir = "/home/models"
prefix := fmt.Sprintf("FusionXplay_%s_Accelerator", modelType)
entries, err := os.ReadDir(baseDir)
if err != nil {
return "", fmt.Errorf("failed to read %s: %w", baseDir, err)
}
for _, entry := range entries {
if entry.IsDir() {
continue
}
name := entry.Name()
if strings.HasPrefix(name, prefix) && strings.HasSuffix(name, ".tar.gz") {
return filepath.Join(baseDir, name), nil
}
}
// fmt.Errorf replaces errors.New(fmt.Sprintf(...)) (staticcheck S1028);
// reusing prefix/baseDir keeps the message in sync with the actual search.
return "", fmt.Errorf("no %s*.tar.gz found in %s", prefix, baseDir)
}
// handleModelArchive extracts the FusionXplay accelerator archive matching
// modelType into modelDir and makes the extracted "fusionxpark_accelerator"
// directory executable (0755, recursively).
func handleModelArchive(modelType string, modelDir string) error {
archivePath, err := findModelArchive(modelType)
if err != nil {
return err
}
op := files.NewFileOp()
if err := op.TarGzExtractPro(archivePath, modelDir, ""); err != nil {
return err
}
return op.ChmodR(path.Join(modelDir, "fusionxpark_accelerator"), 0755, false)
}
// getCommand extracts the value of the COMMAND entry from a raw env-file
// string. Each line is trimmed before matching, so surrounding whitespace
// and CR line endings are tolerated. Returns "" when no COMMAND line exists.
func getCommand(envStr string) string {
for _, raw := range strings.Split(envStr, "\n") {
trimmed := strings.TrimSpace(raw)
if !strings.HasPrefix(trimmed, "COMMAND=") {
continue
}
return strings.TrimPrefix(trimmed, "COMMAND=")
}
return ""
}

View file

@ -14,7 +14,7 @@ services:
networks:
- 1panel-network
volumes:
- ${MODEL_PATH}:/models
- ${MODEL_PATH}:${MODEL_PATH}
ipc: host
ulimits:
memlock:

View file

@ -50,6 +50,7 @@ func InitAgentDB() {
migrations.UpdateMonitorInterval,
migrations.AddMonitorProcess,
migrations.UpdateCronJob,
migrations.UpdateTensorrtLLM,
})
if err := m.Migrate(); err != nil {
global.LOG.Error(err)

View file

@ -673,3 +673,10 @@ var UpdateCronJob = &gormigrate.Migration{
return tx.AutoMigrate(&model.Cronjob{})
},
}
// UpdateTensorrtLLM auto-migrates the TensorRTLLM table so that columns for
// newly added model fields (the model-acceleration feature, e.g. ModelType
// and ModelSpeedup) are created on existing installations.
var UpdateTensorrtLLM = &gormigrate.Migration{
ID: "20251110-update-tensorrt-llm",
Migrate: func(tx *gorm.DB) error {
return tx.AutoMigrate(&model.TensorRTLLM{})
},
}

View file

@ -746,6 +746,8 @@ const message = {
'After /models in the startup command, the model name needs to be completed; if external access is required, set the port in the command to be the same as the application port',
imageAlert:
'Due to the large image size, it is recommended to manually download the image to the server before installation',
modelSpeedup: 'Enable model acceleration',
modelType: 'Model type',
},
},
container: {

View file

@ -747,6 +747,8 @@ const message = {
'Después de /models en el comando de inicio, se debe completar el nombre del modelo; si se requiere acceso externo, configure el puerto en el comando para que sea el mismo que el puerto de la aplicación',
imageAlert:
'Debido al gran tamaño de la imagen, se recomienda descargar manualmente la imagen al servidor antes de la instalación',
modelSpeedup: 'Habilitar aceleración de modelo',
modelType: 'Tipo de modelo',
},
},
container: {

View file

@ -734,6 +734,8 @@ const message = {
'起動コマンドの /models の後にはモデル名を補完する必要があります外部アクセスが必要な場合はコマンド内のポートをアプリケーションポートと同じに設定してください',
imageAlert:
'イメージサイズが大きいためインストール前にサーバーにイメージを手動でダウンロードすることをお勧めします',
modelSpeedup: 'モデル加速を有効化',
modelType: 'モデルタイプ',
},
},
container: {

View file

@ -729,6 +729,8 @@ const message = {
commandHelper:
'시작 명령의 /models 뒤에는 모델 이름을 완성해야 합니다; 외부 액세스가 필요한 경우 명령의 포트를 애플리케이션 포트와 동일하게 설정하세요',
imageAlert: '이미지 크기가 크므로 설치 전에 서버에 이미지를 수동으로 다운로드하는 것이 좋습니다',
modelSpeedup: '모델 가속 활성화',
modelType: '모델 유형',
},
},
container: {

View file

@ -747,6 +747,8 @@ const message = {
'Selepas /models dalam arahan permulaan, nama model perlu dilengkapkan; jika akses luar diperlukan, tetapkan port dalam arahan sama dengan port aplikasi',
imageAlert:
'Disebabkan saiz imej yang besar, disyorkan untuk memuat turun imej secara manual ke pelayan sebelum pemasangan',
modelSpeedup: 'Dayakan pecutan model',
modelType: 'Jenis model',
},
},
container: {

View file

@ -743,6 +743,8 @@ const message = {
'Após /models no comando de inicialização, o nome do modelo precisa ser completado; se for necessário acesso externo, defina a porta no comando para ser a mesma que a porta do aplicativo',
imageAlert:
'Devido ao grande tamanho da imagem, recomenda-se baixar manualmente a imagem para o servidor antes da instalação',
modelSpeedup: 'Ativar aceleração de modelo',
modelType: 'Tipo de modelo',
},
},
container: {

View file

@ -741,6 +741,8 @@ const message = {
'После /models в команде запуска необходимо указать имя модели; если требуется внешний доступ, установите порт в команде таким же, как порт приложения',
imageAlert:
'Из-за большого размера образа рекомендуется вручную загрузить образ на сервер перед установкой',
modelSpeedup: 'Включить ускорение модели',
modelType: 'Тип модели',
},
},
container: {

View file

@ -755,6 +755,8 @@ const message = {
'Başlatma komutundaki /models sonrasında model adı tamamlanmalıdır; harici erişim gerekiyorsa, komuttaki bağlantı noktasını uygulama bağlantı noktasıyla aynı olacak şekilde ayarlayın',
imageAlert:
'Görüntü boyutu büyük olduğundan, kurulumdan önce görüntüyü sunucuya manuel olarak indirmeniz önerilir',
modelSpeedup: 'Model hızlandırmayı etkinleştir',
modelType: 'Model türü',
},
},
container: {

View file

@ -716,6 +716,8 @@ const message = {
modelDir: '模型目錄',
commandHelper: '啟動指令中的 /models 後需補全模型名稱若需外部訪問請將指令中的埠設定為與應用埠相同',
imageAlert: '由於鏡像較大建議先手動將鏡像下載到伺服器後再進行安裝',
modelSpeedup: '啟用模型加速',
modelType: '模型類型',
},
},
container: {

View file

@ -714,8 +714,10 @@ const message = {
tensorRT: {
llm: 'TensorRT LLM',
modelDir: '模型目录',
commandHelper: '启动命令中的 /models 后需补全模型名称若需外部访问请将命令中的端口设置为与应用端口相同',
commandHelper: '若需外部访问请将命令中的端口设置为与应用端口相同',
imageAlert: '由于镜像较大建议先手动将镜像下载到服务器后再进行安装',
modelSpeedup: '启用模型加速',
modelType: '模型类型',
},
},
container: {

View file

@ -14,6 +14,20 @@
<el-form-item :label="$t('app.version')" prop="version">
<el-input v-model.trim="tensorRTLLM.version" />
</el-form-item>
<div v-if="isFxplay">
<el-form-item :label="$t('aiTools.tensorRT.modelSpeedup')" prop="modelSpeedup">
<el-switch v-model="tensorRTLLM.modelSpeedup" @change="changeModelSpeedup"></el-switch>
</el-form-item>
<el-form-item
:label="$t('aiTools.tensorRT.modelType')"
prop="modelType"
v-if="tensorRTLLM.modelSpeedup"
>
<el-select v-model="tensorRTLLM.modelType">
<el-option label="Qwen3" value="Qwen3" />
</el-select>
</el-form-item>
</div>
<el-form-item :label="$t('aiTools.tensorRT.modelDir')" prop="modelDir">
<el-input v-model="tensorRTLLM.modelDir">
<template #prepend>
@ -52,6 +66,8 @@ import i18n from '@/lang';
import { ElForm, FormInstance } from 'element-plus';
import { createTensorRTLLM, updateTensorRTLLM } from '@/api/modules/ai';
import { MsgSuccess } from '@/utils/message';
import { useGlobalStore } from '@/composables/useGlobalStore';
const { isFxplay } = useGlobalStore();
const loading = ref(false);
const mode = ref('create');
@ -63,11 +79,13 @@ const newTensorRTLLM = () => {
version: '1.2.0rc0',
modelDir: '',
image: 'nvcr.io/nvidia/tensorrt-llm/release',
command: 'bash -c "trtllm-serve /models/ --host 0.0.0.0 --port 8000"',
command: 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"',
exposedPorts: [],
environments: [],
extraHosts: [],
volumes: [],
modelSpeedup: false,
modelType: 'Qwen3',
};
};
const modelDirRef = ref();
@ -106,6 +124,15 @@ const getModelDir = (path: string) => {
tensorRTLLM.value.modelDir = path;
};
// Swap the container start command when the model-speedup switch toggles:
// the FusionXplay accelerator binary when enabled, plain trtllm-serve otherwise.
const changeModelSpeedup = () => {
    const acceleratorCmd =
        'bash -c "${MODEL_PATH}/fusionxpark_accelerator --model_path ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
    const defaultCmd = 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
    tensorRTLLM.value.command = tensorRTLLM.value.modelSpeedup ? acceleratorCmd : defaultCmd;
};
const rules = reactive({
name: [Rules.requiredInput],
version: [Rules.requiredInput],
@ -113,6 +140,7 @@ const rules = reactive({
containerName: [Rules.requiredInput],
image: [Rules.requiredInput],
command: [Rules.requiredInput],
modelType: [Rules.requiredSelect],
});
const formRef = ref<FormInstance>();

View file

@ -6,7 +6,9 @@
<el-tab-pane :label="$t('runtime.environment')">
<Environment :environments="runtime.environments" />
</el-tab-pane>
<el-tab-pane :label="$t('container.mount')"><Volumes :volumes="runtime.volumes" /></el-tab-pane>
<el-tab-pane :label="$t('container.mount')">
<Volumes :volumes="runtime.volumes" />
</el-tab-pane>
<el-tab-pane :label="$t('runtime.extraHosts')">
<ExtraHosts :extraHosts="runtime.extraHosts" />
</el-tab-pane>
@ -18,17 +20,26 @@ import PortConfig from '@/views/website/runtime/components/port/index.vue';
import Environment from '@/views/website/runtime/components/environment/index.vue';
import Volumes from '@/views/website/runtime/components/volume/index.vue';
import ExtraHosts from '@/views/website/runtime/components/extra_hosts/index.vue';
import { Runtime } from '@/api/interface/runtime';
import { useVModel } from '@vueuse/core';
import type { PropType } from 'vue';
// Minimal shape this config panel actually binds to: only the four
// collections passed to the Port/Environment/Volumes/ExtraHosts children,
// decoupling the component from the full Runtime.Runtime type.
interface RuntimeConfigFields {
exposedPorts: any[];
environments: any[];
volumes: any[];
extraHosts: any[];
}
const props = defineProps({
modelValue: {
type: Object as PropType<Runtime.Runtime>,
type: Object as PropType<RuntimeConfigFields>,
required: true,
},
});
const emit = defineEmits(['update:modelValue']);
const emit = defineEmits<{
'update:modelValue': [value: RuntimeConfigFields];
}>();
const runtime = useVModel(props, 'modelValue', emit);
</script>