feat: support tensorrt LLM speedup (#10905)
commit 3b73de32ad
parent da5668ac27

18 changed files with 149 additions and 19 deletions
@@ -14,6 +14,8 @@ type TensorRTLLMCreate struct {
 	ModelDir     string `json:"modelDir" validate:"required"`
 	Image        string `json:"image" validate:"required"`
 	Command      string `json:"command" validate:"required"`
+	ModelType    string `json:"modelType"`
+	ModelSpeedup bool   `json:"modelSpeedup"`
 	DockerConfig
 }

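For orientation, a create request that exercises the two new fields might look like the sketch below (a minimal sketch; the field values are invented here, and only ModelType and ModelSpeedup are new in this commit):

	// Illustrative values only; Name/ContainerName and the port/volume
	// fields carried by DockerConfig are omitted for brevity.
	create := request.TensorRTLLMCreate{
		ModelDir:     "/home/models/Qwen3-8B",
		Image:        "nvcr.io/nvidia/tensorrt-llm/release",
		Command:      `bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"`,
		ModelType:    "Qwen3", // the only type offered by the UI below
		ModelSpeedup: true,    // triggers handleModelArchive in Create
	}
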
@@ -6,8 +6,9 @@ type TensorRTLLM struct {
 	DockerCompose string `json:"dockerCompose"`
 	ContainerName string `json:"containerName"`
 	Message       string `json:"message"`
-	//Port int `json:"port"`
-	Status string `json:"status"`
-	Env    string `json:"env"`
-	TaskID string `json:"taskID"`
+	Status        string `json:"status"`
+	Env           string `json:"env"`
+	TaskID        string `json:"taskID"`
+	ModelType     string `json:"modelType"`
+	ModelSpeedup  bool   `json:"modelSpeedup"`
 }

@@ -1,6 +1,7 @@
 package service
 
 import (
+	"errors"
 	"fmt"
 	"github.com/1Panel-dev/1Panel/agent/app/dto/request"
 	"github.com/1Panel-dev/1Panel/agent/app/dto/response"

@@ -15,7 +16,9 @@ import (
 	"github.com/1Panel-dev/1Panel/agent/utils/files"
 	"github.com/subosito/gotenv"
 	"gopkg.in/yaml.v3"
+	"os"
 	"path"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"

@@ -52,7 +55,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
 		serverDTO.ModelDir = envs["MODEL_PATH"]
 		serverDTO.Dir = path.Join(global.Dir.TensorRTLLMDir, item.Name)
 		serverDTO.Image = envs["IMAGE"]
-		serverDTO.Command = envs["COMMAND"]
+		serverDTO.Command = getCommand(item.Env)
 
 		for k, v := range envs {
 			if strings.Contains(k, "CONTAINER_PORT") || strings.Contains(k, "HOST_PORT") {

@@ -94,7 +97,7 @@ func (t TensorRTLLMService) Page(req request.TensorRTLLMSearch) response.TensorR
 	}
 
 	var defaultVolumes = map[string]string{
-		"${MODEL_PATH}": "/models",
+		"${MODEL_PATH}": "${MODEL_PATH}",
 	}
 	for _, volume := range volumes {
 		exist := false

@@ -227,14 +230,21 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
 	}
 
 	tensorrtLLMDir := path.Join(global.Dir.TensorRTLLMDir, create.Name)
-	filesOP := files.NewFileOp()
-	if !filesOP.Stat(tensorrtLLMDir) {
-		_ = filesOP.CreateDir(tensorrtLLMDir, 0644)
+	filesOp := files.NewFileOp()
+	if !filesOp.Stat(tensorrtLLMDir) {
+		_ = filesOp.CreateDir(tensorrtLLMDir, 0644)
 	}
+	if create.ModelSpeedup {
+		if err := handleModelArchive(create.ModelType, create.ModelDir); err != nil {
+			return err
+		}
+	}
 	tensorrtLLM := &model.TensorRTLLM{
 		Name:          create.Name,
 		ContainerName: create.ContainerName,
 		Status:        constant.StatusStarting,
+		ModelType:     create.ModelType,
+		ModelSpeedup:  create.ModelSpeedup,
 	}
 
 	if err := handleLLMParams(tensorrtLLM, create); err != nil {

@@ -247,7 +257,7 @@ func (t TensorRTLLMService) Create(create request.TensorRTLLMCreate) error {
 		return err
 	}
 	dockerComposePath := path.Join(llmDir, "docker-compose.yml")
-	if err := filesOP.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
+	if err := filesOp.SaveFile(dockerComposePath, tensorrtLLM.DockerCompose, 0644); err != nil {
 		return err
 	}
 	tensorrtLLM.Status = constant.StatusStarting

@ -269,8 +279,10 @@ func (t TensorRTLLMService) Update(req request.TensorRTLLMUpdate) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
tensorrtLLM.ModelType = req.ModelType
|
||||
tensorrtLLM.ModelSpeedup = req.ModelSpeedup
|
||||
tensorrtLLM.ContainerName = req.ContainerName
|
||||
|
||||
if err := handleLLMParams(tensorrtLLM, req.TensorRTLLMCreate); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@@ -381,3 +393,51 @@ func syncTensorRTLLMContainerStatus(tensorrtLLM *model.TensorRTLLM) error {
 	}
 	return tensorrtLLMRepo.Save(tensorrtLLM)
 }
+
+func findModelArchive(modelType string) (string, error) {
+	const baseDir = "/home/models"
+	prefix := fmt.Sprintf("FusionXplay_%s_Accelerator", modelType)
+
+	entries, err := os.ReadDir(baseDir)
+	if err != nil {
+		return "", fmt.Errorf("failed to read %s: %w", baseDir, err)
+	}
+
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if strings.HasPrefix(name, prefix) && strings.HasSuffix(name, ".tar.gz") {
+			return filepath.Join(baseDir, name), nil
+		}
+	}
+
+	return "", errors.New(fmt.Sprintf("no FusionXplay_%s_Accelerator*.tar.gz found in /home/models", modelType))
+}
+
+func handleModelArchive(modelType string, modelDir string) error {
+	filePath, err := findModelArchive(modelType)
+	if err != nil {
+		return err
+	}
+	fileOp := files.NewFileOp()
+	if err = fileOp.TarGzExtractPro(filePath, modelDir, ""); err != nil {
+		return err
+	}
+	if err = fileOp.ChmodR(path.Join(modelDir, "fusionxpark_accelerator"), 0755, false); err != nil {
+		return err
+	}
+	return nil
+}
+
+func getCommand(envStr string) string {
+	lines := strings.Split(envStr, "\n")
+	for _, line := range lines {
+		line = strings.TrimSpace(line)
+		if strings.HasPrefix(line, "COMMAND=") {
+			return strings.TrimPrefix(line, "COMMAND=")
+		}
+	}
+	return ""
+}

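A minimal standalone sketch of what the two new helpers accept (file names and env content invented for illustration): findModelArchive takes the first regular file in /home/models named FusionXplay_<modelType>_Accelerator*.tar.gz, and getCommand scans by prefix rather than doing a full dotenv parse, so a COMMAND value may itself contain '=' or quotes.

	package main

	import (
		"fmt"
		"strings"
	)

	// command extracts COMMAND= the same way getCommand above does:
	// TrimPrefix keeps everything after the first "COMMAND=", so values
	// containing '=' or spaces survive intact.
	func command(envStr string) string {
		for _, line := range strings.Split(envStr, "\n") {
			line = strings.TrimSpace(line)
			if strings.HasPrefix(line, "COMMAND=") {
				return strings.TrimPrefix(line, "COMMAND=")
			}
		}
		return ""
	}

	// matches mirrors the archive filter in findModelArchive.
	func matches(name, modelType string) bool {
		prefix := fmt.Sprintf("FusionXplay_%s_Accelerator", modelType)
		return strings.HasPrefix(name, prefix) && strings.HasSuffix(name, ".tar.gz")
	}

	func main() {
		env := "IMAGE=nvcr.io/nvidia/tensorrt-llm/release\n" +
			"COMMAND=bash -c \"trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000\"\n"
		fmt.Println(command(env)) // bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"

		fmt.Println(matches("FusionXplay_Qwen3_Accelerator_v1.tar.gz", "Qwen3")) // true
		fmt.Println(matches("FusionXplay_Qwen3_Accelerator", "Qwen3"))           // false: missing .tar.gz
	}
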
@@ -14,7 +14,7 @@ services:
     networks:
       - 1panel-network
     volumes:
-      - ${MODEL_PATH}:/models
+      - ${MODEL_PATH}:${MODEL_PATH}
     ipc: host
     ulimits:
       memlock:

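The identity mount pairs with the frontend command change below: because ${MODEL_PATH} now resolves to the same absolute path on the host and in the container, one value serves both the volume mapping and the startup command. A throwaway sketch of the substitution compose performs from the .env file (path invented):

	package main

	import (
		"fmt"
		"os"
	)

	func main() {
		// Compose does this expansion itself; this only illustrates the result.
		os.Setenv("MODEL_PATH", "/home/models/Qwen3-8B")
		fmt.Println(os.ExpandEnv("- ${MODEL_PATH}:${MODEL_PATH}"))
		// - /home/models/Qwen3-8B:/home/models/Qwen3-8B
		fmt.Println(os.ExpandEnv(`bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"`))
		// bash -c "trtllm-serve /home/models/Qwen3-8B --host 0.0.0.0 --port 8000"
	}
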
@@ -50,6 +50,7 @@ func InitAgentDB() {
 		migrations.UpdateMonitorInterval,
 		migrations.AddMonitorProcess,
 		migrations.UpdateCronJob,
+		migrations.UpdateTensorrtLLM,
 	})
 	if err := m.Migrate(); err != nil {
 		global.LOG.Error(err)

@@ -673,3 +673,10 @@ var UpdateCronJob = &gormigrate.Migration{
 		return tx.AutoMigrate(&model.Cronjob{})
 	},
 }
+
+var UpdateTensorrtLLM = &gormigrate.Migration{
+	ID: "20251110-update-tensorrt-llm",
+	Migrate: func(tx *gorm.DB) error {
+		return tx.AutoMigrate(&model.TensorRTLLM{})
+	},
+}

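AutoMigrate diffs the Go struct against the live table, so with GORM's default naming strategy this migration should simply add model_type and model_speedup columns (exact table and column names depend on the configured naming strategy). Registration and execution reduce to roughly this, simplified from InitAgentDB above:

	// A minimal sketch of how the migration runs; db, migrations, and
	// global come from the surrounding agent code shown in this commit.
	m := gormigrate.New(db, gormigrate.DefaultOptions, []*gormigrate.Migration{
		migrations.UpdateTensorrtLLM, // ID "20251110-update-tensorrt-llm" runs once, then is recorded
	})
	if err := m.Migrate(); err != nil {
		global.LOG.Error(err)
	}
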
@@ -746,6 +746,8 @@ const message = {
                 'After /models in the startup command, the model name needs to be completed; if external access is required, set the port in the command to be the same as the application port',
             imageAlert:
                 'Due to the large image size, it is recommended to manually download the image to the server before installation',
+            modelSpeedup: 'Enable model acceleration',
+            modelType: 'Model type',
         },
     },
     container: {

@@ -747,6 +747,8 @@ const message = {
                 'Después de /models en el comando de inicio, se debe completar el nombre del modelo; si se requiere acceso externo, configure el puerto en el comando para que sea el mismo que el puerto de la aplicación',
             imageAlert:
                 'Debido al gran tamaño de la imagen, se recomienda descargar manualmente la imagen al servidor antes de la instalación',
+            modelSpeedup: 'Habilitar aceleración de modelo',
+            modelType: 'Tipo de modelo',
         },
     },
     container: {

@@ -734,6 +734,8 @@ const message = {
                 '起動コマンドの /models の後にはモデル名を補完する必要があります;外部アクセスが必要な場合は、コマンド内のポートをアプリケーションポートと同じに設定してください',
             imageAlert:
                 'イメージサイズが大きいため、インストール前にサーバーにイメージを手動でダウンロードすることをお勧めします',
+            modelSpeedup: 'モデル加速を有効化',
+            modelType: 'モデルタイプ',
         },
     },
     container: {

@@ -729,6 +729,8 @@ const message = {
             commandHelper:
                 '시작 명령의 /models 뒤에는 모델 이름을 완성해야 합니다; 외부 액세스가 필요한 경우 명령의 포트를 애플리케이션 포트와 동일하게 설정하세요',
             imageAlert: '이미지 크기가 크므로 설치 전에 서버에 이미지를 수동으로 다운로드하는 것이 좋습니다',
+            modelSpeedup: '모델 가속 활성화',
+            modelType: '모델 유형',
         },
     },
     container: {

@@ -747,6 +747,8 @@ const message = {
                 'Selepas /models dalam arahan permulaan, nama model perlu dilengkapkan; jika akses luar diperlukan, tetapkan port dalam arahan sama dengan port aplikasi',
             imageAlert:
                 'Disebabkan saiz imej yang besar, disyorkan untuk memuat turun imej secara manual ke pelayan sebelum pemasangan',
+            modelSpeedup: 'Dayakan pecutan model',
+            modelType: 'Jenis model',
         },
     },
     container: {

@@ -743,6 +743,8 @@ const message = {
                 'Após /models no comando de inicialização, o nome do modelo precisa ser completado; se for necessário acesso externo, defina a porta no comando para ser a mesma que a porta do aplicativo',
             imageAlert:
                 'Devido ao grande tamanho da imagem, recomenda-se baixar manualmente a imagem para o servidor antes da instalação',
+            modelSpeedup: 'Ativar aceleração de modelo',
+            modelType: 'Tipo de modelo',
         },
     },
     container: {

@@ -741,6 +741,8 @@ const message = {
                 'После /models в команде запуска необходимо указать имя модели; если требуется внешний доступ, установите порт в команде таким же, как порт приложения',
             imageAlert:
                 'Из-за большого размера образа рекомендуется вручную загрузить образ на сервер перед установкой',
+            modelSpeedup: 'Включить ускорение модели',
+            modelType: 'Тип модели',
         },
     },
     container: {

@@ -755,6 +755,8 @@ const message = {
                 'Başlatma komutundaki /models sonrasında model adı tamamlanmalıdır; harici erişim gerekiyorsa, komuttaki bağlantı noktasını uygulama bağlantı noktasıyla aynı olacak şekilde ayarlayın',
             imageAlert:
                 'Görüntü boyutu büyük olduğundan, kurulumdan önce görüntüyü sunucuya manuel olarak indirmeniz önerilir',
+            modelSpeedup: 'Model hızlandırmayı etkinleştir',
+            modelType: 'Model türü',
         },
     },
     container: {

@@ -716,6 +716,8 @@ const message = {
             modelDir: '模型目錄',
             commandHelper: '啟動指令中的 /models 後需補全模型名稱;若需外部訪問,請將指令中的埠設定為與應用埠相同',
             imageAlert: '由於鏡像較大,建議先手動將鏡像下載到伺服器後再進行安裝',
+            modelSpeedup: '啟用模型加速',
+            modelType: '模型類型',
         },
     },
     container: {

@@ -714,8 +714,10 @@ const message = {
         tensorRT: {
             llm: 'TensorRT LLM',
             modelDir: '模型目录',
-            commandHelper: '启动命令中的 /models 后需补全模型名称;若需外部访问,请将命令中的端口设置为与应用端口相同',
+            commandHelper: '若需外部访问,请将命令中的端口设置为与应用端口相同',
             imageAlert: '由于镜像较大,建议先手动将镜像下载到服务器后再进行安装',
+            modelSpeedup: '启用模型加速',
+            modelType: '模型类型',
         },
     },
     container: {

@@ -14,6 +14,20 @@
             <el-form-item :label="$t('app.version')" prop="version">
                 <el-input v-model.trim="tensorRTLLM.version" />
             </el-form-item>
+            <div v-if="isFxplay">
+                <el-form-item :label="$t('aiTools.tensorRT.modelSpeedup')" prop="modelSpeedup">
+                    <el-switch v-model="tensorRTLLM.modelSpeedup" @change="changeModelSpeedup"></el-switch>
+                </el-form-item>
+                <el-form-item
+                    :label="$t('aiTools.tensorRT.modelType')"
+                    prop="modelType"
+                    v-if="tensorRTLLM.modelSpeedup"
+                >
+                    <el-select v-model="tensorRTLLM.modelType">
+                        <el-option label="Qwen3" value="Qwen3" />
+                    </el-select>
+                </el-form-item>
+            </div>
             <el-form-item :label="$t('aiTools.tensorRT.modelDir')" prop="modelDir">
                 <el-input v-model="tensorRTLLM.modelDir">
                     <template #prepend>

@@ -52,6 +66,8 @@ import i18n from '@/lang';
 import { ElForm, FormInstance } from 'element-plus';
 import { createTensorRTLLM, updateTensorRTLLM } from '@/api/modules/ai';
 import { MsgSuccess } from '@/utils/message';
+import { useGlobalStore } from '@/composables/useGlobalStore';
+const { isFxplay } = useGlobalStore();
 
 const loading = ref(false);
 const mode = ref('create');

@@ -63,11 +79,13 @@ const newTensorRTLLM = () => {
         version: '1.2.0rc0',
         modelDir: '',
         image: 'nvcr.io/nvidia/tensorrt-llm/release',
-        command: 'bash -c "trtllm-serve /models/ --host 0.0.0.0 --port 8000"',
+        command: 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"',
         exposedPorts: [],
         environments: [],
         extraHosts: [],
         volumes: [],
+        modelSpeedup: false,
+        modelType: 'Qwen3',
     };
 };
 const modelDirRef = ref();

@@ -106,6 +124,15 @@ const getModelDir = (path: string) => {
     tensorRTLLM.value.modelDir = path;
 };
 
+const changeModelSpeedup = () => {
+    if (tensorRTLLM.value.modelSpeedup) {
+        tensorRTLLM.value.command =
+            'bash -c "${MODEL_PATH}/fusionxpark_accelerator --model_path ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
+    } else {
+        tensorRTLLM.value.command = 'bash -c "trtllm-serve ${MODEL_PATH} --host 0.0.0.0 --port 8000"';
+    }
+};
+
 const rules = reactive({
     name: [Rules.requiredInput],
     version: [Rules.requiredInput],

@@ -113,6 +140,7 @@ const rules = reactive({
     containerName: [Rules.requiredInput],
     image: [Rules.requiredInput],
     command: [Rules.requiredInput],
+    modelType: [Rules.requiredSelect],
 });
 
 const formRef = ref<FormInstance>();

@@ -6,7 +6,9 @@
         <el-tab-pane :label="$t('runtime.environment')">
             <Environment :environments="runtime.environments" />
         </el-tab-pane>
-        <el-tab-pane :label="$t('container.mount')"><Volumes :volumes="runtime.volumes" /></el-tab-pane>
+        <el-tab-pane :label="$t('container.mount')">
+            <Volumes :volumes="runtime.volumes" />
+        </el-tab-pane>
         <el-tab-pane :label="$t('runtime.extraHosts')">
             <ExtraHosts :extraHosts="runtime.extraHosts" />
         </el-tab-pane>

@@ -18,17 +20,26 @@ import PortConfig from '@/views/website/runtime/components/port/index.vue';
 import Environment from '@/views/website/runtime/components/environment/index.vue';
 import Volumes from '@/views/website/runtime/components/volume/index.vue';
 import ExtraHosts from '@/views/website/runtime/components/extra_hosts/index.vue';
 
-import { Runtime } from '@/api/interface/runtime';
 import { useVModel } from '@vueuse/core';
 import type { PropType } from 'vue';
 
+interface RuntimeConfigFields {
+    exposedPorts: any[];
+    environments: any[];
+    volumes: any[];
+    extraHosts: any[];
+}
+
 const props = defineProps({
     modelValue: {
-        type: Object as PropType<Runtime.Runtime>,
+        type: Object as PropType<RuntimeConfigFields>,
         required: true,
     },
 });
 
-const emit = defineEmits(['update:modelValue']);
+const emit = defineEmits<{
+    'update:modelValue': [value: RuntimeConfigFields];
+}>();
 
 const runtime = useVModel(props, 'modelValue', emit);
 </script>