feat: 增加LM Studio服务器支持

- 新增LM Studio服务器支持,可以通过--lms-url参数指定LM Studio服务器
- 优化wake-url参数为可选配置,不再强制要求配置唤醒服务器
- 根据服务器类型动态调整API端点路径
- 改进错误信息和日志输出,更好地区分服务器类型
- 重构配置验证逻辑,确保OLLAMA_URL和LMS_URL不会同时配置
This commit is contained in:
yshtcn 2025-02-03 17:48:38 +08:00
parent 7ca4144913
commit f3944e5a62
2 changed files with 179 additions and 38 deletions

View File

@ -14,19 +14,21 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Parse command-line arguments.
# NOTE(review): this listing looks like a rendered diff in which removed and
# added lines are interleaved (two ArgumentParser assignments, two --wake-url
# and two --wake-interval registrations) -- confirm against the real file.
parser = argparse.ArgumentParser(description='Ollama代理服务器')
parser = argparse.ArgumentParser(description='代理服务器')
parser.add_argument('--ollama-url', help='Ollama服务器URL')
parser.add_argument('--wake-url', help='唤醒服务器URL')
parser.add_argument('--lms-url', help='LM Studio服务器URL')
parser.add_argument('--wake-url', help='唤醒服务器URL可选')
parser.add_argument('--timeout', type=int, help='简单请求的超时时间(秒)')
parser.add_argument('--model-timeout', type=int, help='模型推理请求的超时时间(秒)')
parser.add_argument('--port', type=int, help='代理服务器端口')
parser.add_argument('--wake-interval', type=int, default=10, help='唤醒间隔时间(分钟)')
parser.add_argument('--wake-interval', type=int, default=10, help='唤醒间隔时间(分钟)仅在配置wake-url时有效')
parser.add_argument('--cache-duration', type=int, help='模型列表缓存有效期(分钟)默认1440分钟(1天)')
args = parser.parse_args()
# Configuration constants: an environment variable takes precedence, the
# command-line argument is the fallback.
OLLAMA_URL = os.getenv('OLLAMA_URL') or args.ollama_url
LMS_URL = os.getenv('LMS_URL') or args.lms_url
WAKE_URL = os.getenv('WAKE_URL') or args.wake_url
# NOTE(review): os.getenv returns a str and, unlike MODEL_TIMEOUT_SECONDS
# below, this value is not passed through int() -- confirm that every consumer
# accepts a str when the environment variable is set.
TIMEOUT_SECONDS = os.getenv('TIMEOUT_SECONDS') or args.timeout
MODEL_TIMEOUT_SECONDS = int(os.getenv('MODEL_TIMEOUT_SECONDS') or args.model_timeout or 30) # defaults to 30 seconds
@ -34,12 +36,28 @@ PORT = os.getenv('PORT') or args.port
# Wake interval in minutes; args.wake_interval carries the argparse default.
WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or args.wake_interval)
CACHE_DURATION = int(os.getenv('CACHE_DURATION') or args.cache_duration or 1440) # defaults to 1 day (in minutes)
# Check required parameters
# Check the URL configuration: exactly one of OLLAMA_URL / LMS_URL must be set,
# otherwise the process logs an error and exits with status 1.
if OLLAMA_URL and LMS_URL:
logger.error("不能同时配置 OLLAMA_URL 和 LMS_URL请只选择其中一个")
sys.exit(1)
elif not (OLLAMA_URL or LMS_URL):
logger.error("必须配置 OLLAMA_URL 或 LMS_URL 其中之一")
sys.exit(1)
# Set the server type, the base URL, the model-list path and the list of
# generation endpoints according to which URL was configured.
if OLLAMA_URL:
server_type = 'ollama'
BASE_URL = OLLAMA_URL
MODEL_LIST_PATH = 'api/tags'
GENERATE_ENDPOINTS = ["api/generate", "api/chat"]
else:
server_type = 'lmstudio'
BASE_URL = LMS_URL
MODEL_LIST_PATH = 'v1/models'
GENERATE_ENDPOINTS = ["v1/chat/completions"]
# Check the other required parameters, collecting the names of the missing ones.
# NOTE(review): the OLLAMA_URL / WAKE_URL checks below look like pre-change
# lines kept by the diff rendering -- confirm against the real file.
missing_params = []
if not OLLAMA_URL:
missing_params.append("OLLAMA_URL")
if not WAKE_URL:
missing_params.append("WAKE_URL")
if not TIMEOUT_SECONDS:
missing_params.append("TIMEOUT_SECONDS")
if not PORT:
@ -67,13 +85,19 @@ models_cache_time = None
async def should_wake():
    """Report whether a wake-up request should be sent now.

    Returns:
        False when WAKE_URL is not configured; True when no wake-up time has
        been recorded yet; otherwise whether more than WAKE_INTERVAL minutes
        have passed since last_wake_time.
    """
    global last_wake_time
    if not WAKE_URL:
        # Without a wake URL there is never anything to wake.
        return False
    if last_wake_time is None:
        return True
    elapsed = datetime.now() - last_wake_time
    return elapsed > timedelta(minutes=WAKE_INTERVAL)
async def wake_ollama():
"""唤醒 Ollama 服务器"""
"""唤醒服务器"""
if not WAKE_URL: # 如果没有配置WAKE_URL直接返回
return
global last_wake_time
try:
async with httpx.AsyncClient() as client:
@ -99,16 +123,6 @@ async def update_models_cache(data):
models_cache_time = datetime.now()
logger.info("模型列表缓存已更新")
# Log the current configuration, one setting per line.
logger.info(f"使用配置:")
logger.info(f"OLLAMA_URL: {OLLAMA_URL}")
logger.info(f"WAKE_URL: {WAKE_URL}")
logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
logger.info(f"PORT: {PORT}")
logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
logger.info(f"CACHE_DURATION: {CACHE_DURATION} minutes")
# FastAPI application object that the route decorators below attach to.
app = FastAPI()
@app.get("/health")
@ -124,7 +138,7 @@ async def list_models():
async with httpx.AsyncClient() as client:
response = await client.get(
f"{OLLAMA_URL}/api/tags",
f"{BASE_URL}/{MODEL_LIST_PATH}",
timeout=TIMEOUT_SECONDS # 使用较短的超时时间
)
# 更新缓存并返回最新数据
@ -158,12 +172,12 @@ async def proxy(request: Request, path: str):
return await health_check()
# 其他请求的处理逻辑
if await should_wake():
if WAKE_URL and await should_wake():
logger.info("距离上次唤醒已超过设定时间,发送预防性唤醒请求")
await wake_ollama()
try:
target_url = f"{OLLAMA_URL}/{path}"
target_url = f"{BASE_URL}/{path}"
headers = dict(request.headers)
headers.pop('host', None)
headers.pop('connection', None)
@ -172,10 +186,10 @@ async def proxy(request: Request, path: str):
headers.pop('transfer-encoding', None)
# 根据请求类型选择不同的超时时间
timeout = TIMEOUT_SECONDS if path == "api/tags" else MODEL_TIMEOUT_SECONDS
timeout = TIMEOUT_SECONDS if path == MODEL_LIST_PATH else MODEL_TIMEOUT_SECONDS
# 检查是否为生成相关的端点
is_generate_endpoint = path in ["api/generate", "api/chat"]
is_generate_endpoint = path in GENERATE_ENDPOINTS
if is_generate_endpoint and request.method == "POST":
request_body = await request.json()
@ -223,7 +237,7 @@ async def proxy(request: Request, path: str):
)
# 如果是标签列表请求且成功,更新缓存
if path == "api/tags" and request.method == "GET" and response.status_code == 200:
if path == MODEL_LIST_PATH and request.method == "GET" and response.status_code == 200:
await update_models_cache(response.json())
return Response(
@ -233,25 +247,31 @@ async def proxy(request: Request, path: str):
)
except httpx.TimeoutException:
logger.warning("Ollama服务器超时发送唤醒请求")
# 如果是标签列表请求,尝试返回缓存
if path == "api/tags" and request.method == "GET":
cached_models = await get_models_from_cache()
if cached_models is not None:
logger.info("返回缓存的标签列表")
return JSONResponse(content=cached_models)
# 直接异步发送唤醒请求,不等待结果
asyncio.create_task(wake_ollama())
error_msg = "服务器超时"
if WAKE_URL:
error_msg += ",正在尝试唤醒"
logger.warning(f"{error_msg}")
# 如果是模型列表请求,尝试返回缓存
if path == MODEL_LIST_PATH and request.method == "GET":
cached_models = await get_models_from_cache()
if cached_models is not None:
logger.info("返回缓存的模型列表")
return JSONResponse(content=cached_models)
# 直接异步发送唤醒请求,不等待结果
asyncio.create_task(wake_ollama())
else:
logger.warning(error_msg)
return JSONResponse(
status_code=503,
content={"message": "服务器正在唤醒中,请稍后重试"}
content={"message": f"{error_msg},请稍后重试"}
)
except httpx.RequestError as e:
logger.error(f"请求错误: {str(e)}")
# 如果是标签列表请求,尝试返回缓存
if path == "api/tags" and request.method == "GET":
if path == MODEL_LIST_PATH and request.method == "GET":
cached_models = await get_models_from_cache()
if cached_models is not None:
logger.info("返回缓存的标签列表")
@ -259,7 +279,7 @@ async def proxy(request: Request, path: str):
return JSONResponse(
status_code=502,
content={"message": f"无法连接到Ollama服务器: {str(e)}"}
content={"message": f"无法连接到服务器: {str(e)}"}
)
except Exception as e:
@ -269,6 +289,20 @@ async def proxy(request: Request, path: str):
content={"message": f"代理请求失败: {str(e)}"}
)
# Log the effective configuration, one setting per line.
logger.info("使用配置:")
logger.info(f"服务器类型: {server_type}")
logger.info(f"BASE_URL: {BASE_URL}")
if not WAKE_URL:
    # The wake settings are only reported when a wake URL is configured.
    logger.info("未配置唤醒功能")
else:
    logger.info(f"WAKE_URL: {WAKE_URL}")
    logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
logger.info(f"PORT: {PORT}")
logger.info(f"CACHE_DURATION: {CACHE_DURATION} minutes")
# Start the proxy with uvicorn when this file is run as a script.
if __name__ == "__main__":
    import uvicorn
    # PORT is a str when it comes from the PORT environment variable and an int
    # when it comes from --port; normalise it to the int that uvicorn expects.
    uvicorn.run(app, host="0.0.0.0", port=int(PORT))

View File

@ -0,0 +1,107 @@
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser
# Relaunch this script elevated when the current session is not running as Administrator
$currentPrincipal = [Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()
$isAdministrator = $currentPrincipal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)
if (-not $isAdministrator) {
    # Start a new PowerShell process with the RunAs verb, then leave this one
    Start-Process powershell -ArgumentList "-NoProfile -ExecutionPolicy Bypass -File `"$PSCommandPath`"" -Verb RunAs
    exit
}
# Make sure the docker command can be resolved before going any further
$dockerCommand = Get-Command docker -ErrorAction SilentlyContinue
if ($null -eq $dockerCommand) {
    Write-Host "未找到 Docker 命令。请检查:" -ForegroundColor Red
    # Troubleshooting hints, all printed in yellow
    $hints = @(
        "1. Docker Desktop 是否已安装",
        "2. Docker Desktop 是否正在运行",
        "3. 环境变量是否正确设置",
        "`n典型的 Docker 安装路径为C:\Program Files\Docker\Docker\resources\bin",
        "您可能需要将此路径添加到系统的 PATH 环境变量中"
    )
    foreach ($hint in $hints) {
        Write-Host $hint -ForegroundColor Yellow
    }
    # Offer to open the system environment variable dialog (answer is case-insensitive)
    $answer = Read-Host "是否要打开系统环境变量设置?(Y/N)"
    if ($answer -ieq 'y') {
        Start-Process "SystemPropertiesAdvanced.exe"
    }
    exit
}
# Check that `docker version` succeeds; a non-zero exit code is treated as the Docker service not running
try {
    $null = docker version
    $dockerIsDown = $LASTEXITCODE -ne 0
    if ($dockerIsDown) {
        throw "Docker 服务未运行"
    }
} catch {
    Write-Host "Docker 服务似乎没有正常运行。请检查:" -ForegroundColor Red
    foreach ($hint in "1. Docker Desktop 是否已启动", "2. 等待 Docker Desktop 完全启动") {
        Write-Host $hint -ForegroundColor Yellow
    }
    exit
}
# Work from the directory that contains this script
Set-Location $PSScriptRoot
Write-Host "当前目录已切换为脚本所在目录: $PSScriptRoot"
# Today's date (yyyyMMdd) is the base of the image tag
$dateTime = Get-Date -Format "yyyyMMdd"
Write-Host "当前日期: $dateTime"
# Ask for the optional test revision suffix
$revision = Read-Host -Prompt "请输入Test版本号 ($dateTime,如果没有次数,请直接回车)"
Write-Host "输入的版本号: $revision"
# Version string: the date alone, or date + "Test_" + revision when a revision was entered
$version = $dateTime
if (-not [string]::IsNullOrWhiteSpace($revision)) {
    $version = "${dateTime}Test_$revision"
}
Write-Host "完整的版本号: $version"
# Build the Docker image tagged with the full version string
Write-Host "正在构建 Docker 镜像..."
# stderr of the build is captured in a temporary file so it can be shown on failure
$tempFileBuild = [System.IO.Path]::GetTempFileName()
docker build -t yshtcn/ollama-proxy:$version . 2> $tempFileBuild
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker 镜像构建失败" -ForegroundColor Red
    # -Raw returns the file as one string, so the line breaks of the captured
    # output survive Write-Host instead of being joined with spaces
    Write-Host (Get-Content $tempFileBuild -Raw) -ForegroundColor Red
    Remove-Item $tempFileBuild
    # Non-zero status so a calling process can detect the failure
    exit 1
}
Write-Host "Docker 镜像构建成功"
Remove-Item $tempFileBuild
# Push the image tagged with the full version string to Docker Hub
Write-Host "正在推送 Docker 镜像到 Docker Hub..."
# stderr of the push is captured in a temporary file so it can be shown on failure
$tempFilePush = [System.IO.Path]::GetTempFileName()
docker push yshtcn/ollama-proxy:$version 2> $tempFilePush
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker 镜像推送失败" -ForegroundColor Red
    # -Raw returns the file as one string, so the line breaks of the captured
    # output survive Write-Host instead of being joined with spaces
    Write-Host (Get-Content $tempFilePush -Raw) -ForegroundColor Red
    Remove-Item $tempFilePush
    # Non-zero status so a calling process can detect the failure
    exit 1
}
Write-Host "Docker 镜像推送成功"
Remove-Item $tempFilePush
# Tag the versioned image as 'test' and push that tag
Write-Host "正在为镜像打上 'test' 标签并推送..."
# stderr of both steps is captured in a temporary file so it can be shown on failure
$tempFilePushLatest = [System.IO.Path]::GetTempFileName()
docker tag yshtcn/ollama-proxy:$version yshtcn/ollama-proxy:test 2> $tempFilePushLatest
# Only push when tagging succeeded; otherwise the push could publish an older
# image that still carries the local 'test' tag
if ($LASTEXITCODE -eq 0) {
    docker push yshtcn/ollama-proxy:test 2> $tempFilePushLatest
}
# $LASTEXITCODE now holds the status of the push, or of the tag step if it failed
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker 镜像 'test' 标签推送失败" -ForegroundColor Red
    # -Raw returns the file as one string, so the line breaks of the captured
    # output survive Write-Host instead of being joined with spaces
    Write-Host (Get-Content $tempFilePushLatest -Raw) -ForegroundColor Red
    Remove-Item $tempFilePushLatest
    # Non-zero status so a calling process can detect the failure
    exit 1
}
Write-Host "Docker 镜像 'test' 标签推送成功"
Remove-Item $tempFilePushLatest
Write-Host "Docker 镜像构建和推送全部完成"
# Keep the window open until the user presses Enter
Write-Host "`n按回车键退出..." -ForegroundColor Green
Read-Host | Out-Null