Mirror of https://github.com/yshtcn/OllamaProxy.git (synced 2026-03-15 15:16:27 +08:00)

Compare commits: b8067e28a1...20250203_1 (3 commits)

Commits: f3944e5a62, 7ca4144913, a40fbadf7b
@@ -3,3 +3,5 @@ WAKE_URL=http://your-wake-server:9090/wol?mac=XX:XX:XX:XX:XX:XX
 TIMEOUT_SECONDS=1
 PORT=11434
 MODEL_TIMEOUT_SECONDS=30 # timeout for model inference requests (seconds)
+WAKE_INTERVAL=10 # wake-up interval (minutes)
+CACHE_DURATION=1440 # model list cache lifetime (minutes; default is 1 day)
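For context, a minimal sketch of the precedence these two new settings follow elsewhere in this change set (environment variable first, then CLI flag, then the default); the CLI variable names below are illustrative stand-ins, not the proxy's own:

```
import os

# Stand-ins for the parsed argparse values (args.wake_interval, args.cache_duration);
# names here are illustrative only.
cli_wake_interval = None
cli_cache_duration = None

# Environment variable first, then the CLI flag, then the documented default.
WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or cli_wake_interval or 10)        # minutes
CACHE_DURATION = int(os.getenv('CACHE_DURATION') or cli_cache_duration or 1440)   # minutes (1 day)

print(WAKE_INTERVAL, CACHE_DURATION)
```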
README.md (13 lines changed)
@@ -43,11 +43,11 @@ Ollama Proxy is a smart proxy server designed for the Ollama service; it …
 
 ### 3. Model list caching
 - Caches the model list returned by the `/api/tags` endpoint
-- The cache is valid for 30 minutes
-- Returns cached data when the primary service is unavailable
+- The cache lifetime is configurable and defaults to 1440 minutes (1 day)
+- Returns cached data when the primary service is unavailable, so clients can always retrieve the model list
 
 ### 4. Health check
 - Provides a `/health` endpoint for health status checks
 - The Docker container ships with a built-in health-check configuration
 
 ## Configuration parameters
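The caching rule described in the hunk above boils down to a simple freshness check. A minimal sketch, assuming the cache timestamp is recorded whenever `/api/tags` last succeeded (the names here are illustrative, not the proxy's internals):

```
from datetime import datetime, timedelta

CACHE_DURATION = 1440  # minutes, the documented default (1 day)

def cache_is_fresh(cached_at: datetime) -> bool:
    # The cached model list is served only while it is younger than CACHE_DURATION.
    return datetime.now() - cached_at <= timedelta(minutes=CACHE_DURATION)

# Example: a list fetched 30 minutes ago is still fresh under the default.
print(cache_is_fresh(datetime.now() - timedelta(minutes=30)))  # True
```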
@@ -62,6 +62,7 @@ Ollama Proxy is a smart proxy server designed for the Ollama service; it …
 | `--model-timeout` | `MODEL_TIMEOUT_SECONDS` | Timeout for model inference requests (seconds) | 30 |
 | `--port` | `PORT` | Proxy server port | 11434 |
 | `--wake-interval` | `WAKE_INTERVAL` | Wake-up interval (minutes) | 10 |
+| `--cache-duration` | `CACHE_DURATION` | Model list cache lifetime (minutes) | 1440 |
 
 ## Deployment
 
@@ -81,6 +82,9 @@ docker run -d \
   -e OLLAMA_URL=http://localhost:11434 \
   -e WAKE_URL=http://localhost:11434/api/generate \
   -e TIMEOUT_SECONDS=10 \
+  -e MODEL_TIMEOUT_SECONDS=30 \
+  -e WAKE_INTERVAL=10 \
+  -e CACHE_DURATION=1440 \
   -e PORT=11434 \
   yshtcn/ollama-proxy:latest
 ```
@@ -98,6 +102,9 @@ python ollama_proxy.py \
   --ollama-url http://localhost:11434 \
   --wake-url http://localhost:11434/api/generate \
   --timeout 10 \
+  --model-timeout 30 \
+  --wake-interval 10 \
+  --cache-duration 1440 \
   --port 11434
 ```
 
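Either deployment above leaves the proxy listening on port 11434, so existing Ollama clients can point at it unchanged. A quick manual check, as a minimal sketch assuming the proxy runs on localhost (httpx is used purely for illustration):

```
import httpx

# List models through the proxy; when the upstream server is asleep or
# unreachable, the proxy answers from its cached copy of this list.
resp = httpx.get("http://localhost:11434/api/tags", timeout=15)
print(resp.status_code, resp.json())

# Liveness probe exposed by the proxy itself.
print(httpx.get("http://localhost:11434/health").status_code)
```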
ollama_proxy.py (149 lines changed)
@@ -1,5 +1,5 @@
 from fastapi import FastAPI, Request, Response, HTTPException
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, StreamingResponse
 import httpx
 import asyncio
 import logging
@@ -7,36 +7,57 @@ import os
 import argparse
 import sys
 from datetime import datetime, timedelta
+import json
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 # Parse command-line arguments
-parser = argparse.ArgumentParser(description='Ollama proxy server')
+parser = argparse.ArgumentParser(description='Proxy server')
 parser.add_argument('--ollama-url', help='Ollama server URL')
-parser.add_argument('--wake-url', help='Wake-up server URL')
+parser.add_argument('--lms-url', help='LM Studio server URL')
+parser.add_argument('--wake-url', help='Wake-up server URL (optional)')
 parser.add_argument('--timeout', type=int, help='Timeout for simple requests (seconds)')
 parser.add_argument('--model-timeout', type=int, help='Timeout for model inference requests (seconds)')
 parser.add_argument('--port', type=int, help='Proxy server port')
-parser.add_argument('--wake-interval', type=int, default=10, help='Wake-up interval (minutes)')
+parser.add_argument('--wake-interval', type=int, default=10, help='Wake-up interval (minutes); only used when wake-url is configured')
+parser.add_argument('--cache-duration', type=int, help='Model list cache lifetime (minutes), default 1440 minutes (1 day)')
 
 args = parser.parse_args()
 
 # Configuration constants; environment variables take precedence over CLI arguments
 OLLAMA_URL = os.getenv('OLLAMA_URL') or args.ollama_url
+LMS_URL = os.getenv('LMS_URL') or args.lms_url
 WAKE_URL = os.getenv('WAKE_URL') or args.wake_url
 TIMEOUT_SECONDS = os.getenv('TIMEOUT_SECONDS') or args.timeout
 MODEL_TIMEOUT_SECONDS = int(os.getenv('MODEL_TIMEOUT_SECONDS') or args.model_timeout or 30)  # default 30 seconds
 PORT = os.getenv('PORT') or args.port
 WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or args.wake_interval)
+CACHE_DURATION = int(os.getenv('CACHE_DURATION') or args.cache_duration or 1440)  # default 1 day
 
-# Check required parameters
+# Check the URL configuration
+if OLLAMA_URL and LMS_URL:
+    logger.error("OLLAMA_URL and LMS_URL cannot both be configured; please choose only one")
+    sys.exit(1)
+elif not (OLLAMA_URL or LMS_URL):
+    logger.error("Either OLLAMA_URL or LMS_URL must be configured")
+    sys.exit(1)
+
+# Set the server type and base URL
+if OLLAMA_URL:
+    server_type = 'ollama'
+    BASE_URL = OLLAMA_URL
+    MODEL_LIST_PATH = 'api/tags'
+    GENERATE_ENDPOINTS = ["api/generate", "api/chat"]
+else:
+    server_type = 'lmstudio'
+    BASE_URL = LMS_URL
+    MODEL_LIST_PATH = 'v1/models'
+    GENERATE_ENDPOINTS = ["v1/chat/completions"]
+
+# Check the remaining required parameters
 missing_params = []
-if not OLLAMA_URL:
-    missing_params.append("OLLAMA_URL")
-if not WAKE_URL:
-    missing_params.append("WAKE_URL")
 if not TIMEOUT_SECONDS:
     missing_params.append("TIMEOUT_SECONDS")
 if not PORT:
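The server-type block above is what keeps the rest of the proxy backend-agnostic: later hunks only ever consult BASE_URL, MODEL_LIST_PATH and GENERATE_ENDPOINTS. A small sketch, simplified from those later hunks, of how the constants drive per-request decisions (values shown are the Ollama case; the LM Studio branch would use 'v1/models' and ["v1/chat/completions"]):

```
# Illustrative constants for the Ollama case.
MODEL_LIST_PATH = 'api/tags'
GENERATE_ENDPOINTS = ["api/generate", "api/chat"]
TIMEOUT_SECONDS, MODEL_TIMEOUT_SECONDS = 10, 30

def plan_request(path: str) -> dict:
    # Model-list requests get the short timeout; generate/chat requests are streamed.
    return {
        "timeout": TIMEOUT_SECONDS if path == MODEL_LIST_PATH else MODEL_TIMEOUT_SECONDS,
        "stream": path in GENERATE_ENDPOINTS,
    }

print(plan_request("api/tags"))   # {'timeout': 10, 'stream': False}
print(plan_request("api/chat"))   # {'timeout': 30, 'stream': True}
```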
@@ -61,17 +82,22 @@ last_wake_time = None
 # Cache-related variables
 models_cache = None
 models_cache_time = None
-CACHE_DURATION = timedelta(minutes=30)  # cache lifetime: 30 minutes
 
 async def should_wake():
     """Check whether a wake-up request needs to be sent"""
+    if not WAKE_URL:  # never wake if WAKE_URL is not configured
+        return False
+
     global last_wake_time
     if last_wake_time is None:
         return True
     return datetime.now() - last_wake_time > timedelta(minutes=WAKE_INTERVAL)
 
 async def wake_ollama():
-    """Wake up the Ollama server"""
+    """Wake up the server"""
+    if not WAKE_URL:  # return immediately if WAKE_URL is not configured
+        return
+
     global last_wake_time
     try:
         async with httpx.AsyncClient() as client:
@@ -86,7 +112,7 @@ async def get_models_from_cache():
     global models_cache, models_cache_time
     if models_cache is None or models_cache_time is None:
         return None
-    if datetime.now() - models_cache_time > CACHE_DURATION:
+    if datetime.now() - models_cache_time > timedelta(minutes=CACHE_DURATION):
         return None
     return models_cache
 
@@ -97,15 +123,6 @@ async def update_models_cache(data):
     models_cache_time = datetime.now()
     logger.info("Model list cache updated")
 
-# Print the current configuration
-logger.info(f"Configuration in use:")
-logger.info(f"OLLAMA_URL: {OLLAMA_URL}")
-logger.info(f"WAKE_URL: {WAKE_URL}")
-logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
-logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
-logger.info(f"PORT: {PORT}")
-logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
-
 app = FastAPI()
 
 @app.get("/health")
@@ -121,7 +138,7 @@ async def list_models():
 
     async with httpx.AsyncClient() as client:
         response = await client.get(
-            f"{OLLAMA_URL}/api/tags",
+            f"{BASE_URL}/{MODEL_LIST_PATH}",
             timeout=TIMEOUT_SECONDS  # use the shorter timeout
         )
         # Update the cache and return the latest data
@@ -155,32 +172,72 @@ async def proxy(request: Request, path: str):
         return await health_check()
 
     # Handle all other requests
-    if await should_wake():
+    if WAKE_URL and await should_wake():
         logger.info("The configured interval since the last wake-up has elapsed; sending a preventive wake-up request")
         await wake_ollama()
 
-    async with httpx.AsyncClient() as client:
     try:
-        target_url = f"{OLLAMA_URL}/{path}"
+        target_url = f"{BASE_URL}/{path}"
-        body = await request.body()
         headers = dict(request.headers)
         headers.pop('host', None)
         headers.pop('connection', None)
+        # Remove headers that may cause problems
+        headers.pop('content-length', None)
+        headers.pop('transfer-encoding', None)
 
         # Pick the timeout based on the request type
-        timeout = TIMEOUT_SECONDS if path == "api/tags" else MODEL_TIMEOUT_SECONDS
+        timeout = TIMEOUT_SECONDS if path == MODEL_LIST_PATH else MODEL_TIMEOUT_SECONDS
 
+        # Check whether this is a generation-related endpoint
+        is_generate_endpoint = path in GENERATE_ENDPOINTS
+
+        if is_generate_endpoint and request.method == "POST":
+            request_body = await request.json()
+            # Force stream=true to enable streaming
+            request_body["stream"] = True
+
+            async def generate_stream():
+                client = httpx.AsyncClient()
+                try:
+                    async with client.stream(
+                        method=request.method,
+                        url=target_url,
+                        json=request_body,
+                        headers=headers,
+                        timeout=None  # no overall timeout for streaming
+                    ) as response:
+                        async for line in response.aiter_lines():
+                            if line.strip():  # skip empty lines
+                                yield line.encode('utf-8') + b'\n'
+                except httpx.TimeoutException as e:
+                    logger.error(f"Streaming timed out: {str(e)}")
+                    raise
+                except Exception as e:
+                    logger.error(f"Error while streaming: {str(e)}")
+                    raise
+                finally:
+                    await client.aclose()
+
+            return StreamingResponse(
+                generate_stream(),
+                media_type="application/x-ndjson",
+                headers={'Transfer-Encoding': 'chunked'}  # use chunked transfer encoding
+            )
+        else:
+            # Handle non-generation requests
+            async with httpx.AsyncClient() as client:
+                body = await request.body()
                 response = await client.request(
                     method=request.method,
                     url=target_url,
                     content=body,
                     headers=headers,
-                    timeout=timeout,  # use the dynamic timeout
+                    timeout=timeout,
                     follow_redirects=True
                 )
 
                 # If this is a model list request and it succeeded, update the cache
-                if path == "api/tags" and request.method == "GET" and response.status_code == 200:
+                if path == MODEL_LIST_PATH and request.method == "GET" and response.status_code == 200:
                     await update_models_cache(response.json())
 
                 return Response(
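Because generation endpoints are now forced to stream and relayed as NDJSON, a client has to read the response line by line. A minimal consumer sketch, assuming the proxy runs on localhost:11434 in Ollama mode (the model name and prompt are placeholders):

```
import httpx
import json

payload = {"model": "llama3", "prompt": "Hello"}  # placeholder model and prompt

with httpx.stream("POST", "http://localhost:11434/api/generate",
                  json=payload, timeout=None) as response:
    for line in response.iter_lines():
        if not line:
            continue
        chunk = json.loads(line)  # one JSON object per NDJSON line
        print(chunk.get("response", ""), end="", flush=True)
```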
@@ -190,25 +247,31 @@ async def proxy(request: Request, path: str):
         )
 
     except httpx.TimeoutException:
-        logger.warning("Ollama server timed out, sending a wake-up request")
-        # If this is a model list request, try to return the cache
-        if path == "api/tags" and request.method == "GET":
+        error_msg = "Server timed out"
+        if WAKE_URL:
+            error_msg += ", attempting to wake it up"
+            logger.warning(f"{error_msg}")
+            # If this is a model list request, try to return the cache
+            if path == MODEL_LIST_PATH and request.method == "GET":
                 cached_models = await get_models_from_cache()
                 if cached_models is not None:
-                    logger.info("Returning the cached tag list")
+                    logger.info("Returning the cached model list")
                     return JSONResponse(content=cached_models)
 
             # Fire the wake-up request asynchronously without waiting for the result
             asyncio.create_task(wake_ollama())
+        else:
+            logger.warning(error_msg)
 
         return JSONResponse(
             status_code=503,
-            content={"message": "The server is waking up, please retry later"}
+            content={"message": f"{error_msg}, please retry later"}
         )
 
     except httpx.RequestError as e:
         logger.error(f"Request error: {str(e)}")
         # If this is a model list request, try to return the cache
-        if path == "api/tags" and request.method == "GET":
+        if path == MODEL_LIST_PATH and request.method == "GET":
             cached_models = await get_models_from_cache()
             if cached_models is not None:
                 logger.info("Returning the cached tag list")
@@ -216,7 +279,7 @@ async def proxy(request: Request, path: str):
 
         return JSONResponse(
             status_code=502,
-            content={"message": f"Could not connect to the Ollama server: {str(e)}"}
+            content={"message": f"Could not connect to the server: {str(e)}"}
         )
 
     except Exception as e:
@@ -226,6 +289,20 @@ async def proxy(request: Request, path: str):
             content={"message": f"Proxy request failed: {str(e)}"}
         )
 
+# Print the current configuration
+logger.info(f"Configuration in use:")
+logger.info(f"Server type: {server_type}")
+logger.info(f"BASE_URL: {BASE_URL}")
+if WAKE_URL:
+    logger.info(f"WAKE_URL: {WAKE_URL}")
+    logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
+else:
+    logger.info("Wake-up feature not configured")
+logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
+logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
+logger.info(f"PORT: {PORT}")
+logger.info(f"CACHE_DURATION: {CACHE_DURATION} minutes")
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=PORT)
ollama_proxy_docker_TestVerBuilder.ps1 (new file, 107 lines)
@@ -0,0 +1,107 @@
+# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser
+
+# Check whether we are running with administrator privileges
+if (-NOT ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator")) {
+    # Request administrator privileges
+    Start-Process powershell -ArgumentList "-NoProfile -ExecutionPolicy Bypass -File `"$PSCommandPath`"" -Verb RunAs
+    exit
+}
+
+# Check whether Docker is installed and available
+$dockerPath = Get-Command docker -ErrorAction SilentlyContinue
+if (-not $dockerPath) {
+    Write-Host "Docker command not found. Please check:" -ForegroundColor Red
+    Write-Host "1. Whether Docker Desktop is installed" -ForegroundColor Yellow
+    Write-Host "2. Whether Docker Desktop is running" -ForegroundColor Yellow
+    Write-Host "3. Whether the environment variables are set correctly" -ForegroundColor Yellow
+    Write-Host "`nThe typical Docker installation path is: C:\Program Files\Docker\Docker\resources\bin" -ForegroundColor Yellow
+    Write-Host "You may need to add this path to the system PATH environment variable" -ForegroundColor Yellow
+
+    $response = Read-Host "Open the system environment variable settings? (Y/N)"
+    if ($response -eq 'Y' -or $response -eq 'y') {
+        Start-Process "SystemPropertiesAdvanced.exe"
+    }
+    exit
+}
+
+# Check whether the Docker service is running
+try {
+    $dockerVersion = docker version
+    if ($LASTEXITCODE -ne 0) {
+        throw "Docker service is not running"
+    }
+} catch {
+    Write-Host "The Docker service does not appear to be running properly. Please check:" -ForegroundColor Red
+    Write-Host "1. Whether Docker Desktop has been started" -ForegroundColor Yellow
+    Write-Host "2. Wait for Docker Desktop to finish starting" -ForegroundColor Yellow
+    exit
+}
+
+# Switch to the script's directory
+Set-Location $PSScriptRoot
+Write-Host "Current directory switched to the script directory: $PSScriptRoot"
+
+# Get the current date and time
+$dateTime = Get-Date -Format "yyyyMMdd"
+Write-Host "Current date: $dateTime"
+
+# Prompt for the last part of the version number
+$revision = Read-Host -Prompt "Enter the test version suffix ($dateTime; press Enter if there is no build count)"
+Write-Host "Version suffix entered: $revision"
+
+# Build the version number
+if ([string]::IsNullOrWhiteSpace($revision)) {
+    $version = "$dateTime"
+} else {
+    $version = "$dateTime" + "Test_$revision"
+}
+Write-Host "Full version number: $version"
+
+# Build the Docker image with the full version tag
+Write-Host "Building the Docker image..."
+$tempFileBuild = [System.IO.Path]::GetTempFileName()
+docker build -t yshtcn/ollama-proxy:$version . 2> $tempFileBuild
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Docker image build failed" -ForegroundColor Red
+    Write-Host (Get-Content $tempFileBuild) -ForegroundColor Red
+    Remove-Item $tempFileBuild
+    exit
+}
+Write-Host "Docker image built successfully"
+Remove-Item $tempFileBuild
+
+# Push the versioned Docker image to Docker Hub
+Write-Host "Pushing the Docker image to Docker Hub..."
+$tempFilePush = [System.IO.Path]::GetTempFileName()
+docker push yshtcn/ollama-proxy:$version 2> $tempFilePush
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Docker image push failed" -ForegroundColor Red
+    Write-Host (Get-Content $tempFilePush) -ForegroundColor Red
+    Remove-Item $tempFilePush
+    exit
+}
+Write-Host "Docker image pushed successfully"
+Remove-Item $tempFilePush
+
+# Tag the image as 'test' and push it
+Write-Host "Tagging the image as 'test' and pushing..."
+$tempFilePushLatest = [System.IO.Path]::GetTempFileName()
+docker tag yshtcn/ollama-proxy:$version yshtcn/ollama-proxy:test
+docker push yshtcn/ollama-proxy:test 2> $tempFilePushLatest
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Pushing the Docker image 'test' tag failed" -ForegroundColor Red
+    Write-Host (Get-Content $tempFilePushLatest) -ForegroundColor Red
+    Remove-Item $tempFilePushLatest
+    exit
+}
+Write-Host "Docker image 'test' tag pushed successfully"
+Remove-Item $tempFilePushLatest
+
+Write-Host "Docker image build and push completed"
+
+# Wait for user confirmation before closing
+Write-Host "`nPress Enter to exit..." -ForegroundColor Green
+$null = Read-Host