Compare commits

..

3 Commits

Author SHA1 Message Date
yshtcn
f3944e5a62 feat: 增加LM Studio服务器支持
- 新增LM Studio服务器支持,可以通过--lms-url参数指定LM Studio服务器
- 优化wake-url参数为可选配置,不再强制要求配置唤醒服务器
- 根据服务器类型动态调整API端点路径
- 改进错误信息和日志输出,更好地区分服务器类型
- 重构配置验证逻辑,确保OLLAMA_URL和LMS_URL不会同时配置
2025-02-03 17:48:38 +08:00
yshtcn
7ca4144913 fix: 优化流式传输的超时处理
1. 移除流式传输的整体超时限制
2. 添加专门的超时错误处理
3. 避免与MODEL_TIMEOUT_SECONDS参数冲突
2025-01-27 18:40:15 +08:00
yshtcn
a40fbadf7b feat: 增强缓存功能
1. 添加可配置的缓存时间参数(--cache-duration)
2. 默认缓存时间从30分钟改为1天(1440分钟)
3. 支持通过环境变量CACHE_DURATION配置
4. 更新文档和配置示例
5. 修复了流式传输的问题
2025-01-27 18:35:30 +08:00
4 changed files with 278 additions and 85 deletions

View File

@@ -3,3 +3,5 @@ WAKE_URL=http://your-wake-server:9090/wol?mac=XX:XX:XX:XX:XX:XX
TIMEOUT_SECONDS=1 TIMEOUT_SECONDS=1
PORT=11434 PORT=11434
MODEL_TIMEOUT_SECONDS=30 # 模型推理请求的超时时间(秒) MODEL_TIMEOUT_SECONDS=30 # 模型推理请求的超时时间(秒)
WAKE_INTERVAL=10 # 唤醒间隔时间(分钟)
CACHE_DURATION=1440 # 模型列表缓存有效期(分钟),默认1天

View File

@@ -43,11 +43,11 @@ Ollama Proxy 是一个为 Ollama 服务设计的智能代理服务器,它提
### 3. 模型列表缓存 ### 3. 模型列表缓存
- 缓存 `/api/tags` 接口返回的模型列表 - 缓存 `/api/tags` 接口返回的模型列表
- 缓存有效期为30分钟 - 可配置缓存有效期默认为1440分钟1天
- 当主服务不可用时返回缓存数据 - 当主服务不可用时返回缓存数据,确保客户端始终可以获取模型列表
### 4. 健康检查 ### 4. 健康检查
- 提供 `/health` 端点进行健康状态检查 - 提供 `/health` 端点进行健康状态检查
- Docker 容器集成了健康检查配置 - Docker 容器集成了健康检查配置
## 配置参数 ## 配置参数
@@ -62,6 +62,7 @@ Ollama Proxy 是一个为 Ollama 服务设计的智能代理服务器,它提
| `--model-timeout` | `MODEL_TIMEOUT_SECONDS` | 模型推理请求超时时间(秒) | 30 | | `--model-timeout` | `MODEL_TIMEOUT_SECONDS` | 模型推理请求超时时间(秒) | 30 |
| `--port` | `PORT` | 代理服务器端口 | 11434 | | `--port` | `PORT` | 代理服务器端口 | 11434 |
| `--wake-interval` | `WAKE_INTERVAL` | 唤醒间隔时间(分钟) | 10 | | `--wake-interval` | `WAKE_INTERVAL` | 唤醒间隔时间(分钟) | 10 |
| `--cache-duration` | `CACHE_DURATION` | 模型列表缓存有效期(分钟) | 1440 |
## 部署方式 ## 部署方式
@@ -81,6 +82,9 @@ docker run -d \
-e OLLAMA_URL=http://localhost:11434 \ -e OLLAMA_URL=http://localhost:11434 \
-e WAKE_URL=http://localhost:11434/api/generate \ -e WAKE_URL=http://localhost:11434/api/generate \
-e TIMEOUT_SECONDS=10 \ -e TIMEOUT_SECONDS=10 \
-e MODEL_TIMEOUT_SECONDS=30 \
-e WAKE_INTERVAL=10 \
-e CACHE_DURATION=1440 \
-e PORT=11434 \ -e PORT=11434 \
yshtcn/ollama-proxy:latest yshtcn/ollama-proxy:latest
``` ```
@@ -98,6 +102,9 @@ python ollama_proxy.py \
--ollama-url http://localhost:11434 \ --ollama-url http://localhost:11434 \
--wake-url http://localhost:11434/api/generate \ --wake-url http://localhost:11434/api/generate \
--timeout 10 \ --timeout 10 \
--model-timeout 30 \
--wake-interval 10 \
--cache-duration 1440 \
--port 11434 --port 11434
``` ```

View File

@@ -1,5 +1,5 @@
from fastapi import FastAPI, Request, Response, HTTPException from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse, StreamingResponse
import httpx import httpx
import asyncio import asyncio
import logging import logging
@@ -7,36 +7,57 @@ import os
import argparse import argparse
import sys import sys
from datetime import datetime, timedelta from datetime import datetime, timedelta
import json
# 配置日志 # 配置日志
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# 解析命令行参数 # 解析命令行参数
parser = argparse.ArgumentParser(description='Ollama代理服务器') parser = argparse.ArgumentParser(description='代理服务器')
parser.add_argument('--ollama-url', help='Ollama服务器URL') parser.add_argument('--ollama-url', help='Ollama服务器URL')
parser.add_argument('--wake-url', help='唤醒服务器URL') parser.add_argument('--lms-url', help='LM Studio服务器URL')
parser.add_argument('--wake-url', help='唤醒服务器URL可选')
parser.add_argument('--timeout', type=int, help='简单请求的超时时间(秒)') parser.add_argument('--timeout', type=int, help='简单请求的超时时间(秒)')
parser.add_argument('--model-timeout', type=int, help='模型推理请求的超时时间(秒)') parser.add_argument('--model-timeout', type=int, help='模型推理请求的超时时间(秒)')
parser.add_argument('--port', type=int, help='代理服务器端口') parser.add_argument('--port', type=int, help='代理服务器端口')
parser.add_argument('--wake-interval', type=int, default=10, help='唤醒间隔时间(分钟)') parser.add_argument('--wake-interval', type=int, default=10, help='唤醒间隔时间(分钟)仅在配置wake-url时有效')
parser.add_argument('--cache-duration', type=int, help='模型列表缓存有效期(分钟)默认1440分钟(1天)')
args = parser.parse_args() args = parser.parse_args()
# 配置常量,优先使用环境变量,其次使用命令行参数 # 配置常量,优先使用环境变量,其次使用命令行参数
OLLAMA_URL = os.getenv('OLLAMA_URL') or args.ollama_url OLLAMA_URL = os.getenv('OLLAMA_URL') or args.ollama_url
LMS_URL = os.getenv('LMS_URL') or args.lms_url
WAKE_URL = os.getenv('WAKE_URL') or args.wake_url WAKE_URL = os.getenv('WAKE_URL') or args.wake_url
TIMEOUT_SECONDS = os.getenv('TIMEOUT_SECONDS') or args.timeout TIMEOUT_SECONDS = os.getenv('TIMEOUT_SECONDS') or args.timeout
MODEL_TIMEOUT_SECONDS = int(os.getenv('MODEL_TIMEOUT_SECONDS') or args.model_timeout or 30) # 默认30秒 MODEL_TIMEOUT_SECONDS = int(os.getenv('MODEL_TIMEOUT_SECONDS') or args.model_timeout or 30) # 默认30秒
PORT = os.getenv('PORT') or args.port PORT = os.getenv('PORT') or args.port
WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or args.wake_interval) WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or args.wake_interval)
CACHE_DURATION = int(os.getenv('CACHE_DURATION') or args.cache_duration or 1440) # 默认1天
# 检查必要参数 # 检查URL配置
if OLLAMA_URL and LMS_URL:
logger.error("不能同时配置 OLLAMA_URL 和 LMS_URL请只选择其中一个")
sys.exit(1)
elif not (OLLAMA_URL or LMS_URL):
logger.error("必须配置 OLLAMA_URL 或 LMS_URL 其中之一")
sys.exit(1)
# 设置服务器类型和基础URL
if OLLAMA_URL:
server_type = 'ollama'
BASE_URL = OLLAMA_URL
MODEL_LIST_PATH = 'api/tags'
GENERATE_ENDPOINTS = ["api/generate", "api/chat"]
else:
server_type = 'lmstudio'
BASE_URL = LMS_URL
MODEL_LIST_PATH = 'v1/models'
GENERATE_ENDPOINTS = ["v1/chat/completions"]
# 检查其他必要参数
missing_params = [] missing_params = []
if not OLLAMA_URL:
missing_params.append("OLLAMA_URL")
if not WAKE_URL:
missing_params.append("WAKE_URL")
if not TIMEOUT_SECONDS: if not TIMEOUT_SECONDS:
missing_params.append("TIMEOUT_SECONDS") missing_params.append("TIMEOUT_SECONDS")
if not PORT: if not PORT:
@@ -61,17 +82,22 @@ last_wake_time = None
# 添加缓存相关的变量 # 添加缓存相关的变量
models_cache = None models_cache = None
models_cache_time = None models_cache_time = None
CACHE_DURATION = timedelta(minutes=30) # 缓存有效期30分钟
async def should_wake(): async def should_wake():
"""检查是否需要发送唤醒请求""" """检查是否需要发送唤醒请求"""
if not WAKE_URL: # 如果没有配置WAKE_URL永远不需要唤醒
return False
global last_wake_time global last_wake_time
if last_wake_time is None: if last_wake_time is None:
return True return True
return datetime.now() - last_wake_time > timedelta(minutes=WAKE_INTERVAL) return datetime.now() - last_wake_time > timedelta(minutes=WAKE_INTERVAL)
async def wake_ollama(): async def wake_ollama():
"""唤醒 Ollama 服务器""" """唤醒服务器"""
if not WAKE_URL: # 如果没有配置WAKE_URL直接返回
return
global last_wake_time global last_wake_time
try: try:
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
@@ -86,7 +112,7 @@ async def get_models_from_cache():
global models_cache, models_cache_time global models_cache, models_cache_time
if models_cache is None or models_cache_time is None: if models_cache is None or models_cache_time is None:
return None return None
if datetime.now() - models_cache_time > CACHE_DURATION: if datetime.now() - models_cache_time > timedelta(minutes=CACHE_DURATION):
return None return None
return models_cache return models_cache
@@ -97,15 +123,6 @@ async def update_models_cache(data):
models_cache_time = datetime.now() models_cache_time = datetime.now()
logger.info("模型列表缓存已更新") logger.info("模型列表缓存已更新")
# 输出当前配置
logger.info(f"使用配置:")
logger.info(f"OLLAMA_URL: {OLLAMA_URL}")
logger.info(f"WAKE_URL: {WAKE_URL}")
logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
logger.info(f"PORT: {PORT}")
logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
app = FastAPI() app = FastAPI()
@app.get("/health") @app.get("/health")
@@ -121,7 +138,7 @@ async def list_models():
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
response = await client.get( response = await client.get(
f"{OLLAMA_URL}/api/tags", f"{BASE_URL}/{MODEL_LIST_PATH}",
timeout=TIMEOUT_SECONDS # 使用较短的超时时间 timeout=TIMEOUT_SECONDS # 使用较短的超时时间
) )
# 更新缓存并返回最新数据 # 更新缓存并返回最新数据
@@ -155,76 +172,136 @@ async def proxy(request: Request, path: str):
return await health_check() return await health_check()
# 其他请求的处理逻辑 # 其他请求的处理逻辑
if await should_wake(): if WAKE_URL and await should_wake():
logger.info("距离上次唤醒已超过设定时间,发送预防性唤醒请求") logger.info("距离上次唤醒已超过设定时间,发送预防性唤醒请求")
await wake_ollama() await wake_ollama()
async with httpx.AsyncClient() as client: try:
try: target_url = f"{BASE_URL}/{path}"
target_url = f"{OLLAMA_URL}/{path}" headers = dict(request.headers)
body = await request.body() headers.pop('host', None)
headers = dict(request.headers) headers.pop('connection', None)
headers.pop('host', None) # 移除可能导致问题的头部
headers.pop('connection', None) headers.pop('content-length', None)
headers.pop('transfer-encoding', None)
# 根据请求类型选择不同的超时时间 # 根据请求类型选择不同的超时时间
timeout = TIMEOUT_SECONDS if path == "api/tags" else MODEL_TIMEOUT_SECONDS timeout = TIMEOUT_SECONDS if path == MODEL_LIST_PATH else MODEL_TIMEOUT_SECONDS
response = await client.request( # 检查是否为生成相关的端点
method=request.method, is_generate_endpoint = path in GENERATE_ENDPOINTS
url=target_url,
content=body, if is_generate_endpoint and request.method == "POST":
headers=headers, request_body = await request.json()
timeout=timeout, # 使用动态超时时间 # 强制设置stream为true以启用流式传输
follow_redirects=True request_body["stream"] = True
async def generate_stream():
client = httpx.AsyncClient()
try:
async with client.stream(
method=request.method,
url=target_url,
json=request_body,
headers=headers,
timeout=None # 流式传输不设置整体超时
) as response:
async for line in response.aiter_lines():
if line.strip(): # 忽略空行
yield line.encode('utf-8') + b'\n'
except httpx.TimeoutError as e:
logger.error(f"流式传输超时: {str(e)}")
raise
except Exception as e:
logger.error(f"流式传输时发生错误: {str(e)}")
raise
finally:
await client.aclose()
return StreamingResponse(
generate_stream(),
media_type="application/x-ndjson",
headers={'Transfer-Encoding': 'chunked'} # 使用分块传输编码
) )
else:
# 非生成请求的处理
async with httpx.AsyncClient() as client:
body = await request.body()
response = await client.request(
method=request.method,
url=target_url,
content=body,
headers=headers,
timeout=timeout,
follow_redirects=True
)
# 如果是标签列表请求且成功,更新缓存 # 如果是标签列表请求且成功,更新缓存
if path == "api/tags" and request.method == "GET" and response.status_code == 200: if path == MODEL_LIST_PATH and request.method == "GET" and response.status_code == 200:
await update_models_cache(response.json()) await update_models_cache(response.json())
return Response( return Response(
content=response.content, content=response.content,
status_code=response.status_code, status_code=response.status_code,
headers=dict(response.headers) headers=dict(response.headers)
) )
except httpx.TimeoutException: except httpx.TimeoutException:
logger.warning("Ollama服务器超时发送唤醒请求") error_msg = "服务器超时"
# 如果是标签列表请求,尝试返回缓存 if WAKE_URL:
if path == "api/tags" and request.method == "GET": error_msg += ",正在尝试唤醒"
logger.warning(f"{error_msg}")
# 如果是模型列表请求,尝试返回缓存
if path == MODEL_LIST_PATH and request.method == "GET":
cached_models = await get_models_from_cache() cached_models = await get_models_from_cache()
if cached_models is not None: if cached_models is not None:
logger.info("返回缓存的标签列表") logger.info("返回缓存的模型列表")
return JSONResponse(content=cached_models) return JSONResponse(content=cached_models)
# 直接异步发送唤醒请求,不等待结果 # 直接异步发送唤醒请求,不等待结果
asyncio.create_task(wake_ollama()) asyncio.create_task(wake_ollama())
return JSONResponse( else:
status_code=503, logger.warning(error_msg)
content={"message": "服务器正在唤醒中,请稍后重试"}
)
except httpx.RequestError as e: return JSONResponse(
logger.error(f"请求错误: {str(e)}") status_code=503,
# 如果是标签列表请求,尝试返回缓存 content={"message": f"{error_msg},请稍后重试"}
if path == "api/tags" and request.method == "GET": )
cached_models = await get_models_from_cache()
if cached_models is not None:
logger.info("返回缓存的标签列表")
return JSONResponse(content=cached_models)
return JSONResponse( except httpx.RequestError as e:
status_code=502, logger.error(f"请求错误: {str(e)}")
content={"message": f"无法连接到Ollama服务器: {str(e)}"} # 如果是标签列表请求,尝试返回缓存
) if path == MODEL_LIST_PATH and request.method == "GET":
cached_models = await get_models_from_cache()
if cached_models is not None:
logger.info("返回缓存的标签列表")
return JSONResponse(content=cached_models)
except Exception as e: return JSONResponse(
logger.error(f"代理请求失败: {str(e)}") status_code=502,
return JSONResponse( content={"message": f"无法连接到服务器: {str(e)}"}
status_code=500, )
content={"message": f"代理请求失败: {str(e)}"}
) except Exception as e:
logger.error(f"代理请求失败: {str(e)}")
return JSONResponse(
status_code=500,
content={"message": f"代理请求失败: {str(e)}"}
)
# 输出当前配置
logger.info(f"使用配置:")
logger.info(f"服务器类型: {server_type}")
logger.info(f"BASE_URL: {BASE_URL}")
if WAKE_URL:
logger.info(f"WAKE_URL: {WAKE_URL}")
logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")
else:
logger.info("未配置唤醒功能")
logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
logger.info(f"PORT: {PORT}")
logger.info(f"CACHE_DURATION: {CACHE_DURATION} minutes")
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn

View File

@@ -0,0 +1,107 @@
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser
#
# Build-and-push script for the yshtcn/ollama-proxy Docker image (test channel).
# Flow: ensure admin rights -> verify the docker CLI and the Docker service ->
# build an image tagged with today's date (plus an optional "Test_<n>" suffix) ->
# push it to Docker Hub -> retag it as 'test' and push again.

# Relaunch this script elevated if not already running as Administrator.
if (-NOT ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator")) {
    # Request administrator privileges, then let the non-elevated copy exit.
    Start-Process powershell -ArgumentList "-NoProfile -ExecutionPolicy Bypass -File `"$PSCommandPath`"" -Verb RunAs
    exit
}

# Run one docker subcommand, capturing its stderr to a temp file.
# On failure, print $FailureMessage plus the captured stderr and terminate the
# script with exit code 1 (a bare `exit` would report success to the caller).
# The temp file is always removed, even if docker itself throws.
function Invoke-DockerStep {
    param(
        [string[]]$Arguments,
        [string]$FailureMessage
    )
    $stderrFile = [System.IO.Path]::GetTempFileName()
    try {
        docker @Arguments 2> $stderrFile
        if ($LASTEXITCODE -ne 0) {
            Write-Host $FailureMessage -ForegroundColor Red
            Write-Host (Get-Content $stderrFile) -ForegroundColor Red
            exit 1
        }
    } finally {
        Remove-Item $stderrFile -ErrorAction SilentlyContinue
    }
}

# Verify the docker CLI is installed and reachable on PATH.
$dockerPath = Get-Command docker -ErrorAction SilentlyContinue
if (-not $dockerPath) {
    Write-Host "未找到 Docker 命令。请检查:" -ForegroundColor Red
    Write-Host "1. Docker Desktop 是否已安装" -ForegroundColor Yellow
    Write-Host "2. Docker Desktop 是否正在运行" -ForegroundColor Yellow
    Write-Host "3. 环境变量是否正确设置" -ForegroundColor Yellow
    Write-Host "`n典型的 Docker 安装路径为C:\Program Files\Docker\Docker\resources\bin" -ForegroundColor Yellow
    Write-Host "您可能需要将此路径添加到系统的 PATH 环境变量中" -ForegroundColor Yellow
    $response = Read-Host "是否要打开系统环境变量设置?(Y/N)"
    if ($response -eq 'Y' -or $response -eq 'y') {
        Start-Process "SystemPropertiesAdvanced.exe"
    }
    exit 1
}

# Verify the Docker daemon/service is actually running (CLI alone is not enough).
try {
    $null = docker version
    if ($LASTEXITCODE -ne 0) {
        throw "Docker 服务未运行"
    }
} catch {
    Write-Host "Docker 服务似乎没有正常运行。请检查:" -ForegroundColor Red
    Write-Host "1. Docker Desktop 是否已启动" -ForegroundColor Yellow
    Write-Host "2. 等待 Docker Desktop 完全启动" -ForegroundColor Yellow
    exit 1
}

# Work from the script's own directory so the Dockerfile context ('.') is correct.
Set-Location $PSScriptRoot
Write-Host "当前目录已切换为脚本所在目录: $PSScriptRoot"

# Version tag is today's date, optionally suffixed "Test_<revision>".
$dateTime = Get-Date -Format "yyyyMMdd"
Write-Host "当前日期: $dateTime"

$revision = Read-Host -Prompt "请输入Test版本号 ($dateTime,如果没有次数,请直接回车)"
Write-Host "输入的版本号: $revision"

if ([string]::IsNullOrWhiteSpace($revision)) {
    $version = "$dateTime"
} else {
    $version = "$dateTime" + "Test_$revision"
}
Write-Host "完整的版本号: $version"

# Build the image with the full version tag.
Write-Host "正在构建 Docker 镜像..."
Invoke-DockerStep -Arguments @('build', '-t', "yshtcn/ollama-proxy:$version", '.') -FailureMessage "Docker 镜像构建失败"
Write-Host "Docker 镜像构建成功"

# Push the versioned image to Docker Hub.
Write-Host "正在推送 Docker 镜像到 Docker Hub..."
Invoke-DockerStep -Arguments @('push', "yshtcn/ollama-proxy:$version") -FailureMessage "Docker 镜像推送失败"
Write-Host "Docker 镜像推送成功"

# Retag the image as 'test' and push that tag as well.
# (The original version of this step was commented as 'latest' but has always
# pushed the 'test' tag; it also never checked the `docker tag` exit code.)
Write-Host "正在为镜像打上 'test' 标签并推送..."
docker tag yshtcn/ollama-proxy:$version yshtcn/ollama-proxy:test
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker 镜像 'test' 标签推送失败" -ForegroundColor Red
    exit 1
}
Invoke-DockerStep -Arguments @('push', 'yshtcn/ollama-proxy:test') -FailureMessage "Docker 镜像 'test' 标签推送失败"
Write-Host "Docker 镜像 'test' 标签推送成功"
Write-Host "Docker 镜像构建和推送全部完成"

# Keep the elevated console window open until the user confirms.
Write-Host "`n按回车键退出..." -ForegroundColor Green
$null = Read-Host