Initial commit: Add Ollama Proxy project files

yshtcn 2025-01-23 00:13:12 +08:00
commit 49b834ff93
11 changed files with 662 additions and 0 deletions

.dockerignore Normal file (20 additions)

@@ -0,0 +1,20 @@
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env
pip-log.txt
pip-delete-this-directory.txt
.tox
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.log
.pytest_cache
.env
.venv
.DS_Store

.env.example Normal file (5 additions)

@@ -0,0 +1,5 @@
OLLAMA_URL=http://your-ollama-server:11434
WAKE_URL=http://your-wake-server:9090/wol?mac=XX:XX:XX:XX:XX:XX
TIMEOUT_SECONDS=1
PORT=11434
MODEL_TIMEOUT_SECONDS=30 # Timeout for model inference requests (seconds)

.github/workflows/docker-publish.yml vendored Normal file (50 additions)

@@ -0,0 +1,50 @@
name: Docker Image CI

on:
  push:
    branches: [ "main" ]
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ "main" ]

env:
  REGISTRY: docker.io
  IMAGE_NAME: yshtcn/ollama-proxy

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Log into registry
        # Skip login on pull requests, where registry secrets are unavailable
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          # Build only (no push) on pull requests
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

.gitignore vendored Normal file (37 additions)

@@ -0,0 +1,37 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual Environment
venv/
ENV/
env/
.env
# IDE
.idea/
.vscode/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db

Dockerfile Normal file (23 additions)

@@ -0,0 +1,23 @@
FROM python:3.9-slim

WORKDIR /app

# Configure the Python runtime
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# Default value for the wake-interval environment variable
ENV WAKE_INTERVAL=10

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY ollama_proxy.py .

# Expose the proxy port
EXPOSE 11434

# Start the application
CMD ["python", "ollama_proxy.py"]

LICENSE Normal file (21 additions)

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 yshtcn

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md Normal file (146 additions)

@@ -0,0 +1,146 @@
# Ollama Proxy

## Background

As large language models become more widespread, more and more individual users are running Ollama locally to use AI models. This creates a familiar problem:

- Ollama typically needs to run on a high-performance desktop equipped with a powerful GPU
- Keeping that machine on around the clock drives up electricity costs
- Putting the machine to sleep on a schedule saves power, but makes the Ollama service unavailable
- Users then have to wake the machine manually before they can use the service again

Ollama Proxy was designed to solve exactly this problem: it lets users keep saving power while still using the Ollama service conveniently at any time. The project relies on two key strategies to improve the user experience:

1. **Smart wake-up**: by managing requests, the proxy wakes the server automatically when it is needed and lets it sleep when idle, balancing service availability against energy use.
2. **Model list caching**: model list queries are answered immediately even while the server is asleep. This means:
   - Users can browse the list of available models at any time
   - Client applications get basic information without waiting for the server to wake up
   - The experience is smoother overall, with less waiting

With this design, Ollama Proxy not only saves power but also keeps responses timely, offering a solution that is both energy-efficient and responsive.

Ollama Proxy is a smart proxy server for Ollama that provides the following main features:

1. Automatic wake-up
2. Request forwarding
3. Model list caching
4. Health checks
5. Timeout control

## Main Features

### 1. Automatic Wake-up

- Sends a preventive wake request before forwarding traffic whenever more than the configured interval has passed since the last wake
- Configurable wake interval
- Automatically triggers a wake request when a forwarded request times out

### 2. Smart Request Forwarding

- Forwards requests to all Ollama API endpoints
- Dynamic timeout control: different endpoints get different timeouts (see the example below)
  - Lightweight requests (the `/api/tags` model list): a short, configurable timeout
  - All other requests, including model inference: a longer timeout (30 seconds by default)
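
For instance, a generation request is forwarded with the longer model timeout, while `/api/tags` uses the short one. A minimal sketch of such a request against the proxy (the model name `llama3` is a placeholder; use any model installed on your Ollama server):

```bash
# Goes through the proxy on port 11434 and is forwarded with
# MODEL_TIMEOUT_SECONDS (30 s by default); "llama3" is a placeholder.
curl http://localhost:11434/api/generate \
  -d '{"model": "llama3", "prompt": "Why is the sky blue?", "stream": false}'
```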

### 3. Model List Caching

- Caches the model list returned by the `/api/tags` endpoint
- Cache entries are valid for 30 minutes
- Returns the cached data when the upstream service is unavailable, as shown below
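
So even while the backend is asleep, a client can still enumerate models. For example (assuming the proxy runs locally on its default port):

```bash
# Served from the proxy's cache if the Ollama server is asleep;
# a wake request is fired in the background at the same time.
curl http://localhost:11434/api/tags
```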

### 4. Health Checks

- Provides a `/health` endpoint for health status checks
- The Docker container ships with a health check configuration

## Configuration

All settings can be supplied via environment variables or command-line arguments; note that an environment variable takes precedence over the matching argument, as shown below:

| Argument | Environment Variable | Description | Default |
|------|----------|------|--------|
| `--ollama-url` | `OLLAMA_URL` | Ollama server URL | (required) |
| `--wake-url` | `WAKE_URL` | Wake server URL | (required) |
| `--timeout` | `TIMEOUT_SECONDS` | Timeout for simple requests (seconds) | (required) |
| `--model-timeout` | `MODEL_TIMEOUT_SECONDS` | Timeout for model inference requests (seconds) | 30 |
| `--port` | `PORT` | Proxy server port | (required) |
| `--wake-interval` | `WAKE_INTERVAL` | Wake interval (minutes) | 10 |
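
A quick sketch of the precedence rule (the URLs are placeholders):

```bash
# PORT from the environment (8080) wins over --port 11434,
# so the proxy listens on 8080 here.
PORT=8080 python ollama_proxy.py \
  --ollama-url http://localhost:11434 \
  --wake-url http://localhost:11434/api/generate \
  --timeout 10 --port 11434
```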

## Deployment

### Docker Compose (recommended)

1. Create a `.env` file (optional) and configure the environment variables
2. Start the service:
```bash
docker-compose up -d
```

### Docker

```bash
docker run -d \
  -p 11434:11434 \
  -e OLLAMA_URL=http://localhost:11434 \
  -e WAKE_URL=http://localhost:11434/api/generate \
  -e TIMEOUT_SECONDS=10 \
  -e PORT=11434 \
  yshtcn/ollama-proxy:latest
```

### Manual Deployment

1. Install the dependencies:
```bash
pip install -r requirements.txt
```
2. Run the service:
```bash
python ollama_proxy.py \
  --ollama-url http://localhost:11434 \
  --wake-url http://localhost:11434/api/generate \
  --timeout 10 \
  --port 11434
```

## Building the Docker Image

The repository includes a PowerShell script, `ollama_proxy_docker_builder.ps1`, that automates building and pushing the Docker image:

1. Run the script with administrator privileges
2. The script automatically:
   - Checks the Docker environment
   - Builds the image and applies a version tag
   - Pushes the image to Docker Hub
   - Updates the `latest` tag

## Dependencies

- Python 3.9+
- FastAPI
- Uvicorn
- HTTPX

## Notes

1. Make sure the Ollama service is running and reachable
2. Configure OLLAMA_URL and WAKE_URL correctly
3. Adjust the timeouts to match your network environment
4. When deploying with Docker, mind the port mapping and network configuration
5. The proxy pairs well with the [WolGoWeb](https://github.com/xiaoxinpro/WolGoWeb) project for remote wake-up (see the sketch after this list):
   - WolGoWeb provides Wake-on-LAN (WOL) functionality
   - Target hosts can be woken remotely through an HTTP API
   - Multiple deployment options are supported (Docker, direct deployment, and so on)
   - Point WAKE_URL at the WolGoWeb wake endpoint to wake the Ollama server remotely
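
A minimal sketch of such a setup, following the pattern in `.env.example`; the hosts, port, and MAC address below are placeholders for an always-on WolGoWeb host and the Ollama machine:

```bash
# .env — placeholders only; substitute your own addresses
OLLAMA_URL=http://192.168.1.50:11434
WAKE_URL=http://192.168.1.10:9090/wol?mac=AA:BB:CC:DD:EE:FF
```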

## Health Check

The service exposes a `/health` endpoint that responds with:

```json
{
    "status": "healthy"
}
```
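
A quick way to verify the proxy is up:

```bash
curl http://localhost:11434/health
# {"status":"healthy"}
```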

The Docker container is configured with an automatic health check:

- Interval: 30 seconds
- Timeout: 10 seconds
- Retries: 3

docker-compose.yml Normal file (19 additions)

@@ -0,0 +1,19 @@
version: '3'

services:
  ollama-proxy:
    build: .
    container_name: ollama-proxy
    ports:
      - "11434:11434"
    environment:
      - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434}
      - WAKE_URL=${WAKE_URL:-http://localhost:11434/api/generate}
      - TIMEOUT_SECONDS=${TIMEOUT_SECONDS:-10}
      - PORT=${PORT:-11434}
      - WAKE_INTERVAL=${WAKE_INTERVAL:-10}
    restart: unless-stopped
    healthcheck:
      # Use Python for the check: the python:3.9-slim base image does not ship curl
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:11434/health')"]
      interval: 30s
      timeout: 10s
      retries: 3

ollama_proxy.py Normal file (231 additions)

@@ -0,0 +1,231 @@
from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.responses import JSONResponse
import httpx
import asyncio
import logging
import os
import argparse
import sys
from datetime import datetime, timedelta

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Parse command-line arguments
parser = argparse.ArgumentParser(description='Ollama proxy server')
parser.add_argument('--ollama-url', help='Ollama server URL')
parser.add_argument('--wake-url', help='Wake server URL')
parser.add_argument('--timeout', type=int, help='Timeout for simple requests (seconds)')
parser.add_argument('--model-timeout', type=int, help='Timeout for model inference requests (seconds)')
parser.add_argument('--port', type=int, help='Proxy server port')
parser.add_argument('--wake-interval', type=int, default=10, help='Wake interval (minutes)')
args = parser.parse_args()

# Configuration constants; environment variables take precedence over command-line arguments
OLLAMA_URL = os.getenv('OLLAMA_URL') or args.ollama_url
WAKE_URL = os.getenv('WAKE_URL') or args.wake_url
TIMEOUT_SECONDS = os.getenv('TIMEOUT_SECONDS') or args.timeout
MODEL_TIMEOUT_SECONDS = int(os.getenv('MODEL_TIMEOUT_SECONDS') or args.model_timeout or 30)  # 30 s by default
PORT = os.getenv('PORT') or args.port
WAKE_INTERVAL = int(os.getenv('WAKE_INTERVAL') or args.wake_interval)

# Validate required parameters
missing_params = []
if not OLLAMA_URL:
    missing_params.append("OLLAMA_URL")
if not WAKE_URL:
    missing_params.append("WAKE_URL")
if not TIMEOUT_SECONDS:
    missing_params.append("TIMEOUT_SECONDS")
if not PORT:
    missing_params.append("PORT")
if missing_params:
    logger.error(f"Missing required parameters: {', '.join(missing_params)}")
    logger.error("Please supply these values via environment variables or command-line arguments")
    sys.exit(1)

# Make sure the numeric values parse correctly
try:
    TIMEOUT_SECONDS = int(TIMEOUT_SECONDS)
    PORT = int(PORT)
except ValueError:
    logger.error("TIMEOUT_SECONDS and PORT must be integers")
    sys.exit(1)

# Timestamp of the last wake request
last_wake_time = None

# Model list cache state
models_cache = None
models_cache_time = None
CACHE_DURATION = timedelta(minutes=30)  # cache entries are valid for 30 minutes

async def should_wake():
    """Check whether a wake request should be sent."""
    global last_wake_time
    if last_wake_time is None:
        return True
    return datetime.now() - last_wake_time > timedelta(minutes=WAKE_INTERVAL)

async def wake_ollama():
    """Wake the Ollama server."""
    global last_wake_time
    try:
        async with httpx.AsyncClient() as client:
            await client.get(WAKE_URL)
            last_wake_time = datetime.now()
            logger.info(f"Wake request sent; last wake time updated to {last_wake_time}")
    except Exception as e:
        logger.error(f"Wake request failed: {str(e)}")

async def get_models_from_cache():
    """Return the cached model list, or None if the cache is empty or stale."""
    global models_cache, models_cache_time
    if models_cache is None or models_cache_time is None:
        return None
    if datetime.now() - models_cache_time > CACHE_DURATION:
        return None
    return models_cache

async def update_models_cache(data):
    """Update the model list cache."""
    global models_cache, models_cache_time
    models_cache = data
    models_cache_time = datetime.now()
    logger.info("Model list cache updated")

# Log the active configuration
logger.info("Using configuration:")
logger.info(f"OLLAMA_URL: {OLLAMA_URL}")
logger.info(f"WAKE_URL: {WAKE_URL}")
logger.info(f"TIMEOUT_SECONDS: {TIMEOUT_SECONDS}")
logger.info(f"MODEL_TIMEOUT_SECONDS: {MODEL_TIMEOUT_SECONDS}")
logger.info(f"PORT: {PORT}")
logger.info(f"WAKE_INTERVAL: {WAKE_INTERVAL} minutes")

app = FastAPI()

@app.get("/health")
async def health_check():
    logger.info("Received health check request")
    return {"status": "healthy"}
@app.get("/api/tags")
async def list_models():
try:
# 首先尝试从缓存获取
cached_models = await get_models_from_cache()
async with httpx.AsyncClient() as client:
response = await client.get(
f"{OLLAMA_URL}/api/tags",
timeout=TIMEOUT_SECONDS # 使用较短的超时时间
)
# 更新缓存并返回最新数据
await update_models_cache(response.json())
return response.json()
except (httpx.TimeoutException, httpx.ConnectError) as e:
# 发生超时或连接错误时,触发唤醒
logger.warning(f"获取标签列表失败,正在唤醒服务器: {str(e)}")
asyncio.create_task(wake_ollama())
# 如果有缓存,返回缓存数据
if cached_models is not None:
logger.info("返回缓存的标签列表")
return JSONResponse(content=cached_models)
# 如果没有缓存返回503
return JSONResponse(
status_code=503,
content={"message": "服务器正在唤醒中,请稍后重试"}
)
except Exception as e:
logger.error(f"获取标签列表时发生未知错误: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))

@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(request: Request, path: str):
    # Do not proxy /health requests
    if path == "health":
        return await health_check()
    # For everything else, send a preventive wake request if the interval has elapsed
    if await should_wake():
        logger.info("Wake interval has elapsed; sending a preventive wake request")
        await wake_ollama()
    async with httpx.AsyncClient() as client:
        try:
            target_url = f"{OLLAMA_URL}/{path}"
            body = await request.body()
            headers = dict(request.headers)
            headers.pop('host', None)
            headers.pop('connection', None)
            # Pick the timeout based on the request type
            timeout = TIMEOUT_SECONDS if path == "api/tags" else MODEL_TIMEOUT_SECONDS
            response = await client.request(
                method=request.method,
                url=target_url,
                content=body,
                headers=headers,
                timeout=timeout,  # dynamic timeout
                follow_redirects=True
            )
            # On a successful tag list request, refresh the cache
            if path == "api/tags" and request.method == "GET" and response.status_code == 200:
                await update_models_cache(response.json())
            return Response(
                content=response.content,
                status_code=response.status_code,
                headers=dict(response.headers)
            )
        except httpx.TimeoutException:
            logger.warning("Ollama server timed out; sending a wake request")
            # For tag list requests, try the cache first
            if path == "api/tags" and request.method == "GET":
                cached_models = await get_models_from_cache()
                if cached_models is not None:
                    logger.info("Returning the cached tag list")
                    return JSONResponse(content=cached_models)
            # Fire the wake request asynchronously without waiting for it
            asyncio.create_task(wake_ollama())
            return JSONResponse(
                status_code=503,
                content={"message": "The server is waking up; please retry shortly"}
            )
        except httpx.RequestError as e:
            logger.error(f"Request error: {str(e)}")
            # For tag list requests, try the cache first
            if path == "api/tags" and request.method == "GET":
                cached_models = await get_models_from_cache()
                if cached_models is not None:
                    logger.info("Returning the cached tag list")
                    return JSONResponse(content=cached_models)
            return JSONResponse(
                status_code=502,
                content={"message": f"Unable to reach the Ollama server: {str(e)}"}
            )
        except Exception as e:
            logger.error(f"Proxy request failed: {str(e)}")
            return JSONResponse(
                status_code=500,
                content={"message": f"Proxy request failed: {str(e)}"}
            )

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=PORT)

ollama_proxy_docker_builder.ps1 Normal file (107 additions)

@@ -0,0 +1,107 @@
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser

# Check whether the script is running with administrator privileges
if (-NOT ([Security.Principal.WindowsPrincipal][Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole] "Administrator")) {
    # Relaunch with elevated privileges
    Start-Process powershell -ArgumentList "-NoProfile -ExecutionPolicy Bypass -File `"$PSCommandPath`"" -Verb RunAs
    exit
}

# Check that Docker is installed and available
$dockerPath = Get-Command docker -ErrorAction SilentlyContinue
if (-not $dockerPath) {
    Write-Host "The docker command was not found. Please check that:" -ForegroundColor Red
    Write-Host "1. Docker Desktop is installed" -ForegroundColor Yellow
    Write-Host "2. Docker Desktop is running" -ForegroundColor Yellow
    Write-Host "3. The environment variables are set correctly" -ForegroundColor Yellow
    Write-Host "`nThe typical Docker install path is: C:\Program Files\Docker\Docker\resources\bin" -ForegroundColor Yellow
    Write-Host "You may need to add this path to the system PATH environment variable" -ForegroundColor Yellow
    $response = Read-Host "Open the system environment variable settings? (Y/N)"
    if ($response -eq 'Y' -or $response -eq 'y') {
        Start-Process "SystemPropertiesAdvanced.exe"
    }
    exit
}

# Check that the Docker service is running
try {
    $dockerVersion = docker version
    if ($LASTEXITCODE -ne 0) {
        throw "Docker service is not running"
    }
} catch {
    Write-Host "The Docker service does not appear to be running. Please check that:" -ForegroundColor Red
    Write-Host "1. Docker Desktop has been started" -ForegroundColor Yellow
    Write-Host "2. Docker Desktop has finished starting up" -ForegroundColor Yellow
    exit
}

# Switch to the directory containing the script
Set-Location $PSScriptRoot
Write-Host "Working directory set to the script directory: $PSScriptRoot"

# Get the current date
$dateTime = Get-Date -Format "yyyyMMdd"
Write-Host "Current date: $dateTime"

# Prompt for the final component of the version number
$revision = Read-Host -Prompt "Enter the build number for version $dateTime (press Enter if this is the first build today)"
Write-Host "Build number entered: $revision"

# Construct the full version number
if ([string]::IsNullOrWhiteSpace($revision)) {
    $version = "$dateTime"
} else {
    $version = "$dateTime" + "_$revision"
}
Write-Host "Full version number: $version"

# Build the Docker image with the full version tag
Write-Host "Building the Docker image..."
$tempFileBuild = [System.IO.Path]::GetTempFileName()
docker build -t yshtcn/ollama-proxy:$version . 2> $tempFileBuild
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker image build failed" -ForegroundColor Red
    Write-Host (Get-Content $tempFileBuild) -ForegroundColor Red
    Remove-Item $tempFileBuild
    exit
}
Write-Host "Docker image built successfully"
Remove-Item $tempFileBuild

# Push the version-tagged Docker image to Docker Hub
Write-Host "Pushing the Docker image to Docker Hub..."
$tempFilePush = [System.IO.Path]::GetTempFileName()
docker push yshtcn/ollama-proxy:$version 2> $tempFilePush
if ($LASTEXITCODE -ne 0) {
    Write-Host "Docker image push failed" -ForegroundColor Red
    Write-Host (Get-Content $tempFilePush) -ForegroundColor Red
    Remove-Item $tempFilePush
    exit
}
Write-Host "Docker image pushed successfully"
Remove-Item $tempFilePush

# Tag the image as 'latest' and push it
Write-Host "Tagging the image as 'latest' and pushing..."
$tempFilePushLatest = [System.IO.Path]::GetTempFileName()
docker tag yshtcn/ollama-proxy:$version yshtcn/ollama-proxy:latest
docker push yshtcn/ollama-proxy:latest 2> $tempFilePushLatest
if ($LASTEXITCODE -ne 0) {
    Write-Host "Pushing the 'latest' tag failed" -ForegroundColor Red
    Write-Host (Get-Content $tempFilePushLatest) -ForegroundColor Red
    Remove-Item $tempFilePushLatest
    exit
}
Write-Host "'latest' tag pushed successfully"
Remove-Item $tempFilePushLatest

Write-Host "Docker image build and push complete"

# Wait for the user to confirm before closing
Write-Host "`nPress Enter to exit..." -ForegroundColor Green
$null = Read-Host

requirements.txt Normal file (3 additions)

@@ -0,0 +1,3 @@
fastapi
uvicorn
httpx