mirror of
https://github.com/mofeng-git/One-KVM.git
synced 2026-06-19 02:11:50 +08:00
feat: 新增 Computer Use Agent 初步支持
This commit is contained in:
@@ -454,6 +454,90 @@ export const hidApi = {
|
||||
isWebSocketConnected: () => hidWs.connected.value,
|
||||
}
|
||||
|
||||
/** Lifecycle states reported in `ComputerUseSession.status`. */
export type ComputerUseStatus =
  'idle' | 'waiting_screenshot' | 'thinking' | 'executing' | 'completed' | 'failed' | 'stopped'

/** Mouse button names accepted by pointer actions. */
export type ComputerUseButton =
  | 'left'
  | 'middle'
  | 'right'

/**
 * One action of the Computer Use agent, discriminated by `type`.
 * NOTE(review): the coordinate space of `x`/`y` (screenshot pixels vs. target
 * screen) is not visible from this declaration — confirm against the backend.
 */
export type ComputerUseAction =
  // Pointer actions
  | { type: 'click'; x: number; y: number; button?: ComputerUseButton }
  | { type: 'double_click'; x: number; y: number; button?: ComputerUseButton }
  | { type: 'move'; x: number; y: number }
  | { type: 'drag'; path: { x: number; y: number }[]; button?: ComputerUseButton }
  | { type: 'scroll'; x: number; y: number; dx?: number; dy?: number }
  // Keyboard actions
  | { type: 'type'; text: string }
  | { type: 'keypress'; keys: string[] }
  // Flow control
  | { type: 'wait'; ms: number }
  | { type: 'screenshot' }

/** A captured frame together with its pixel dimensions. */
export interface ComputerUseScreenshot {
  /** Image encoded as a `data:` URL. */
  data_url: string
  width: number
  height: number
}

/** A single conversation turn, attributed to the user or to the assistant. */
export type ComputerUseConversationMessage =
  { role: 'user'; text: string } | { role: 'assistant'; text: string }

/** Computer Use configuration as returned by `/config/computer-use`. */
export interface ComputerUseConfig {
  enabled: boolean
  provider: string
  base_url: string
  model: string
  max_steps: number
  timeout_seconds: number
  /** True when an API key is available; the key itself is not part of this payload. */
  api_key_configured: boolean
  /** Describes where the configured key comes from. */
  api_key_source: string
}

/** Snapshot of the Computer Use session as returned by the session endpoints. */
export interface ComputerUseSession {
  id: string | null
  status: ComputerUseStatus
  prompt: string | null
  step: number
  max_steps: number
  last_error: string | null
  final_message: string | null
}
|
||||
|
||||
/** REST helpers for the Computer Use configuration and session endpoints. */
export const computerUseApi = {
  /** Loads the current Computer Use configuration. */
  config() {
    return request<ComputerUseConfig>('/config/computer-use')
  },

  /**
   * Sends a partial configuration update via PATCH and returns the resulting
   * configuration. Fields left `undefined` are dropped by `JSON.stringify`.
   */
  updateConfig(data: {
    enabled?: boolean
    base_url?: string
    model?: string
    max_steps?: number
    timeout_seconds?: number
    openai_api_key?: string
    clear_openai_api_key?: boolean
  }) {
    const body = JSON.stringify(data)
    return request<ComputerUseConfig>('/config/computer-use', { method: 'PATCH', body })
  },

  /** Loads the current session snapshot. */
  session() {
    return request<ComputerUseSession>('/computer-use/session')
  },

  /** Starts a session (POST) for the given prompt on behalf of `client_id`. */
  start(data: {
    prompt: string
    continue_conversation?: boolean
    client_id: string
    max_steps?: number
    timeout_seconds?: number
  }) {
    const body = JSON.stringify(data)
    return request<ComputerUseSession>('/computer-use/session', { method: 'POST', body })
  },

  /** Asks the backend to stop the session (POST) and returns the updated snapshot. */
  stop() {
    return request<ComputerUseSession>('/computer-use/session/stop', { method: 'POST' })
  },
}
|
||||
|
||||
export const atxApi = {
|
||||
status: () =>
|
||||
request<{
|
||||
|
||||
@@ -39,6 +39,7 @@ import {
|
||||
BarChart3,
|
||||
Terminal,
|
||||
MoreHorizontal,
|
||||
Bot,
|
||||
} from 'lucide-vue-next'
|
||||
import PasteModal from '@/components/PasteModal.vue'
|
||||
import AtxPopover from '@/components/AtxPopover.vue'
|
||||
@@ -77,6 +78,7 @@ const emit = defineEmits<{
|
||||
(e: 'reset'): void
|
||||
(e: 'wol', macAddress: string): void
|
||||
(e: 'openTerminal'): void
|
||||
(e: 'openComputerUse'): void
|
||||
}>()
|
||||
|
||||
const pasteOpen = ref(false)
|
||||
@@ -385,6 +387,26 @@ const hasOverflow = computed(() => {
|
||||
|
||||
<div v-if="isVisible('stats') || isVisible('extension') || isVisible('settings')" class="h-5 w-px bg-slate-200 dark:bg-slate-700" />
|
||||
|
||||
<!-- Computer Use - Always visible -->
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger as-child>
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
class="h-7 w-7 sm:h-8 sm:w-auto p-0 sm:px-2 sm:gap-1.5 text-xs"
|
||||
@click="emit('openComputerUse')"
|
||||
>
|
||||
<Bot class="h-3.5 w-3.5 sm:h-4 sm:w-4" />
|
||||
<span class="hidden xl:inline">AI</span>
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>Computer Use</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
|
||||
<!-- Virtual Keyboard - Always visible -->
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
|
||||
355
web/src/components/ComputerUseSheet.vue
Normal file
355
web/src/components/ComputerUseSheet.vue
Normal file
@@ -0,0 +1,355 @@
|
||||
<script setup lang="ts">
|
||||
import { computed, nextTick, onMounted, ref, watch } from 'vue'
|
||||
import { Bot, ChevronDown, Image, KeyRound, Play, Square } from 'lucide-vue-next'
|
||||
import { toast } from 'vue-sonner'
|
||||
import { computerUseApi, type ComputerUseAction, type ComputerUseConfig, type ComputerUseSession } from '@/api'
|
||||
import type { ComputerUseTimelineItem } from '@/types/computerUseTimeline'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import { Textarea } from '@/components/ui/textarea'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { Switch } from '@/components/ui/switch'
|
||||
import { Tabs, TabsContent } from '@/components/ui/tabs'
|
||||
|
||||
const props = defineProps<{
|
||||
open: boolean
|
||||
connected: boolean
|
||||
wsError: string | null
|
||||
session: ComputerUseSession | null
|
||||
timeline: ComputerUseTimelineItem[]
|
||||
}>()
|
||||
|
||||
const emit = defineEmits<{
|
||||
(e: 'update:open', value: boolean): void
|
||||
(e: 'start', prompt: string): void
|
||||
(e: 'stop'): void
|
||||
(e: 'clear'): void
|
||||
}>()
|
||||
|
||||
const config = ref<ComputerUseConfig | null>(null)
|
||||
const prompt = ref('')
|
||||
const apiKey = ref('')
|
||||
const savingConfig = ref(false)
|
||||
const starting = ref(false)
|
||||
const activeTab = ref('chat')
|
||||
const messagesRef = ref<HTMLDivElement | null>(null)
|
||||
|
||||
// Writable computed bindings for the settings form. Each getter falls back to
// a default while `config` has not loaded, and each setter is a no-op until it has.

/** Model name bound to the "model" input. */
const defaultModel = computed<string>({
  get() {
    return config.value?.model ?? 'gpt-5.5'
  },
  set(value) {
    const target = config.value
    if (!target) return
    target.model = value
  },
})

/** API endpoint bound to the "API URL" input. */
const defaultBaseUrl = computed<string>({
  get() {
    return config.value?.base_url ?? 'https://api.openai.com/v1/responses'
  },
  set(value) {
    const target = config.value
    if (!target) return
    target.base_url = value
  },
})

/** Step limit as a string for the input; empty, zero or non-numeric input becomes 30. */
const defaultMaxSteps = computed<string>({
  get() {
    return String(config.value?.max_steps ?? 30)
  },
  set(value) {
    const target = config.value
    if (!target) return
    target.max_steps = Number(value) || 30
  },
})

/** Timeout in seconds as a string for the input; empty, zero or non-numeric input becomes 600. */
const defaultTimeoutSeconds = computed<string>({
  get() {
    return String(config.value?.timeout_seconds ?? 600)
  },
  set(value) {
    const target = config.value
    if (!target) return
    target.timeout_seconds = Number(value) || 600
  },
})
|
||||
|
||||
/** Session status, treated as 'idle' while no session is known. */
const status = computed(() => {
  const session = props.session
  return session?.status ?? 'idle'
})

/** True while the agent is capturing, thinking or executing. */
const isRunning = computed(() => {
  const current = status.value
  return current === 'waiting_screenshot' || current === 'thinking' || current === 'executing'
})

/** A task can be sent when the feature is enabled, a key is configured, a prompt is typed and nothing is running. */
const canStart = computed(() => {
  if (!config.value?.enabled) return false
  if (!config.value?.api_key_configured) return false
  if (prompt.value.trim().length === 0) return false
  return !isRunning.value
})

/** Show the placeholder only while there is nothing at all to display. */
const showWelcome = computed(() => {
  if (props.timeline.length !== 0) return false
  return !props.session?.last_error && !props.session?.final_message
})
|
||||
|
||||
// Badge text per non-idle status; anything not listed is shown as idle.
const statusLabels = new Map<string, string>([
  ['waiting_screenshot', '截屏中'],
  ['thinking', '思考中'],
  ['executing', '执行中'],
  ['completed', '已完成'],
  ['failed', '失败'],
  ['stopped', '已停止'],
])

/** Human-readable label for the current session status. */
const statusLabel = computed(() => statusLabels.get(status.value) ?? '空闲')
|
||||
|
||||
/** Fetches the Computer Use configuration and stores it in `config`. */
async function loadConfig() {
  const loaded = await computerUseApi.config()
  config.value = loaded
}
|
||||
|
||||
/**
 * Persists the settings form. Empty or zero fields fall back to their
 * defaults, and the API key is only sent when the user typed one. The key
 * input is cleared after a successful save; `savingConfig` is reset either way.
 */
async function saveConfig() {
  savingConfig.value = true
  try {
    const current = config.value
    const typedKey = apiKey.value.trim()
    const payload = {
      enabled: current?.enabled ?? true,
      base_url: current?.base_url || 'https://api.openai.com/v1/responses',
      model: current?.model || 'gpt-5.5',
      max_steps: current?.max_steps || 30,
      timeout_seconds: current?.timeout_seconds || 600,
      // `undefined` is omitted from the JSON body, leaving the stored key untouched.
      openai_api_key: typedKey || undefined,
    }
    config.value = await computerUseApi.updateConfig(payload)
    apiKey.value = ''
    toast.success('Computer Use 配置已保存')
  } finally {
    savingConfig.value = false
  }
}
|
||||
|
||||
/** Asks the backend to drop the stored API key and refreshes `config` from the response. */
async function clearApiKey() {
  savingConfig.value = true
  try {
    const updated = await computerUseApi.updateConfig({ clear_openai_api_key: true })
    config.value = updated
    apiKey.value = ''
    toast.success('OpenAI API Key 已清除')
  } finally {
    savingConfig.value = false
  }
}
|
||||
|
||||
/**
 * Emits `start` with the trimmed prompt and clears the input.
 * NOTE(review): `starting` is reset as soon as `emit` returns, so it does not
 * cover the parent's asynchronous start request — confirm this is intended.
 */
async function start() {
  if (!canStart.value) return

  starting.value = true
  try {
    const text = prompt.value.trim()
    emit('start', text)
    prompt.value = ''
  } finally {
    starting.value = false
  }
}
|
||||
|
||||
/**
 * Renders a one-line, human-readable description of an executed action.
 *
 * @param action - Action received from the backend.
 * @returns The label shown in the timeline. Typed text is summarised by its
 *   length only, so the content itself is not displayed.
 */
function formatAction(action: ComputerUseAction): string {
  switch (action.type) {
    case 'click':
      return `点击 (${action.x}, ${action.y}) ${action.button ?? 'left'}`
    case 'double_click':
      return `双击 (${action.x}, ${action.y}) ${action.button ?? 'left'}`
    case 'move':
      return `移动到 (${action.x}, ${action.y})`
    case 'drag':
      return `拖拽 ${action.path.length} 个点`
    case 'scroll':
      return `滚动 (${action.x}, ${action.y}) dx=${action.dx ?? 0} dy=${action.dy ?? 0}`
    case 'type':
      return `输入 ${action.text.length} 字符`
    case 'keypress':
      return `按键 ${action.keys.join('+')}`
    case 'wait':
      return `等待 ${action.ms}ms`
    case 'screenshot':
      return '请求截图'
    default: {
      // Compile-time exhaustiveness check. At runtime the actions come from
      // unvalidated JSON, so an action type this build does not know must
      // still produce a label instead of `undefined`.
      const unexpected: never = action
      return `未知操作 ${String((unexpected as { type?: unknown }).type)}`
    }
  }
}
|
||||
|
||||
/** Scrolls the message list to its end once the pending DOM update has been applied. */
function scrollToBottom() {
  nextTick(() => {
    const container = messagesRef.value
    if (container) container.scrollTop = container.scrollHeight
  })
}

// Keep the newest entry in view when the timeline grows…
watch(() => props.timeline.length, scrollToBottom)

// …and when the panel is (re)opened.
watch(
  () => props.open,
  (isOpen) => {
    if (isOpen) scrollToBottom()
  },
)
|
||||
|
||||
onMounted(loadConfig)
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<aside
|
||||
v-show="open"
|
||||
class="absolute inset-y-0 right-0 z-30 h-full min-h-0 w-[min(100%,420px)] border-l bg-background/98 shadow-xl backdrop-blur md:relative md:z-auto md:w-[420px] xl:w-[460px]"
|
||||
>
|
||||
<div class="flex h-full min-h-0 flex-col">
|
||||
<div class="flex h-12 shrink-0 items-center justify-between border-b px-3">
|
||||
<div class="flex min-w-0 items-center gap-2">
|
||||
<Bot class="h-5 w-5 shrink-0" />
|
||||
<div class="min-w-0">
|
||||
<div class="truncate text-sm font-semibold">Computer Use</div>
|
||||
<div class="truncate text-[11px] text-muted-foreground">
|
||||
WebSocket {{ connected ? '已连接' : '未连接' }}
|
||||
<span v-if="wsError"> · {{ wsError }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex items-center gap-1.5">
|
||||
<Badge :variant="status === 'failed' ? 'destructive' : 'secondary'">
|
||||
{{ statusLabel }}
|
||||
</Badge>
|
||||
<Button variant="ghost" size="icon" class="h-8 w-8" @click="emit('update:open', false)">
|
||||
<ChevronDown class="h-4 w-4 rotate-90" />
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Tabs v-model="activeTab" class="flex min-h-0 flex-1 flex-col">
|
||||
<div class="px-3 py-2">
|
||||
<div class="grid grid-cols-2 rounded-md bg-muted p-1">
|
||||
<button
|
||||
type="button"
|
||||
:class="[
|
||||
'rounded-sm px-3 py-1.5 text-sm font-medium transition-colors',
|
||||
activeTab === 'chat' ? 'bg-background text-foreground shadow-sm' : 'text-muted-foreground hover:text-foreground'
|
||||
]"
|
||||
@click="activeTab = 'chat'"
|
||||
>
|
||||
对话
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
:class="[
|
||||
'rounded-sm px-3 py-1.5 text-sm font-medium transition-colors',
|
||||
activeTab === 'settings' ? 'bg-background text-foreground shadow-sm' : 'text-muted-foreground hover:text-foreground'
|
||||
]"
|
||||
@click="activeTab = 'settings'"
|
||||
>
|
||||
设置
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<TabsContent value="chat" class="m-0 flex min-h-0 flex-1 flex-col data-[state=inactive]:hidden">
|
||||
<div ref="messagesRef" class="min-h-0 flex-1 space-y-3 overflow-y-auto p-3">
|
||||
<div v-if="showWelcome" class="rounded-md border border-dashed p-4 text-center text-xs text-muted-foreground">
|
||||
发送任务后,这里会显示对话、截图和坐标操作。
|
||||
</div>
|
||||
|
||||
<template v-for="item in timeline" :key="item.id">
|
||||
<div v-if="item.type === 'user'" class="flex justify-end">
|
||||
<div class="max-w-[86%] rounded-md bg-primary px-3 py-2 text-sm text-primary-foreground">
|
||||
{{ item.text }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-else-if="item.type === 'assistant'" class="flex justify-start">
|
||||
<div class="max-w-[86%] rounded-md border bg-muted/50 px-3 py-2 text-sm">
|
||||
{{ item.text }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-else-if="item.type === 'screenshot'" class="rounded-md border bg-card p-2">
|
||||
<div class="mb-2 flex items-center justify-between text-xs text-muted-foreground">
|
||||
<span class="inline-flex items-center gap-1.5"><Image class="h-3.5 w-3.5" />截图</span>
|
||||
<span>{{ item.screenshot.width }}x{{ item.screenshot.height }}</span>
|
||||
</div>
|
||||
<div
|
||||
class="w-full overflow-hidden rounded-sm bg-black"
|
||||
:style="{ aspectRatio: `${item.screenshot.width} / ${item.screenshot.height}` }"
|
||||
>
|
||||
<img :src="item.screenshot.data_url" class="h-full w-full object-cover" alt="Computer Use screenshot" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-else-if="item.type === 'actions_executed'" class="rounded-md border bg-emerald-50 p-2 text-emerald-950 dark:bg-emerald-950/20 dark:text-emerald-100">
|
||||
<div class="mb-2 text-xs font-medium">已执行</div>
|
||||
<div class="space-y-1">
|
||||
<div v-for="(action, index) in item.actions" :key="index" class="rounded-sm bg-background/60 px-2 py-1.5 text-xs">
|
||||
{{ formatAction(action) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-else-if="item.type === 'error'" class="rounded-md border border-destructive/40 bg-destructive/10 px-3 py-2 text-xs text-destructive">
|
||||
{{ item.text }}
|
||||
</div>
|
||||
|
||||
<div v-else class="text-center text-xs text-muted-foreground">
|
||||
{{ item.text }}
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<div class="shrink-0 border-t p-3">
|
||||
<Textarea
|
||||
v-model="prompt"
|
||||
rows="3"
|
||||
placeholder="继续输入任务或追问"
|
||||
:disabled="isRunning"
|
||||
@keydown.meta.enter.prevent="start"
|
||||
@keydown.ctrl.enter.prevent="start"
|
||||
/>
|
||||
<div class="mt-2 flex gap-2">
|
||||
<Button class="flex-1 gap-2" :disabled="!canStart || starting" @click="start">
|
||||
<Play class="h-4 w-4" />
|
||||
发送
|
||||
</Button>
|
||||
<Button variant="outline" class="gap-2" :disabled="!isRunning" @click="emit('stop')">
|
||||
<Square class="h-4 w-4" />
|
||||
停止
|
||||
</Button>
|
||||
<Button variant="ghost" size="sm" :disabled="isRunning || timeline.length === 0" @click="emit('clear')">
|
||||
清空
|
||||
</Button>
|
||||
</div>
|
||||
<p v-if="!config?.api_key_configured" class="mt-2 text-xs text-muted-foreground">
|
||||
需要先在设置里保存 OpenAI API Key。
|
||||
</p>
|
||||
</div>
|
||||
</TabsContent>
|
||||
|
||||
<TabsContent value="settings" class="m-0 min-h-0 flex-1 overflow-y-auto p-3 data-[state=inactive]:hidden">
|
||||
<div class="space-y-4">
|
||||
<div class="flex items-center justify-between rounded-md border p-3">
|
||||
<div>
|
||||
<div class="text-sm font-medium">启用 AI 操作</div>
|
||||
<div class="text-xs text-muted-foreground">配置保存后立即生效</div>
|
||||
</div>
|
||||
<Switch
|
||||
:model-value="config?.enabled ?? false"
|
||||
@update:model-value="(value) => { if (config) config.enabled = value }"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div class="space-y-3 rounded-md border p-3">
|
||||
<div class="grid grid-cols-2 gap-2">
|
||||
<div class="space-y-1">
|
||||
<Label class="text-xs">模型</Label>
|
||||
<Input v-model="defaultModel" :disabled="!config" placeholder="gpt-5.5" />
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
<Label class="text-xs">最大步数</Label>
|
||||
<Input v-model="defaultMaxSteps" type="number" min="1" max="100" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
<Label class="text-xs">超时秒数</Label>
|
||||
<Input v-model="defaultTimeoutSeconds" type="number" min="30" max="3600" />
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
<Label class="text-xs">API URL</Label>
|
||||
<Input v-model="defaultBaseUrl" :disabled="!config" placeholder="https://api.openai.com/v1/responses" />
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
<Label class="text-xs flex items-center gap-1">
|
||||
<KeyRound class="h-3.5 w-3.5" />
|
||||
OpenAI API Key
|
||||
</Label>
|
||||
<Input
|
||||
v-model="apiKey"
|
||||
type="password"
|
||||
:placeholder="config?.api_key_configured ? `已配置:${config.api_key_source}` : 'sk-...'"
|
||||
/>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-2">
|
||||
<Button size="sm" :disabled="savingConfig || !config" @click="saveConfig">
|
||||
保存配置
|
||||
</Button>
|
||||
<Button size="sm" variant="outline" :disabled="savingConfig || !config?.api_key_configured" @click="clearApiKey">
|
||||
清除 Key
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</TabsContent>
|
||||
</Tabs>
|
||||
</div>
|
||||
</aside>
|
||||
</template>
|
||||
92
web/src/composables/useComputerUseSocket.ts
Normal file
92
web/src/composables/useComputerUseSocket.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import { ref, onUnmounted } from 'vue'
|
||||
import { buildWsUrl } from '@/types/websocket'
|
||||
import type { ComputerUseScreenshot, ComputerUseSession, ComputerUseAction } from '@/api'
|
||||
|
||||
/**
 * Messages the client parses from the Computer Use WebSocket, discriminated
 * by `type`. A `screenshot_requested` message is answered by the client with
 * a `screenshot_result` carrying the same `request_id`.
 */
export type ComputerUseServerMessage =
  // Session state
  | { type: 'session_updated'; session: ComputerUseSession }
  | { type: 'step_started'; step: number }
  // Screenshots
  | { type: 'screenshot_requested'; request_id: string }
  | { type: 'screenshot_captured'; screenshot: ComputerUseScreenshot }
  // Actions and failures
  | { type: 'actions_executed'; actions: ComputerUseAction[] }
  | { type: 'error'; message: string }
|
||||
|
||||
/**
 * Creates a random UUID-shaped client id.
 *
 * `crypto.randomUUID` is only exposed in secure contexts, so calling it
 * unconditionally throws when the UI is served over plain HTTP. Fall back to
 * `crypto.getRandomValues`, and finally to `Math.random`, so the composable
 * can always be set up. The id is sent as a `client_id` query parameter.
 */
function createComputerUseClientId(): string {
  const cryptoApi: Crypto | undefined = globalThis.crypto
  if (typeof cryptoApi?.randomUUID === 'function') return cryptoApi.randomUUID()

  const bytes = new Uint8Array(16)
  if (typeof cryptoApi?.getRandomValues === 'function') {
    cryptoApi.getRandomValues(bytes)
  } else {
    for (let i = 0; i < bytes.length; i++) bytes[i] = Math.floor(Math.random() * 256)
  }
  // Stamp the UUID version (4) and variant bits.
  bytes[6] = ((bytes[6] ?? 0) & 0x0f) | 0x40
  bytes[8] = ((bytes[8] ?? 0) & 0x3f) | 0x80

  const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0')).join('')
  return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20)}`
}

/**
 * Manages the Computer Use WebSocket for the calling component.
 *
 * @param options.onMessage - Called with every parsed server message.
 * @param options.onScreenshotRequested - Called for `screenshot_requested`
 *   messages; a non-null result is sent back as `screenshot_result`.
 * @returns Reactive `connected` / `error` state, the generated `clientId`,
 *   and `connect` / `disconnect`. The socket is closed when the component
 *   unmounts.
 */
export function useComputerUseSocket(options: {
  onMessage: (message: ComputerUseServerMessage) => void
  onScreenshotRequested: (requestId: string) => Promise<ComputerUseScreenshot | null>
}) {
  const connected = ref(false)
  const error = ref<string | null>(null)
  const clientId = createComputerUseClientId()
  let ws: WebSocket | null = null
  let connectPromise: Promise<void> | null = null

  /**
   * Opens the socket if needed. Resolves once it is open, reuses the pending
   * attempt when one is in flight, and rejects when the attempt fails or the
   * socket closes before opening.
   */
  function connect(): Promise<void> {
    if (ws && ws.readyState === WebSocket.OPEN) return Promise.resolve()
    if (connectPromise) return connectPromise

    const socket = new WebSocket(buildWsUrl(`/api/ws/computer-use?client_id=${encodeURIComponent(clientId)}`))
    ws = socket

    // Events of a socket that was replaced or disconnected arrive late; they
    // must not overwrite the state that now belongs to a newer socket.
    const isCurrent = () => ws === socket

    const pending = new Promise<void>((resolve, reject) => {
      socket.onopen = () => {
        if (!isCurrent()) return
        connected.value = true
        error.value = null
        connectPromise = null
        resolve()
      }

      socket.onerror = () => {
        const message = 'Computer use WebSocket failed'
        if (isCurrent()) {
          error.value = message
          connectPromise = null
        }
        reject(new Error(message))
      }

      socket.onclose = () => {
        if (isCurrent()) {
          ws = null
          connected.value = false
          connectPromise = null
        }
        // No-op when the promise already settled; otherwise keeps callers
        // from waiting forever on a socket that closed before it opened.
        reject(new Error('Computer use WebSocket closed before it opened'))
      }
    })
    connectPromise = pending

    socket.onmessage = async (event) => {
      try {
        // NOTE(review): the payload is trusted to match ComputerUseServerMessage; it is not validated.
        const message = JSON.parse(event.data) as ComputerUseServerMessage
        options.onMessage(message)
        if (message.type !== 'screenshot_requested') return

        const screenshot = await options.onScreenshotRequested(message.request_id)
        const active = ws
        if (screenshot && active?.readyState === WebSocket.OPEN) {
          active.send(JSON.stringify({
            type: 'screenshot_result',
            request_id: message.request_id,
            screenshot,
          }))
        }
      } catch (err) {
        console.error('[ComputerUse] Failed to handle WS message:', err)
      }
    }

    return pending
  }

  /** Closes the socket, if any, and resets the connection state. */
  function disconnect() {
    const socket = ws
    // Detach first so the closing socket's late events are treated as stale.
    ws = null
    connected.value = false
    connectPromise = null
    socket?.close()
  }

  onUnmounted(disconnect)

  return {
    connected,
    error,
    clientId,
    connect,
    disconnect,
  }
}
|
||||
15
web/src/types/computerUseTimeline.ts
Normal file
15
web/src/types/computerUseTimeline.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import type { ComputerUseAction, ComputerUseScreenshot } from '@/api'
|
||||
|
||||
/** One entry of the Computer Use timeline, discriminated by `type`; `id` is unique per entry. */
export type ComputerUseTimelineItem =
  // Text entries
  | { id: string; type: 'user'; text: string }
  | { id: string; type: 'assistant'; text: string }
  | { id: string; type: 'error'; text: string }
  | { id: string; type: 'status'; text: string }
  // Rich entries
  | { id: string; type: 'screenshot'; screenshot: ComputerUseScreenshot }
  | { id: string; type: 'actions_executed'; actions: ComputerUseAction[] }
|
||||
|
||||
// Distributes over a union so every member keeps its own fields; a plain
// `Omit<Union, 'id'>` would collapse the union to its shared keys.
type TimelineItemWithoutId<Item> = Item extends { id: string } ? Omit<Item, 'id'> : never

/** A timeline entry before an `id` has been assigned to it. */
export type NewComputerUseTimelineItem = TimelineItemWithoutId<ComputerUseTimelineItem>
|
||||
@@ -10,8 +10,10 @@ import { useConsoleEvents } from '@/composables/useConsoleEvents'
|
||||
import { useHidWebSocket } from '@/composables/useHidWebSocket'
|
||||
import { useWebRTC } from '@/composables/useWebRTC'
|
||||
import { useVideoSession } from '@/composables/useVideoSession'
|
||||
import { useComputerUseSocket, type ComputerUseServerMessage } from '@/composables/useComputerUseSocket'
|
||||
import { getUnifiedAudio } from '@/composables/useUnifiedAudio'
|
||||
import { streamApi, hidApi, atxApi, atxConfigApi, authApi } from '@/api'
|
||||
import { streamApi, hidApi, atxApi, atxConfigApi, authApi, computerUseApi } from '@/api'
|
||||
import type { ComputerUseScreenshot, ComputerUseSession } from '@/api'
|
||||
import { CanonicalKey, HidBackend } from '@/types/generated'
|
||||
import type { HidKeyboardEvent, HidMouseEvent } from '@/types/hid'
|
||||
import { keyboardEventToCanonicalKey, updateModifierMaskForKey } from '@/lib/keyboardMappings'
|
||||
@@ -29,6 +31,8 @@ import ActionBar from '@/components/ActionBar.vue'
|
||||
import InfoBar from '@/components/InfoBar.vue'
|
||||
import VirtualKeyboard from '@/components/VirtualKeyboard.vue'
|
||||
import StatsSheet from '@/components/StatsSheet.vue'
|
||||
import ComputerUseSheet from '@/components/ComputerUseSheet.vue'
|
||||
import type { ComputerUseTimelineItem, NewComputerUseTimelineItem } from '@/types/computerUseTimeline'
|
||||
import LanguageToggleButton from '@/components/LanguageToggleButton.vue'
|
||||
import BrandMark from '@/components/BrandMark.vue'
|
||||
import { Button } from '@/components/ui/button'
|
||||
@@ -88,6 +92,11 @@ const consoleEvents = useConsoleEvents({
|
||||
})
|
||||
|
||||
const videoMode = ref<VideoMode>('mjpeg')
|
||||
const computerUseOpen = ref(false)
|
||||
const computerUseSession = ref<ComputerUseSession | null>(null)
|
||||
const computerUseTimeline = ref<ComputerUseTimelineItem[]>([])
|
||||
const computerUseConversationStarted = ref(false)
|
||||
let computerUseTimelineSeq = 0
|
||||
|
||||
const videoRef = ref<HTMLImageElement | null>(null)
|
||||
const webrtcVideoRef = ref<HTMLVideoElement | null>(null)
|
||||
@@ -118,6 +127,11 @@ const clientsStats = ref<Record<string, ClientStat>>({})
|
||||
|
||||
const myClientId = generateUUID()
|
||||
|
||||
const computerUseSocket = useComputerUseSocket({
|
||||
onMessage: handleComputerUseMessage,
|
||||
onScreenshotRequested: captureComputerUseFrame,
|
||||
})
|
||||
|
||||
const mouseMode = ref<'absolute' | 'relative'>('absolute')
|
||||
const pressedKeys = ref<CanonicalKey[]>([])
|
||||
const keyboardLed = computed(() => ({
|
||||
@@ -617,6 +631,8 @@ const videoContainerStyle = computed(() => {
|
||||
}
|
||||
})
|
||||
|
||||
const computerUsePanelVisible = computed(() => computerUseOpen.value && !isFullscreen.value)
|
||||
|
||||
const showMsdStatusCard = computed(() => {
|
||||
return !!(systemStore.msd?.available && systemStore.hid?.backend !== 'ch9329')
|
||||
})
|
||||
@@ -677,6 +693,114 @@ async function captureFrameOverlay() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Captures the currently displayed video frame as a JPEG data URL.
 *
 * Uses the `<img>` element in MJPEG mode and the `<video>` element otherwise.
 * Frames wider than 1920px are scaled down keeping the aspect ratio; smaller
 * frames are never upscaled.
 *
 * @returns The screenshot with its pixel size, or `null` when no frame is
 *   available or the capture throws (the error is logged).
 */
async function captureComputerUseFrame(): Promise<ComputerUseScreenshot | null> {
  const MAX_WIDTH = 1920

  try {
    const canvas = document.createElement('canvas')
    const ctx = canvas.getContext('2d')
    if (!ctx) return null

    // Pick the element that currently shows the stream, with its intrinsic size.
    let frame: HTMLImageElement | HTMLVideoElement
    let frameWidth: number
    let frameHeight: number
    if (videoMode.value === 'mjpeg') {
      const img = videoRef.value
      if (!img || !img.naturalWidth || !img.naturalHeight) return null
      frame = img
      frameWidth = img.naturalWidth
      frameHeight = img.naturalHeight
    } else {
      const video = webrtcVideoRef.value
      if (!video || !video.videoWidth || !video.videoHeight) return null
      frame = video
      frameWidth = video.videoWidth
      frameHeight = video.videoHeight
    }

    const scale = Math.min(1, MAX_WIDTH / frameWidth)
    canvas.width = Math.max(1, Math.round(frameWidth * scale))
    canvas.height = Math.max(1, Math.round(frameHeight * scale))
    ctx.drawImage(frame, 0, 0, canvas.width, canvas.height)

    const dataUrl = canvas.toDataURL('image/jpeg', 0.82)
    return { data_url: dataUrl, width: canvas.width, height: canvas.height }
  } catch (err) {
    console.error('[ComputerUse] Failed to capture frame:', err)
    return null
  }
}
|
||||
|
||||
/**
 * Applies one server message to the local session state and timeline.
 * Message types without a branch below are ignored.
 */
function handleComputerUseMessage(message: ComputerUseServerMessage) {
  if (message.type === 'session_updated') {
    const { session } = message
    computerUseSession.value = session
    if (session.last_error) {
      pushComputerUseTimeline({ type: 'error', text: session.last_error })
    }
    if (session.final_message) {
      pushComputerUseTimeline({ type: 'assistant', text: session.final_message })
    }
    return
  }

  if (message.type === 'screenshot_captured') {
    pushComputerUseTimeline({ type: 'screenshot', screenshot: message.screenshot })
    return
  }

  if (message.type === 'actions_executed') {
    pushComputerUseTimeline({ type: 'actions_executed', actions: message.actions })
    return
  }

  if (message.type === 'error') {
    const text = message.message
    pushComputerUseTimeline({ type: 'error', text })
    toast.error('Computer Use failed', { description: text })
  }
}
|
||||
|
||||
/**
 * Appends an entry to the timeline with a freshly generated id.
 * An entry is dropped when it repeats the entry directly before it: same
 * type and same text, or the same list of executed actions.
 */
function pushComputerUseTimeline(item: NewComputerUseTimelineItem) {
  const entries = computerUseTimeline.value
  const previous = entries[entries.length - 1]

  if (previous?.type === item.type) {
    const repeatsText = 'text' in previous && 'text' in item && previous.text === item.text
    if (repeatsText) return

    const repeatsActions =
      previous.type === 'actions_executed' &&
      item.type === 'actions_executed' &&
      JSON.stringify(previous.actions) === JSON.stringify(item.actions)
    if (repeatsActions) return
  }

  // Timestamp plus a running counter keeps ids unique within the same millisecond.
  const id = `${Date.now()}-${computerUseTimelineSeq++}`
  entries.push({ id, ...item } as ComputerUseTimelineItem)
}
|
||||
|
||||
/** Empties the timeline and makes the next task start a new conversation. */
function clearComputerUseTimeline() {
  computerUseConversationStarted.value = false
  computerUseTimeline.value = []
}
|
||||
|
||||
/**
 * Opens the Computer Use panel, then connects the socket and refreshes the
 * session snapshot. Both steps are best-effort: a failure leaves the panel
 * open and the previous session value in place.
 */
async function openComputerUse() {
  computerUseOpen.value = true

  try {
    await computerUseSocket.connect()
  } catch {
    // The connection state and error are exposed by the socket composable itself.
  }

  try {
    computerUseSession.value = await computerUseApi.session()
  } catch {
    // Keep whatever session is already known.
  }
}
|
||||
|
||||
/**
 * Starts a Computer Use task for `prompt`.
 *
 * Connects the socket first, records the prompt in the timeline, then calls
 * the start endpoint. Follow-up prompts continue the conversation once a
 * start has succeeded. Failures are added to the timeline and shown as a toast.
 */
async function startComputerUse(prompt: string) {
  try {
    await computerUseSocket.connect()
    pushComputerUseTimeline({ type: 'user', text: prompt })

    const request = {
      prompt,
      continue_conversation: computerUseConversationStarted.value,
      client_id: computerUseSocket.clientId,
    }
    computerUseSession.value = await computerUseApi.start(request)
    computerUseConversationStarted.value = true
  } catch (err: any) {
    const reason = err?.message
    pushComputerUseTimeline({ type: 'error', text: reason ?? 'Computer Use start failed' })
    toast.error('Computer Use start failed', { description: reason })
  }
}
|
||||
|
||||
/** Requests a stop and stores the returned session; a failure is only shown as a toast. */
async function stopComputerUse() {
  try {
    const stopped = await computerUseApi.stop()
    computerUseSession.value = stopped
  } catch (err: any) {
    const reason = err?.message
    toast.error('Computer Use stop failed', { description: reason })
  }
}
|
||||
|
||||
function waitForVideoFirstFrame(el: HTMLVideoElement, timeoutMs = 2000): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
let done = false
|
||||
@@ -2706,6 +2830,7 @@ onUnmounted(() => {
|
||||
@reset="handleReset"
|
||||
@wol="handleWol"
|
||||
@open-terminal="openTerminal"
|
||||
@open-computer-use="openComputerUse"
|
||||
/>
|
||||
<div class="flex-1 overflow-hidden relative">
|
||||
<div
|
||||
@@ -2715,7 +2840,11 @@ onUnmounted(() => {
|
||||
background-size: 20px 20px;
|
||||
"
|
||||
/>
|
||||
<div class="relative h-full w-full flex items-center justify-center p-1 sm:p-4">
|
||||
<div class="relative flex h-full w-full min-w-0 items-stretch gap-3 p-1 sm:p-4">
|
||||
<div
|
||||
class="flex min-w-0 flex-1 items-center justify-center transition-all duration-300"
|
||||
:class="{ 'md:pr-1': computerUsePanelVisible }"
|
||||
>
|
||||
<div
|
||||
ref="videoContainerRef"
|
||||
class="relative bg-black overflow-hidden flex items-center justify-center"
|
||||
@@ -2906,6 +3035,17 @@ onUnmounted(() => {
|
||||
</div>
|
||||
</Transition>
|
||||
</div>
|
||||
</div>
|
||||
<ComputerUseSheet
|
||||
v-model:open="computerUseOpen"
|
||||
:connected="computerUseSocket.connected.value"
|
||||
:ws-error="computerUseSocket.error.value"
|
||||
:session="computerUseSession"
|
||||
:timeline="computerUseTimeline"
|
||||
@start="startComputerUse"
|
||||
@stop="stopComputerUse"
|
||||
@clear="clearComputerUseTimeline"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<Teleport :to="virtualKeyboardAttached ? '#keyboard-anchor' : 'body'" :disabled="virtualKeyboardAttached">
|
||||
|
||||
Reference in New Issue
Block a user