HadTavern/agentui/api/server.py
from fastapi import FastAPI, Request, HTTPException, Query, Header
import logging
from logging.handlers import RotatingFileHandler
import json
from urllib.parse import urlsplit, urlunsplit, parse_qsl, urlencode, unquote
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
from typing import Any, Dict, List, Literal, Optional
from agentui.pipeline.executor import PipelineExecutor
from agentui.pipeline.defaults import default_pipeline
from agentui.pipeline.storage import load_pipeline, save_pipeline, list_presets, load_preset, save_preset
class UnifiedParams(BaseModel):
temperature: float = 0.7
max_tokens: Optional[int] = None
top_p: Optional[float] = 1.0
stop: Optional[List[str]] = None
class UnifiedMessage(BaseModel):
role: Literal["system", "user", "assistant", "tool"]
content: Any
tool_call_id: Optional[str] = None
name: Optional[str] = None
class UnifiedChatRequest(BaseModel):
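    """Vendor-agnostic chat request used internally by the pipeline.

    Incoming OpenAI, Gemini, and Claude payloads are normalized into this
    shape by normalize_to_unified() below.
    """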
vendor_format: Literal["openai", "gemini", "claude", "unknown"] = "unknown"
model: str = ""
messages: List[UnifiedMessage] = Field(default_factory=list)
tools: Optional[List[Dict[str, Any]]] = None
tool_choice: Optional[Any] = None
params: UnifiedParams = Field(default_factory=UnifiedParams)
system: Optional[str] = None
stream: bool = False
metadata: Dict[str, Any] = Field(default_factory=dict)
def detect_vendor(payload: Dict[str, Any]) -> str:
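    """Best-effort vendor detection based on the payload shape.

    Illustrative behavior: a body with "contents"/"generationConfig" is treated
    as Gemini, one with "anthropic_version" (or provider == "anthropic") as
    Claude, one with "messages"/"model" as OpenAI; anything else is "unknown".
    """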
if "anthropic_version" in payload or payload.get("provider") == "anthropic":
return "claude"
# Gemini typical payload keys
if "contents" in payload or "generationConfig" in payload:
return "gemini"
# OpenAI typical keys
if "messages" in payload or "model" in payload:
return "openai"
return "unknown"
def normalize_to_unified(payload: Dict[str, Any]) -> UnifiedChatRequest:
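    """Convert an OpenAI, Gemini, or Claude request body into a UnifiedChatRequest.

    Raises HTTPException(400) when the vendor cannot be detected.
    """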
vendor = detect_vendor(payload)
if vendor == "openai":
model = payload.get("model", "")
messages = payload.get("messages", [])
system = None
        # OpenAI may carry the system prompt inside messages
norm_messages: List[UnifiedMessage] = []
for m in messages:
role = m.get("role", "user")
content = m.get("content")
if role == "system" and system is None and isinstance(content, str):
system = content
else:
norm_messages.append(UnifiedMessage(role=role, content=content))
params = UnifiedParams(
temperature=payload.get("temperature", 0.7),
max_tokens=payload.get("max_tokens"),
top_p=payload.get("top_p", 1.0),
stop=payload.get("stop"),
)
stream = bool(payload.get("stream", False))
return UnifiedChatRequest(
vendor_format="openai",
model=model,
messages=norm_messages,
params=params,
system=system,
stream=stream,
tools=payload.get("tools"),
tool_choice=payload.get("tool_choice"),
)
elif vendor == "gemini":
        # Gemini → Unified (simplified, text only)
model = payload.get("model", "")
contents = payload.get("contents", [])
norm_messages: List[UnifiedMessage] = []
for c in contents:
raw_role = c.get("role", "user")
            # Gemini uses roles "user" and "model"; map "model" -> "assistant"
role = "assistant" if raw_role == "model" else (raw_role if raw_role in {"user", "system", "assistant", "tool"} else "user")
parts = c.get("parts", [])
            # join the text parts together
text_parts = []
for p in parts:
if isinstance(p, dict) and "text" in p:
text_parts.append(p["text"])
content = "\n".join(text_parts) if text_parts else parts
norm_messages.append(UnifiedMessage(role=role, content=content))
gen = payload.get("generationConfig", {})
params = UnifiedParams(
temperature=gen.get("temperature", 0.7),
max_tokens=gen.get("maxOutputTokens"),
top_p=gen.get("topP", 1.0),
stop=gen.get("stopSequences"),
)
return UnifiedChatRequest(
vendor_format="gemini",
model=model,
messages=norm_messages,
params=params,
stream=False,
)
elif vendor == "claude":
model = payload.get("model", "")
system = payload.get("system")
messages = payload.get("messages", [])
norm_messages: List[UnifiedMessage] = []
for m in messages:
role = m.get("role", "user")
content_raw = m.get("content")
            # Anthropic Messages API: content may be a string or a list of blocks {type:"text", text:"..."}
if isinstance(content_raw, list):
text_parts: List[str] = []
for part in content_raw:
if isinstance(part, dict) and part.get("type") == "text" and isinstance(part.get("text"), str):
text_parts.append(part["text"])
content = "\n".join(text_parts)
else:
content = content_raw
norm_messages.append(UnifiedMessage(role=role, content=content))
params = UnifiedParams(
temperature=payload.get("temperature", 0.7),
max_tokens=payload.get("max_tokens"),
top_p=payload.get("top_p", 1.0),
stop=payload.get("stop"),
)
return UnifiedChatRequest(
vendor_format="claude",
model=model,
messages=norm_messages,
params=params,
system=system,
stream=False,
)
else:
raise HTTPException(status_code=400, detail="Unsupported or unknown vendor payload")
def build_macro_context(u: UnifiedChatRequest, incoming: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
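    """Build the macro/template context exposed to pipeline nodes.

    The returned dict carries vendor_format, model, system, chat (last_user plus
    the full message list), params, and the enriched incoming request
    (query_params and, when available, api_keys).
    """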
last_user = next((m.content for m in reversed(u.messages) if m.role == "user"), "")
inc = incoming or {}
    # Parse query parameters (including the key used by Gemini)
try:
qparams = dict(parse_qsl(inc.get("query", ""), keep_blank_values=True))
except Exception: # noqa: BLE001
qparams = {}
inc_enriched: Dict[str, Any] = dict(inc)
inc_enriched["query_params"] = qparams
    # Optional convenience view of the API keys
try:
headers = inc.get("headers") or {}
api_keys: Dict[str, Any] = {}
if isinstance(headers, dict):
api_keys["authorization"] = headers.get("authorization") or headers.get("Authorization")
api_keys["key"] = qparams.get("key")
if api_keys:
inc_enriched["api_keys"] = api_keys
except Exception: # noqa: BLE001
pass
return {
"vendor_format": u.vendor_format,
"model": u.model,
"system": u.system or "",
"chat": {
"last_user": last_user,
"messages": [m.model_dump() for m in u.messages],
},
"params": u.params.model_dump(),
"incoming": inc_enriched,
}
def jinja_render(template: str, ctx: Dict[str, Any]) -> str:
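    """Minimal placeholder renderer (not real Jinja2).

    Supports {{ dotted.path }} lookups and a simple |default("...") filter.
    Illustrative example: jinja_render("Hi {{ chat.last_user }}",
    {"chat": {"last_user": "Bob"}}) returns "Hi Bob".
    """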
    # To avoid pulling Jinja2 into the MVP: simple {{ key.path }} substitution
def get_value(path: str, data: Dict[str, Any]) -> Any:
cur: Any = data
for part in path.split('.'):
if isinstance(cur, dict):
cur = cur.get(part, "")
else:
return ""
return cur if isinstance(cur, (str, int, float)) else ""
    out = template
    import re
    # Replace each full "{{ ... }}" match; replacing the exact matched text avoids
    # missing placeholders whose inner spacing differs from a rebuilt "{{ expr }}".
    for match in re.finditer(r"\{\{\s*([^}]+?)\s*\}\}", template):
        expr = match.group(1).strip()
        # support simple default filter: {{ path|default(value) }}
        default_match = re.match(r"([^|]+)\|\s*default\((.*)\)", expr)
        if default_match:
            path = default_match.group(1).strip()
            fallback = default_match.group(2).strip()
            # strip quotes if present
            if (fallback.startswith("\"") and fallback.endswith("\"")) or (fallback.startswith("'") and fallback.endswith("'")):
                fallback = fallback[1:-1]
            raw_val = get_value(path, ctx)
            val = str(raw_val) if raw_val not in (None, "") else str(fallback)
        else:
            val = str(get_value(expr, ctx))
        out = out.replace(match.group(0), val)
    return out
async def execute_pipeline_echo(u: UnifiedChatRequest) -> Dict[str, Any]:
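    """Fallback echo pipeline used when the configured pipeline yields no result.

    Renders a trivial prompt, echoes it back as the assistant answer, and wraps
    it in an OpenAI-, Gemini-, or Claude-shaped response depending on vendor_format.
    """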
    # Mini pipeline: PromptTemplate -> LLMInvoke(echo) -> VendorFormatter
macro_ctx = build_macro_context(u)
# PromptTemplate
prompt_template = "System: {{ system }}\nUser: {{ chat.last_user }}"
rendered_prompt = jinja_render(prompt_template, macro_ctx)
    # LLMInvoke (echo, since the MVP has no real provider)
llm_response_text = f"[echo by {u.model}]\n" + rendered_prompt
# VendorFormatter
if u.vendor_format == "openai":
return {
"id": "mockcmpl-123",
"object": "chat.completion",
"model": u.model,
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": llm_response_text},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 0, "completion_tokens": len(llm_response_text.split()), "total_tokens": 0},
}
if u.vendor_format == "gemini":
return {
"candidates": [
{
"content": {
"role": "model",
"parts": [{"text": llm_response_text}],
},
"finishReason": "STOP",
"index": 0,
}
],
"modelVersion": u.model,
}
if u.vendor_format == "claude":
return {
"id": "msg_mock_123",
"type": "message",
"model": u.model,
"role": "assistant",
"content": [
{"type": "text", "text": llm_response_text}
],
"stop_reason": "end_turn",
}
raise HTTPException(status_code=400, detail="Unsupported vendor for formatting")
def create_app() -> FastAPI:
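    """Create the FastAPI app: vendor-compatible chat routes, the /ui static
    files, and the /admin pipeline/preset endpoints, with request/response logging."""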
app = FastAPI(title="НадTavern")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("agentui")
if not logger.handlers:
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)
file_handler = RotatingFileHandler("agentui.log", maxBytes=1_000_000, backupCount=3, encoding="utf-8")
file_handler.setLevel(logging.INFO)
logger.addHandler(stream_handler)
logger.addHandler(file_handler)
def _mask_headers(h: Dict[str, Any]) -> Dict[str, Any]:
        # Temporarily leave Authorization unmasked for debugging
hidden = {"x-api-key", "cookie"}
masked: Dict[str, Any] = {}
for k, v in h.items():
lk = k.lower()
if lk in hidden:
masked[k] = "***"
else:
masked[k] = v
return masked
def _sanitize_url(url: str) -> str:
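        """Mask sensitive query parameters (key, access_token, token) in a URL before logging."""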
try:
parts = urlsplit(url)
qs = parse_qsl(parts.query, keep_blank_values=True)
qs_masked = [(k, "***" if k.lower() in {"key", "access_token", "token"} else v) for k, v in qs]
return urlunsplit((parts.scheme, parts.netloc, parts.path, urlencode(qs_masked), parts.fragment))
except Exception: # noqa: BLE001
return url
async def _log_request(req: Request, raw_body: Optional[bytes] = None, parsed: Optional[Any] = None) -> None:
try:
url = _sanitize_url(str(req.url))
headers = _mask_headers(dict(req.headers))
body_preview = None
if raw_body is not None:
body_preview = raw_body.decode(errors="ignore")
if len(body_preview) > 4000:
body_preview = body_preview[:4000] + "...<truncated>"
payload = {
"event": "incoming_request",
"method": req.method,
"url": url,
"headers": headers,
"body": body_preview,
"json": parsed if isinstance(parsed, (dict, list)) else None,
}
logger.info("%s", json.dumps(payload, ensure_ascii=False))
except Exception: # noqa: BLE001
pass
async def _log_response(req: Request, status: int, data: Any) -> None:
try:
payload = {
"event": "outgoing_response",
"method": req.method,
"path": req.url.path,
"status": status,
"json": data if isinstance(data, (dict, list)) else None,
}
logger.info("%s", json.dumps(payload, ensure_ascii=False))
except Exception: # noqa: BLE001
pass
@app.get("/")
async def index() -> HTMLResponse:
html = (
"<html><head><title>НадTavern</title></head>"
"<body>"
"<h1>НадTavern</h1>"
"<p>Простой UI и API запущены.</p>"
"<p>POST /v1/chat/completions — универсальный эндпоинт (без стриминга)."
" Поддерживает OpenAI/Gemini/Claude формы. Возвращает в исходном формате.</p>"
"<p><a href='/ui'>Перейти в UI</a></p>"
"</body></html>"
)
return HTMLResponse(html)
@app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> JSONResponse:
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
        unified.stream = False  # MVP requirement: no streaming
        # context for the pipeline
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
    # Google AI Studio compatible routes (Gemini):
# POST /v1beta/models/{model}:generateContent?key=...
# POST /v1/models/{model}:generateContent?key=...
@app.post("/v1beta/models/{model}:generateContent")
async def gemini_generate_content_v1beta(model: str, request: Request, key: Optional[str] = Query(default=None)) -> JSONResponse: # noqa: ARG001
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
        # Make sure the model is present in the payload
if not isinstance(payload, dict):
raise HTTPException(status_code=400, detail="Invalid payload type")
payload = {**payload, "model": model}
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
unified.stream = False
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
@app.post("/v1/models/{model}:generateContent")
async def gemini_generate_content_v1(model: str, request: Request, key: Optional[str] = Query(default=None)) -> JSONResponse: # noqa: ARG001
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
if not isinstance(payload, dict):
raise HTTPException(status_code=400, detail="Invalid payload type")
payload = {**payload, "model": model}
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
unified.stream = False
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
    # Catch-all for cases where the colon in the path is encoded as %3A
@app.post("/v1beta/models/{rest_of_path:path}")
async def gemini_generate_content_v1beta_catchall(rest_of_path: str, request: Request, key: Optional[str] = Query(default=None)) -> JSONResponse: # noqa: ARG001
decoded = unquote(rest_of_path)
if ":generateContent" not in decoded:
raise HTTPException(status_code=404, detail="Not Found")
model = decoded.split(":generateContent", 1)[0]
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
if not isinstance(payload, dict):
raise HTTPException(status_code=400, detail="Invalid payload type")
payload = {**payload, "model": model}
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
unified.stream = False
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
@app.post("/v1/models/{rest_of_path:path}")
async def gemini_generate_content_v1_catchall(rest_of_path: str, request: Request, key: Optional[str] = Query(default=None)) -> JSONResponse: # noqa: ARG001
decoded = unquote(rest_of_path)
if ":generateContent" not in decoded:
raise HTTPException(status_code=404, detail="Not Found")
model = decoded.split(":generateContent", 1)[0]
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
if not isinstance(payload, dict):
raise HTTPException(status_code=400, detail="Invalid payload type")
payload = {**payload, "model": model}
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
unified.stream = False
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
# Anthropic Claude messages endpoint compatibility
@app.post("/v1/messages")
async def claude_messages(request: Request, anthropic_version: Optional[str] = Header(default=None)) -> JSONResponse: # noqa: ARG001
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
if not isinstance(payload, dict):
raise HTTPException(status_code=400, detail="Invalid payload type")
        # Mark as Anthropic; pass the version from the header into the payload for detection
if anthropic_version:
payload = {**payload, "anthropic_version": anthropic_version}
else:
payload = {**payload, "anthropic_version": payload.get("anthropic_version", "2023-06-01")}
await _log_request(request, raw_body=raw, parsed=payload)
unified = normalize_to_unified(payload)
unified.stream = False
incoming = {
"method": request.method,
"url": _sanitize_url(str(request.url)),
"path": request.url.path,
"query": request.url.query,
"headers": dict(request.headers),
"json": payload,
}
macro_ctx = build_macro_context(unified, incoming=incoming)
pipeline = load_pipeline()
executor = PipelineExecutor(pipeline)
last = await executor.run(macro_ctx)
result = last.get("result") or await execute_pipeline_echo(unified)
await _log_response(request, 200, result)
return JSONResponse(result)
app.mount("/ui", StaticFiles(directory="static", html=True), name="ui")
    # Admin API for the pipeline
@app.get("/admin/pipeline")
async def get_pipeline() -> JSONResponse:
return JSONResponse(load_pipeline())
@app.post("/admin/pipeline")
async def set_pipeline(request: Request) -> JSONResponse:
raw = await request.body()
try:
pipeline = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
        # basic validation
if not isinstance(pipeline, dict) or "nodes" not in pipeline:
raise HTTPException(status_code=400, detail="Invalid pipeline format")
save_pipeline(pipeline)
return JSONResponse({"ok": True})
# Presets
@app.get("/admin/presets")
async def get_presets() -> JSONResponse:
return JSONResponse({"items": list_presets()})
@app.get("/admin/presets/{name}")
async def get_preset(name: str) -> JSONResponse:
try:
return JSONResponse(load_preset(name))
except FileNotFoundError:
raise HTTPException(status_code=404, detail="Preset not found")
@app.post("/admin/presets/{name}")
async def put_preset(name: str, request: Request) -> JSONResponse:
raw = await request.body()
try:
payload = json.loads(raw or b"{}")
except Exception: # noqa: BLE001
raise HTTPException(status_code=400, detail="Invalid JSON")
if not isinstance(payload, dict) or "nodes" not in payload:
raise HTTPException(status_code=400, detail="Invalid pipeline format")
save_preset(name, payload)
return JSONResponse({"ok": True})
return app
app = create_app()
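
# A minimal local-run sketch (assumption: this module is importable as
# "agentui.api.server"; host/port below are illustrative, not project settings):
#
#   uvicorn agentui.api.server:app --host 127.0.0.1 --port 8000
#
# Illustrative request against the unified endpoint; with the default echo
# fallback the assistant message simply echoes the rendered prompt:
#
#   curl -s http://127.0.0.1:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "demo", "messages": [{"role": "user", "content": "Hi"}]}'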