FastAPI:现代 Python Web 框架
# Quote the extra so shells such as zsh do not treat [standard] as a glob pattern.
pip install fastapi "uvicorn[standard]"
最小例子:
# main.py
from fastapi import FastAPI
app = FastAPI()
# Root route: answers GET / with a fixed greeting payload.
@app.get("/")
def hello():
    greeting = {"message": "hello"}
    return greeting
# Echoes back the uid taken from the URL path; uid is declared as int in the signature.
@app.get("/users/{uid}")
def get_user(uid: int):
    payload = {"uid": uid}
    return payload
uvicorn main:app --reload
# 访问 http://localhost:8000
# 自动生成 API 文档 http://localhost:8000/docs
Pydantic:自动验证请求体
from fastapi import FastAPI
from pydantic import BaseModel
app = FastAPI()
# Request body schema accepted by the /chat route.
class ChatRequest(BaseModel):
    # Text to send to the model; required because it has no default.
    message: str
    # Optional; falls back to 0.7 when the client omits it.
    temperature: float = 0.7
    # Optional; falls back to 1024 when the client omits it.
    max_tokens: int = 1024
# Response body schema returned by the /chat route.
class ChatResponse(BaseModel):
    # The model's reply text.
    answer: str
    # Token count reported back to the caller.
    tokens_used: int
# POST /chat: forwards the request fields to call_llm and wraps the reply in a ChatResponse.
@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    # By the time this body runs, the request body has been validated as a ChatRequest.
    llm_args = (req.message, req.temperature, req.max_tokens)
    reply = call_llm(*llm_args)
    # tokens_used is a fixed placeholder value in this example.
    response = ChatResponse(answer=reply, tokens_used=100)
    return response
非法请求 → 自动返回 422 + 清晰错误信息。
包装 Claude API 成自己的服务
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from anthropic import AsyncAnthropic
import os
app = FastAPI()
client = AsyncAnthropic()
# Request body schema for the Claude-backed /chat route.
class ChatRequest(BaseModel):
    # User message forwarded to the model; required.
    message: str
    # System prompt forwarded to the model; a default is used when omitted.
    system: str = "你是助手"
# Response body schema for the Claude-backed /chat route.
class ChatResponse(BaseModel):
    # Text of the model's reply.
    answer: str
# POST /chat: sends the user's message (plus the system prompt) to Claude and
# returns the text of the first content block of the reply.
# Any failure is reported to the client as HTTP 500 with a generic message.
@app.post("/chat")
async def chat(req: ChatRequest) -> ChatResponse:
    # Local import: this snippet's import block does not bring in logging.
    import logging

    try:
        msg = await client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=req.system,
            messages=[{"role": "user", "content": req.message}],
        )
        return ChatResponse(answer=msg.content[0].text)
    except Exception as e:
        # Keep the full error and traceback in the server log only; echoing
        # str(e) back to the client would expose upstream/internal details.
        logging.getLogger(__name__).exception("chat 失败")
        raise HTTPException(500, "模型调用失败,请稍后重试") from e
流式响应(前端体验关键)
from fastapi.responses import StreamingResponse
# POST /chat/stream: streams Claude's reply back to the client as plain text,
# one text delta at a time, instead of waiting for the full answer.
@app.post("/chat/stream")
async def chat_stream(req: ChatRequest):
    async def generate():
        """Yield each text chunk from the model stream as it arrives."""
        async with client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            # Forward the system prompt so the streaming route behaves like
            # the non-streaming /chat route for the same request body.
            system=req.system,
            messages=[{"role": "user", "content": req.message}],
        ) as stream:
            async for text in stream.text_stream:
                yield text

    return StreamingResponse(generate(), media_type="text/plain")
前端用 fetch + response.body.getReader() 逐块读取并追加显示,即可实现打字机效果。
加 API key 鉴权
from fastapi import Depends, HTTPException, Header
# Keys accepted by require_key (hard-coded for this example).
API_KEYS = {"key-abc123", "key-def456"}


def require_key(x_api_key: str = Header(None)):
    """Reject the request with HTTP 401 unless x_api_key is one of API_KEYS.

    The header parameter defaults to None, which is not in API_KEYS, so a
    request without the header is rejected as well.
    """
    key_is_known = x_api_key in API_KEYS
    if key_is_known:
        return
    raise HTTPException(401, "无效的 API key")
# Placeholder route showing how to attach require_key as a route-level
# dependency; the handler body is intentionally elided.
@app.post("/chat", dependencies=[Depends(require_key)])
def chat(req: ChatRequest): ...
限流(rate limit)
pip install slowapi
from fastapi import Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

# Rate-limit bucket key: the client's remote address.
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
# Register slowapi's stock handler so a RateLimitExceeded error is returned
# to the client as an HTTP 429 response.
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


# Placeholder route limited to 10 requests per minute per client address.
# slowapi needs the endpoint to declare a `request: Request` argument;
# without it the limiter cannot hook into the call.
@app.post("/chat")
@limiter.limit("10/minute")
def chat(request: Request, req: ChatRequest): ...
部署:用 uvicorn + Gunicorn
pip install gunicorn
gunicorn main:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
-w 4 = 4 个工作进程。
Docker 化
# Image for the API service, built on the slim Python 3.12 base image.
FROM python:3.12-slim
# All following paths are relative to /app inside the image.
WORKDIR /app
# requirements.txt is copied and installed before the rest of the source
# (presumably so the dependency layer can be cached across source-only changes).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application source into the image.
COPY . .
# Start gunicorn with 4 uvicorn workers serving main:app on port 8000, all interfaces.
CMD ["gunicorn", "main:app", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8000"]
docker build -t ai-api .
docker run -p 8000:8000 -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY ai-api
部署到云
| 平台 | 适合 |
|---|---|
| Railway / Render | 一键部署,零配置 |
| Fly.io | 全球边缘节点 |
| Vercel / Cloudflare Workers | Serverless(注意冷启动) |
| AWS Lambda + API Gateway | 按调用付费 |
| 自购 VPS + Nginx | 最便宜(也最折腾) |
监控 / 日志
import logging
logger = logging.getLogger(__name__)
# POST /chat with request logging: logs who called and how long the message
# is, then logs the traceback of any failure before propagating it.
# NOTE(review): req.user_id assumes a ChatRequest that declares a user_id field.
@app.post("/chat")
async def chat(req: ChatRequest):
    logger.info("请求: user=%s, msg_len=%d", req.user_id, len(req.message))
    try:
        ...
    except Exception:
        logger.exception("chat 失败")
        # Re-raise after logging; swallowing the error here would make the
        # handler return None and report a failed request as a success.
        raise
生产再加 Sentry / Datadog / Prometheus——这些超出本篇范围。
一份生产级别的最小骨架
# main.py
from fastapi import FastAPI, Depends, HTTPException, Header
from pydantic import BaseModel
from anthropic import AsyncAnthropic
import os, logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
app = FastAPI(title="AI Chat API")
client = AsyncAnthropic()
# Request body schema for the skeleton's /chat route; both fields are required.
class ChatRequest(BaseModel):
    # User message forwarded to the model.
    message: str
    # Caller-supplied user identifier; written to the request log.
    user_id: str
# Response body schema for the skeleton's /chat route.
class ChatResponse(BaseModel):
    # Text of the model's reply.
    answer: str
def require_key(x_api_key: str = Header(None)):
    """Reject the request with HTTP 401 unless x_api_key matches the API_KEY env var.

    Fails closed: if API_KEY is unset or empty, or the header is missing,
    the request is rejected. (A plain `!=` comparison would let a request
    with no header through whenever API_KEY is unset, since None == None.)

    Raises:
        HTTPException: status 401 when the key is missing or does not match.
    """
    # Local import: the file's import block does not bring in secrets.
    import secrets

    expected = os.getenv("API_KEY")
    if not expected or x_api_key is None:
        raise HTTPException(401)
    # Constant-time comparison; compare bytes so non-ASCII input cannot
    # make compare_digest raise TypeError.
    if not secrets.compare_digest(x_api_key.encode(), expected.encode()):
        raise HTTPException(401)
# POST /chat (guarded by require_key): logs the caller and message length,
# sends the message to Claude, and returns the first content block's text.
@app.post("/chat", response_model=ChatResponse, dependencies=[Depends(require_key)])
async def chat(req: ChatRequest):
    log.info("user=%s len=%d", req.user_id, len(req.message))
    user_turn = {"role": "user", "content": req.message}
    reply = await client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[user_turn],
    )
    first_block = reply.content[0]
    return ChatResponse(answer=first_block.text)
# Health-check route: always answers GET /health with a fixed OK payload.
@app.get("/health")
def health():
    status = {"ok": True}
    return status
跑起来后前端 / 移动端都能直接用。
下一篇是最后一篇:评估与监控。