mirror of
https://github.com/Piebald-AI/claude-code-system-prompts.git
synced 2026-05-30 21:54:18 +08:00
390 lines
10 KiB
Markdown
390 lines
10 KiB
Markdown
<!--
|
|
name: 'Data: Claude API reference — Python'
|
|
description: Python SDK reference including installation, client initialization, basic requests, thinking, and multi-turn conversation
|
|
ccVersion: 2.1.51
|
|
-->
|
|
# Claude API — Python
|
|
|
|
## Installation
|
|
|
|
\`\`\`bash
|
|
pip install anthropic
|
|
\`\`\`
|
|
|
|
## Client Initialization
|
|
|
|
\`\`\`python
|
|
import anthropic
|
|
|
|
# Default (uses ANTHROPIC_API_KEY env var)
|
|
client = anthropic.Anthropic()
|
|
|
|
# Explicit API key
|
|
client = anthropic.Anthropic(api_key="your-api-key")
|
|
|
|
# Async client
|
|
async_client = anthropic.AsyncAnthropic()
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Basic Message Request
|
|
|
|
\`\`\`python
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
messages=[
|
|
{"role": "user", "content": "What is the capital of France?"}
|
|
]
|
|
)
|
|
print(response.content[0].text)
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## System Prompts
|
|
|
|
\`\`\`python
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
system="You are a helpful coding assistant. Always provide examples in Python.",
|
|
messages=[{"role": "user", "content": "How do I read a JSON file?"}]
|
|
)
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Vision (Images)
|
|
|
|
### Base64
|
|
|
|
\`\`\`python
|
|
import base64
|
|
|
|
with open("image.png", "rb") as f:
|
|
image_data = base64.standard_b64encode(f.read()).decode("utf-8")
|
|
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
messages=[{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "base64",
|
|
"media_type": "image/png",
|
|
"data": image_data
|
|
}
|
|
},
|
|
{"type": "text", "text": "What's in this image?"}
|
|
]
|
|
}]
|
|
)
|
|
\`\`\`
|
|
|
|
### URL
|
|
|
|
\`\`\`python
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
messages=[{
|
|
"role": "user",
|
|
"content": [
|
|
{
|
|
"type": "image",
|
|
"source": {
|
|
"type": "url",
|
|
"url": "https://example.com/image.png"
|
|
}
|
|
},
|
|
{"type": "text", "text": "Describe this image"}
|
|
]
|
|
}]
|
|
)
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Prompt Caching
|
|
|
|
Cache large context to reduce costs (up to 90% savings).
|
|
|
|
\`\`\`python
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
system=[{
|
|
"type": "text",
|
|
"text": "You are an expert on this large document...",
|
|
"cache_control": {"type": "ephemeral"} # default TTL is 5 minutes
|
|
}],
|
|
messages=[{"role": "user", "content": "Summarize the key points"}]
|
|
)
|
|
|
|
# With explicit TTL (time-to-live)
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=1024,
|
|
system=[{
|
|
"type": "text",
|
|
"text": "You are an expert on this large document...",
|
|
"cache_control": {"type": "ephemeral", "ttl": "1h"} # 1 hour TTL
|
|
}],
|
|
messages=[{"role": "user", "content": "Summarize the key points"}]
|
|
)
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Extended Thinking
|
|
|
|
> **Opus 4.6 and Sonnet 4.6:** Use adaptive thinking. \`budget_tokens\` is deprecated on both Opus 4.6 and Sonnet 4.6.
|
|
> **Older models:** Use \`thinking: {type: "enabled", budget_tokens: N}\` (must be < \`max_tokens\`, min 1024).
|
|
|
|
\`\`\`python
|
|
# Opus 4.6: adaptive thinking (recommended)
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6",
|
|
max_tokens=16000,
|
|
thinking={"type": "adaptive"},
|
|
output_config={"effort": "high"}, # low | medium | high | max
|
|
messages=[{"role": "user", "content": "Solve this step by step..."}]
|
|
)
|
|
|
|
# Access thinking and response
|
|
for block in response.content:
|
|
if block.type == "thinking":
|
|
print(f"Thinking: {block.thinking}")
|
|
elif block.type == "text":
|
|
print(f"Response: {block.text}")
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Error Handling
|
|
|
|
\`\`\`python
|
|
import anthropic
|
|
|
|
try:
|
|
response = client.messages.create(...)
|
|
except anthropic.BadRequestError as e:
|
|
print(f"Bad request: {e.message}")
|
|
except anthropic.AuthenticationError:
|
|
print("Invalid API key")
|
|
except anthropic.PermissionDeniedError:
|
|
print("API key lacks required permissions")
|
|
except anthropic.NotFoundError:
|
|
print("Invalid model or endpoint")
|
|
except anthropic.RateLimitError as e:
|
|
retry_after = int(e.response.headers.get("retry-after", "60"))
|
|
print(f"Rate limited. Retry after {retry_after}s.")
|
|
except anthropic.APIStatusError as e:
|
|
if e.status_code >= 500:
|
|
print(f"Server error ({e.status_code}). Retry later.")
|
|
else:
|
|
print(f"API error: {e.message}")
|
|
except anthropic.APIConnectionError:
|
|
print("Network error. Check internet connection.")
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Multi-Turn Conversations
|
|
|
|
The API is stateless — send the full conversation history each time.
|
|
|
|
\`\`\`python
|
|
class ConversationManager:
    """Manage a multi-turn conversation with the Claude API.

    The API is stateless, so the running history is kept in
    ``self.messages`` and resent in full on every call.
    """

    def __init__(
        self,
        client: "anthropic.Anthropic",
        model: str,
        system: "str | None" = None,
    ):
        """Store the client, the model name, and an optional system prompt."""
        self.client = client
        self.model = model
        self.system = system
        self.messages = []

    def send(self, user_message: str, **kwargs) -> str:
        """Send ``user_message`` and return the assistant's text reply.

        Args:
            user_message: Text of the next user turn.
            **kwargs: Extra arguments forwarded to ``messages.create``.
                ``max_tokens`` defaults to 1024 when not supplied; a
                ``system`` given here overrides the instance-level prompt
                for this call only.

        Returns:
            The text of the reply (all text blocks concatenated).

        Raises:
            ValueError: If the response contains no text block.
            Exception: Anything raised by ``messages.create``. In both
                cases the pending user turn is removed from the history
                first, so the history keeps alternating user/assistant.
        """
        self.messages.append({"role": "user", "content": user_message})

        # Merge the default with caller overrides in one dict so that a
        # caller-supplied max_tokens is never passed twice.
        params = {"max_tokens": 1024, **kwargs}
        if self.system is not None:
            # Omit the parameter entirely when no system prompt is set.
            params.setdefault("system", self.system)

        try:
            response = self.client.messages.create(
                model=self.model,
                messages=self.messages,
                **params,
            )
            # Pick out text blocks by type: with thinking enabled the first
            # content block is a thinking block, not text.
            text_parts = [
                block.text for block in response.content if block.type == "text"
            ]
            if not text_parts:
                raise ValueError(
                    "Response contained no text block "
                    f"(stop_reason={response.stop_reason!r})"
                )
        except Exception:
            # Roll back the unanswered user turn before propagating.
            self.messages.pop()
            raise

        assistant_message = "".join(text_parts)
        self.messages.append({"role": "assistant", "content": assistant_message})

        return assistant_message
|
|
|
|
# Usage
|
|
conversation = ConversationManager(
|
|
client=anthropic.Anthropic(),
|
|
model="claude-opus-4-6",
|
|
system="You are a helpful assistant."
|
|
)
|
|
|
|
response1 = conversation.send("My name is Alice.")
|
|
response2 = conversation.send("What's my name?") # Claude remembers "Alice"
|
|
\`\`\`
|
|
|
|
**Rules:**
|
|
|
|
- Messages must alternate between \`user\` and \`assistant\`
|
|
- First message must be \`user\`
|
|
|
|
---
|
|
|
|
### Compaction (long conversations)
|
|
|
|
> **Beta, Opus 4.6 only.** When conversations approach the 200K context window, compaction automatically summarizes earlier context server-side. The API returns a \`compaction\` block; you must pass it back on subsequent requests — append \`response.content\`, not just the text.
|
|
|
|
\`\`\`python
|
|
import anthropic
|
|
|
|
client = anthropic.Anthropic()
|
|
messages = []
|
|
|
|
def chat(user_message: str) -> str:
    """Send one user turn with server-side compaction enabled.

    Appends the turn to the module-level ``messages`` history, calls the
    beta Messages API through the module-level ``client``, and stores the
    full ``response.content`` as the assistant turn.

    Args:
        user_message: Text of the next user turn.

    Returns:
        The text of the first text block in the response, or ``""`` when
        the response has no text block.

    Raises:
        Exception: Anything raised by ``client.beta.messages.create``; the
            unanswered user turn is removed from ``messages`` first.
    """
    messages.append({"role": "user", "content": user_message})

    try:
        response = client.beta.messages.create(
            betas=["compact-2026-01-12"],
            model="claude-opus-4-6",
            max_tokens=4096,
            messages=messages,
            context_management={
                "edits": [{"type": "compact_20260112"}]
            },
        )
    except Exception:
        # Drop the unanswered user turn so a later call does not send two
        # consecutive user messages.
        messages.pop()
        raise

    # Append full content — compaction blocks must be preserved
    messages.append({"role": "assistant", "content": response.content})

    # Supply a default so a response without a text block returns "" instead
    # of leaking StopIteration to the caller.
    return next(
        (block.text for block in response.content if block.type == "text"),
        "",
    )
|
|
|
|
# Compaction triggers automatically when context grows large
|
|
print(chat("Help me build a Python web scraper"))
|
|
print(chat("Add support for JavaScript-rendered pages"))
|
|
print(chat("Now add rate limiting and error handling"))
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Stop Reasons
|
|
|
|
The \`stop_reason\` field in the response indicates why the model stopped generating:
|
|
|
|
| Value | Meaning |
|
|
|-------|---------|
|
|
| \`end_turn\` | Claude finished its response naturally |
|
|
| \`max_tokens\` | Hit the \`max_tokens\` limit — increase it or use streaming |
|
|
| \`stop_sequence\` | Hit a custom stop sequence |
|
|
| \`tool_use\` | Claude wants to call a tool — execute it and continue |
|
|
| \`pause_turn\` | Model paused and can be resumed (agentic flows) |
|
|
| \`refusal\` | Claude refused for safety reasons — output may not match your schema |
|
|
|
|
---
|
|
|
|
## Cost Optimization Strategies
|
|
|
|
### 1. Use Prompt Caching for Repeated Context
|
|
|
|
\`\`\`python
|
|
# Cache large system prompts or documents
|
|
system_with_cache = [{
|
|
"type": "text",
|
|
"text": large_document_text, # e.g., 50KB of context
|
|
"cache_control": {"type": "ephemeral"} # add "ttl": "1h" for longer caching
|
|
}]
|
|
|
|
# First request: full cost
|
|
# Subsequent requests: ~90% cheaper for cached portion
|
|
\`\`\`
|
|
|
|
### 2. Choose the Right Model
|
|
|
|
\`\`\`python
|
|
# Default to Opus for most tasks
|
|
response = client.messages.create(
|
|
model="claude-opus-4-6", # $5.00/$25.00 per 1M tokens
|
|
max_tokens=1024,
|
|
messages=[{"role": "user", "content": "Explain quantum computing"}]
|
|
)
|
|
|
|
# Use Sonnet for high-volume production workloads
|
|
standard_response = client.messages.create(
|
|
model="claude-sonnet-4-6", # $3.00/$15.00 per 1M tokens
|
|
max_tokens=1024,
|
|
messages=[{"role": "user", "content": "Summarize this document"}]
|
|
)
|
|
|
|
# Use Haiku only for simple, speed-critical tasks
|
|
simple_response = client.messages.create(
|
|
model="claude-haiku-4-5", # $1.00/$5.00 per 1M tokens
|
|
max_tokens=256,
|
|
messages=[{"role": "user", "content": "Classify this as positive or negative"}]
|
|
)
|
|
\`\`\`
|
|
|
|
### 3. Use Token Counting Before Requests
|
|
|
|
\`\`\`python
|
|
count_response = client.messages.count_tokens(
|
|
model="claude-opus-4-6",
|
|
messages=messages,
|
|
system=system
|
|
)
|
|
|
|
estimated_input_cost = count_response.input_tokens * 0.000005 # $5/1M tokens
|
|
print(f"Estimated input cost: \${estimated_input_cost:.4f}")
|
|
\`\`\`
|
|
|
|
---
|
|
|
|
## Retry with Exponential Backoff
|
|
|
|
> **Note:** The Anthropic SDK automatically retries connection errors, request timeouts (408), conflicts (409), rate limits (429), and server errors (5xx) with exponential backoff. You can configure this with \`max_retries\` (default: 2). Only implement custom retry logic if you need behavior beyond what the SDK provides.
|
|
|
|
\`\`\`python
|
|
import time
|
|
import random
|
|
import anthropic
|
|
|
|
def call_with_retry(
    client: "anthropic.Anthropic",
    max_retries: int = 5,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    **kwargs
):
    """Call ``client.messages.create`` with exponential backoff retry.

    Rate-limit errors and server errors (status >= 500) are retried; any
    other API status error is re-raised immediately.

    Args:
        client: Client whose ``messages.create`` is called.
        max_retries: Total number of attempts, including the first one.
            Must be at least 1.
        base_delay: Delay in seconds before the second attempt; doubled on
            each subsequent attempt.
        max_delay: Upper bound in seconds for any single delay.
        **kwargs: Arguments forwarded unchanged to ``messages.create``.

    Returns:
        Whatever ``client.messages.create`` returns.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        anthropic.RateLimitError: If the final attempt is rate limited.
        anthropic.APIStatusError: Immediately for non-retryable client
            errors, or after the final attempt for server errors.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and there is no
        # exception to re-raise.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return client.messages.create(**kwargs)
        except anthropic.RateLimitError:
            if is_last_attempt:
                raise
        except anthropic.APIStatusError as e:
            # Client errors (4xx except 429) should not be retried
            if e.status_code < 500 or is_last_attempt:
                raise

        # Only reached when another attempt follows, so no time is spent
        # sleeping right before giving up.
        delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
        print(f"Retry {attempt + 1}/{max_retries} after {delay:.1f}s")
        time.sleep(delay)
|
|
\`\`\`
|