Chat Prefix Completion Guide
Learn how to use prefix completion to guide AI responses and create more controlled, structured outputs with DeepSeek's chat models.
Overview
Prefix completion allows you to:
- Control response format: Guide the AI to start responses in a specific way
- Ensure consistency: Maintain consistent output structures
- Improve reliability: Anchoring the start of the reply keeps the model focused on the requested task and format
- Create templates: Build reusable response patterns
What is Prefix Completion?
Prefix completion involves providing the beginning of the AI's response as part of your prompt, allowing you to:
- Set the tone: Start responses with specific phrases or styles
- Structure output: Begin with formatting like JSON, XML, or markdown
- Guide reasoning: Start with "Let me think step by step..."
- Ensure format: Begin with specific data structures
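Mechanically, the final message in your request is an assistant message whose content is the partial reply; in DeepSeek's chat prefix completion (beta) API this message also carries a "prefix" flag, and the API returns only the continuation, which you concatenate with your prefix to get the full answer. A minimal sketch of that flow (the prompt and prefix text here are placeholders):
python
# Hypothetical message list illustrating the mechanic: the final assistant
# message holds the partial reply the model should continue.
messages = [
    {"role": "user", "content": "List three prime numbers."},
    {"role": "assistant", "content": "The first three primes are ", "prefix": True}
]
# The API returns only the continuation, e.g. "2, 3, and 5."
# full_reply = "The first three primes are " + continuation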
Basic Usage
Simple Prefix Example
python
from openai import OpenAI

# DeepSeek exposes chat prefix completion through its beta endpoint,
# so the client must point at the /beta base URL.
client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.deepseek.com/beta"
)

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {
            "role": "user",
            "content": "Explain quantum computing."
        },
        {
            # The last message must be an assistant message with "prefix": True;
            # its content becomes the beginning of the model's reply.
            "role": "assistant",
            "content": "Quantum computing is a revolutionary technology that",
            "prefix": True
        }
    ],
    max_tokens=200
)

# The returned content is the continuation of the prefix
print(response.choices[0].message.content)
JSON Format Prefix
python
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {
            "role": "user",
            "content": "Analyze the sentiment of this text: 'I love this product!'"
        },
        {
            "role": "assistant",
            "content": "{\n \"sentiment\": \"",
            "prefix": True
        }
    ],
    max_tokens=100
)

# The output continues the JSON structure started by the prefix
print(response.choices[0].message.content)
# Expected continuation:
# positive",
#  "confidence": 0.95,
#  "explanation": "The text expresses strong positive emotion..."
# }
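Because the API returns only the continuation, you reassemble the full JSON by prepending the prefix before parsing. A small sketch (the prefix string mirrors the example above; the parsing step is illustrative):
python
import json

prefix = "{\n \"sentiment\": \""
# Combine the prefix you sent with the continuation the model returned,
# then parse the result to confirm it is valid JSON.
full_json = prefix + response.choices[0].message.content
analysis = json.loads(full_json)
print(analysis["sentiment"])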
Advanced Techniques
Structured Data Generation
Creating Consistent JSON Responses
python
import json

def generate_structured_analysis(text: str):
    """Generate structured sentiment analysis"""
    # json.dumps escapes quotes and special characters in the user text
    # so the prefix remains valid JSON.
    prefix = "{\n \"text\": " + json.dumps(text) + ",\n \"sentiment\": \""
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "system",
                "content": "You are a sentiment analysis expert. Always respond in valid JSON format."
            },
            {
                "role": "user",
                "content": f"Analyze the sentiment of this text: '{text}'"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=200,
        temperature=0.3
    )
    # Prepend the prefix so the returned string is the complete JSON object
    return prefix + response.choices[0].message.content

# Usage
result = generate_structured_analysis("The weather is absolutely beautiful today!")
print(result)
XML Format Responses
python
def generate_xml_summary(article: str):
    """Generate article summary in XML format"""
    prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<summary>\n <title>"
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Summarize this article: {article}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=300
    )
    # Prepend the prefix so the caller receives the complete XML document
    return prefix + response.choices[0].message.content
Step-by-Step Reasoning
python
def guided_problem_solving(problem: str):
    """Guide the AI through step-by-step problem solving"""
    prefix = "Let me solve this step by step:\n\nStep 1:"
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Solve this problem: {problem}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=500
    )
    return prefix + response.choices[0].message.content

# Example usage
problem = "If a train travels 120 km in 2 hours, what is its average speed?"
solution = guided_problem_solving(problem)
print(solution)
Code Generation with Prefixes
python
def generate_function_with_prefix(description: str, language: str):
    """Generate code with specific formatting"""
    if language.lower() == "python":
        prefix = "def "
    elif language.lower() == "javascript":
        prefix = "function "
    elif language.lower() == "java":
        prefix = "public static "
    else:
        prefix = ""
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Write a {language} function that {description}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=300
    )
    return prefix + response.choices[0].message.content

# Usage
code = generate_function_with_prefix(
    "calculates the factorial of a number",
    "python"
)
print(code)
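A related pattern from DeepSeek's documentation is to combine a markdown code-fence prefix with the stop parameter, so the model emits only the code body and stops at the closing fence. A sketch of that approach (the function and prompt wording below are illustrative):
python
def generate_code_block(task: str) -> str:
    """Ask for code inside a markdown fence and stop at the closing fence."""
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "user", "content": f"Please write Python code that {task}"},
            # Start the reply inside a code fence...
            {"role": "assistant", "content": "```python\n", "prefix": True}
        ],
        # ...and stop generating when the model closes the fence.
        stop=["```"],
        max_tokens=300
    )
    return response.choices[0].message.content

print(generate_code_block("computes the factorial of a number"))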
Use Cases and Patterns
1. Data Extraction
python
def extract_contact_info(text: str):
    """Extract contact information in structured format"""
    prefix = "Contact Information:\n- Name: "
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Extract contact information from this text: {text}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=200
    )
    return prefix + response.choices[0].message.content

# Example
text = "John Smith can be reached at john.smith@email.com or call him at (555) 123-4567"
contacts = extract_contact_info(text)
print(contacts)
2. Report Generation
python
def generate_report_section(data: dict, section: str):
    """Generate specific report sections with consistent formatting"""
    prefixes = {
        "executive_summary": "## Executive Summary\n\nThis report presents ",
        "methodology": "## Methodology\n\nThe analysis was conducted using ",
        "findings": "## Key Findings\n\n1. ",
        "recommendations": "## Recommendations\n\nBased on our analysis, we recommend "
    }
    prefix = prefixes.get(section, "")
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Generate a {section} section for a report based on this data: {data}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=400
    )
    return prefix + response.choices[0].message.content
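A possible call, using made-up sample metrics purely for illustration:
python
# Hypothetical data for demonstration purposes only
sales_data = {"q3_revenue": 1_200_000, "q4_revenue": 1_450_000, "growth": "20.8%"}
summary = generate_report_section(sales_data, "executive_summary")
print(summary)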
3. API Response Formatting
python
def format_api_response(query: str, response_type: str):
    """Format responses for different API endpoints"""
    prefixes = {
        "success": '{\n "status": "success",\n "data": ',
        "error": '{\n "status": "error",\n "error_code": ',
        "paginated": '{\n "status": "success",\n "page": 1,\n "total_pages": ',
        "list": '{\n "status": "success",\n "items": ['
    }
    prefix = prefixes.get(response_type, "")
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Create an API response for: {query}"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=300
    )
    return prefix + response.choices[0].message.content
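For example (the query string and response type below are arbitrary):
python
payload = format_api_response("a user profile for user id 42", "success")
print(payload)
# The result starts with the "success" envelope and is completed by the model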
4. Educational Content
python
def create_lesson_plan(topic: str, grade_level: str):
    """Create structured lesson plans"""
    prefix = (
        f"# Lesson Plan: {topic}\n\n"
        f"**Grade Level:** {grade_level}\n\n"
        "## Learning Objectives\n"
        "By the end of this lesson, students will be able to:\n1. "
    )
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Create a lesson plan about {topic} for {grade_level} students"
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=500
    )
    return prefix + response.choices[0].message.content
Best Practices
1. Choose Appropriate Prefixes
python
# Good prefixes - specific and helpful
good_prefixes = [
    "Based on the data provided, ",
    "{\n \"result\": ",
    "Step 1: Identify the problem\n",
    "## Summary\n\nThe main points are:\n1. "
]

# Avoid vague prefixes
avoid_prefixes = [
    "Well, ",
    "I think ",
    "Maybe ",
    "It seems like "
]
2. Maintain Consistency
python
class ResponseFormatter:
    """Consistent response formatting using prefixes"""

    def __init__(self, client):
        self.client = client
        self.prefixes = {
            "analysis": "## Analysis Results\n\n**Key Findings:**\n- ",
            "summary": "## Summary\n\nThis document covers ",
            "recommendation": "## Recommendation\n\nBased on the analysis, I recommend ",
            "explanation": "## Explanation\n\nTo understand this concept, let's break it down:\n\n1. "
        }

    def format_response(self, content: str, format_type: str):
        prefix = self.prefixes.get(format_type, "")
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {
                    "role": "user",
                    "content": content
                },
                {
                    "role": "assistant",
                    "content": prefix,
                    "prefix": True
                }
            ],
            max_tokens=400
        )
        return prefix + response.choices[0].message.content
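A brief usage sketch (the question text is arbitrary):
python
formatter = ResponseFormatter(client)
answer = formatter.format_response(
    "What are the trade-offs of microservices?",
    "analysis"
)
print(answer)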
3. Handle Edge Cases
python
def safe_prefix_completion(prompt: str, prefix: str, max_retries: int = 3):
    """Safely handle prefix completion with retries"""
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    },
                    {
                        "role": "assistant",
                        "content": prefix,
                        "prefix": True
                    }
                ],
                max_tokens=300,
                temperature=0.3
            )
            result = prefix + response.choices[0].message.content
            # Validate the result: it should contain more than just the prefix
            if len(result.strip()) > len(prefix.strip()):
                return result
            else:
                print(f"Attempt {attempt + 1}: Response too short, retrying...")
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                raise
    return None
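For instance (the prompt and prefix below are placeholders):
python
result = safe_prefix_completion(
    "Summarize the benefits of unit testing.",
    "## Summary\n\nThe main benefits are:\n1. "
)
if result is not None:
    print(result)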
4. Validate Output Format
python
import json
import re
import xml.etree.ElementTree as ET

def validate_json_response(response: str) -> bool:
    """Validate that response is valid JSON"""
    try:
        json.loads(response)
        return True
    except json.JSONDecodeError:
        return False

def validate_markdown_structure(response: str) -> bool:
    """Validate markdown structure"""
    # Check for at least one header line
    if not re.search(r'^#+\s', response, re.MULTILINE):
        return False
    return True

def validate_xml_structure(response: str) -> bool:
    """Validate XML structure"""
    try:
        ET.fromstring(response)
        return True
    except ET.ParseError:
        return False
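These validators pair naturally with retry logic: regenerate until the reassembled output parses. A short sketch reusing validate_json_response, with an illustrative JSON prefix:
python
def json_completion_with_validation(prompt: str, max_retries: int = 3):
    """Retry until the prefix-completed output is valid JSON."""
    prefix = '{\n "answer": "'
    for _ in range(max_retries):
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": prefix, "prefix": True}
            ],
            max_tokens=300,
            temperature=0.3
        )
        candidate = prefix + response.choices[0].message.content
        if validate_json_response(candidate):
            return candidate
    return None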
Advanced Patterns
1. Multi-Step Prefixes
python
def multi_step_analysis(data: str):
    """Perform analysis with multiple guided steps"""
    steps = [
        "## Data Analysis Report\n\n### Step 1: Data Overview\n",
        "### Step 2: Pattern Identification\n",
        "### Step 3: Statistical Analysis\n",
        "### Step 4: Conclusions and Recommendations\n"
    ]
    conversation = [
        {
            "role": "user",
            "content": f"Analyze this data comprehensively: {data}"
        },
        # A single assistant prefix message grows as each step completes,
        # keeping a valid user/assistant message structure.
        {"role": "assistant", "content": "", "prefix": True}
    ]
    for step_prefix in steps:
        # Extend the prefix with the next section heading
        conversation[-1]["content"] += step_prefix
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=conversation,
            max_tokens=200
        )
        # Append the generated step so it becomes part of the next prefix
        conversation[-1]["content"] += response.choices[0].message.content + "\n\n"
    return conversation[-1]["content"]
2. Conditional Prefixes
python
def conditional_response_format(query: str, user_type: str):
    """Use different prefixes based on user type"""
    prefixes = {
        "technical": "## Technical Analysis\n\n**Implementation Details:**\n",
        "business": "## Business Impact Summary\n\n**Key Points:**\n- ",
        "beginner": "## Simple Explanation\n\nLet me explain this in simple terms:\n",
        "expert": "## Advanced Analysis\n\n**Technical Specifications:**\n"
    }
    prefix = prefixes.get(user_type, "")
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": query
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=400
    )
    return prefix + response.choices[0].message.content
3. Template-Based Generation
python
class TemplateGenerator:
    """Generate content using predefined templates with prefixes"""

    def __init__(self, client):
        self.client = client
        self.templates = {
            "email": {
                "formal": "Subject: {subject}\n\nDear {recipient},\n\nI hope this email finds you well. ",
                "casual": "Hi {recipient}!\n\nHope you're doing great! ",
                "follow_up": "Subject: Follow-up on {subject}\n\nHi {recipient},\n\nI wanted to follow up on "
            },
            "report": {
                "executive": "# Executive Report: {title}\n\n## Executive Summary\n\n",
                "technical": "# Technical Report: {title}\n\n## Overview\n\nThis technical analysis covers ",
                "progress": "# Progress Report: {title}\n\n## Current Status\n\nAs of {date}, the project status is "
            }
        }

    def generate_content(self, template_type: str, template_style: str,
                         content_request: str, **kwargs):
        """Generate content using templates"""
        template = self.templates.get(template_type, {}).get(template_style, "")
        prefix = template.format(**kwargs)
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {
                    "role": "user",
                    "content": content_request
                },
                {
                    "role": "assistant",
                    "content": prefix,
                    "prefix": True
                }
            ],
            max_tokens=500
        )
        return prefix + response.choices[0].message.content

# Usage
generator = TemplateGenerator(client)
email = generator.generate_content(
    "email", "formal",
    "Write an email about the quarterly meeting",
    subject="Q4 Planning Meeting",
    recipient="Team"
)
print(email)
Troubleshooting
Common Issues
- Incomplete responses: Increase max_tokens
- Format inconsistency: Use more specific prefixes
- Repetitive content: Adjust the temperature setting
- Invalid JSON/XML: Add validation and retry logic
- Errors about the last message role: Make sure the final assistant message sets "prefix": True and the client uses the beta base URL
Debugging Tips
python
def debug_prefix_completion(prompt: str, prefix: str):
    """Debug prefix completion issues"""
    print(f"Prompt: {prompt}")
    print(f"Prefix: {repr(prefix)}")
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": prompt
            },
            {
                "role": "assistant",
                "content": prefix,
                "prefix": True
            }
        ],
        max_tokens=200,
        temperature=0.3
    )
    result = prefix + response.choices[0].message.content
    print(f"Full response: {repr(result)}")
    print(f"Response length: {len(result)}")
    return result
Performance Considerations
Token Efficiency
python
def optimize_prefix_tokens(prefix: str, max_prefix_tokens: int = 50):
    """Optimize prefix length for token efficiency"""
    # Estimate tokens (rough approximation: 1 token ≈ 4 characters)
    estimated_tokens = len(prefix) // 4
    if estimated_tokens > max_prefix_tokens:
        # Truncate the prefix while trying to preserve its structure
        truncated = prefix[:max_prefix_tokens * 4]
        # Try to end at a natural break point
        for break_point in ['\n', '. ', ', ', ' ']:
            last_break = truncated.rfind(break_point)
            if last_break > len(truncated) * 0.8:  # Keep at least 80% of the budget
                return truncated[:last_break + len(break_point)]
        return truncated
    return prefix
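For example, trimming an overly long prefix before sending it (the long_prefix string is illustrative):
python
long_prefix = "## Detailed Analysis Report\n\n" + "This section examines many factors. " * 20
short_prefix = optimize_prefix_tokens(long_prefix, max_prefix_tokens=50)
print(f"Original length: {len(long_prefix)} chars, trimmed: {len(short_prefix)} chars")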
Caching Strategies
python
import hashlib

class PrefixCompletionCache:
    """Cache prefix completion results"""

    def __init__(self, client, cache_size: int = 100):
        self.client = client
        self.cache = {}
        self.cache_size = cache_size

    def _get_cache_key(self, prompt: str, prefix: str) -> str:
        """Generate cache key"""
        content = f"{prompt}|{prefix}"
        return hashlib.md5(content.encode()).hexdigest()

    def get_completion(self, prompt: str, prefix: str, **kwargs):
        """Get completion with caching"""
        cache_key = self._get_cache_key(prompt, prefix)
        if cache_key in self.cache:
            return self.cache[cache_key]
        # Generate new completion
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": prefix, "prefix": True}
            ],
            **kwargs
        )
        result = prefix + response.choices[0].message.content
        # Cache the result, evicting the oldest entry if the cache is full
        if len(self.cache) >= self.cache_size:
            oldest_key = next(iter(self.cache))
            del self.cache[oldest_key]
        self.cache[cache_key] = result
        return result
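A short usage sketch (the prompt, prefix, and max_tokens values are arbitrary):
python
cache = PrefixCompletionCache(client, cache_size=50)
summary = cache.get_completion(
    "Summarize the benefits of caching API responses.",
    "## Summary\n\nThe main benefits are:\n1. ",
    max_tokens=200
)
print(summary)
# A second identical call returns the cached result without another API request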