
Chat Prefix Completion Guide

Learn how to use prefix completion to guide AI responses and create more controlled, structured outputs with DeepSeek's chat models.

Overview

Prefix completion allows you to:

  • Control response format: Guide the AI to start responses in a specific way
  • Ensure consistency: Maintain consistent output structures
  • Improve accuracy: Anchor the response in explicit starting context so it stays on topic
  • Create templates: Build reusable response patterns

What is Prefix Completion?

Prefix completion means you supply the beginning of the assistant's response as part of the request, and the model continues from that text (a minimal request sketch follows the list below). This allows you to:

  1. Set the tone: Start responses with specific phrases or styles
  2. Structure output: Begin with formatting like JSON, XML, or markdown
  3. Guide reasoning: Start with "Let me think step by step..."
  4. Ensure format: Begin with specific data structures
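
Mechanically, the request is an ordinary chat completion whose final message has the assistant role and contains the text you want the model to continue; the API then returns only the continuation. With DeepSeek this is a beta feature: point the client at the beta base URL (https://api.deepseek.com/beta) and set "prefix": True on that final assistant message, as in the sketch below. The Basic Usage examples include the flag; the longer helper functions later in this guide omit it for brevity, so add it to their final assistant message when running them.

python
# Minimal request payload for prefix completion (a sketch; shape follows DeepSeek's beta API)
payload = {
    "model": "deepseek-chat",
    "messages": [
        {"role": "user", "content": "Explain quantum computing."},
        {
            "role": "assistant",
            "content": "Quantum computing is",  # the text the model will continue
            "prefix": True                      # required by DeepSeek's prefix-completion beta
        }
    ],
    "max_tokens": 200
}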

Basic Usage

Simple Prefix Example

python
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.deepseek.com/beta"  # prefix completion is a beta feature and uses the beta base URL
)

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {
            "role": "user",
            "content": "Explain quantum computing."
        },
        {
            "role": "assistant",
            "content": "Quantum computing is a revolutionary technology that",
            "prefix": True  # mark this message as the prefix the model should continue
        }
    ],
    max_tokens=200
)

print(response.choices[0].message.content)  # prints only the continuation; the prefix itself is not echoed back

JSON Format Prefix

python
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {
            "role": "user",
            "content": "Analyze the sentiment of this text: 'I love this product!'"
        },
        {
            "role": "assistant",
            "content": "{\n  \"sentiment\": \"",
            "prefix": True
        }
    ],
    max_tokens=100
)

# Output will continue the JSON structure
print(response.choices[0].message.content)
# Expected: "positive",
#   "confidence": 0.95,
#   "explanation": "The text expresses strong positive emotion..."
# }
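
Because the response contains only the continuation, prepend the prefix before parsing when you need the complete JSON object. A small follow-up sketch:

python
import json

prefix = "{\n  \"sentiment\": \""
completed = prefix + response.choices[0].message.content

# Parse the reassembled document; fall back to the raw string if the model
# did not close the object (e.g. because max_tokens was too low)
try:
    analysis = json.loads(completed)
    print(analysis["sentiment"])
except json.JSONDecodeError:
    print("Incomplete JSON:", completed)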

Advanced Techniques

Structured Data Generation

Creating Consistent JSON Responses

python
def generate_structured_analysis(text: str):
    """Generate structured sentiment analysis"""
    
    # Build the prefix once so it can be prepended to the model's continuation below.
    # Note: text is inserted verbatim; escape it (e.g. with json.dumps) if it may contain quotes.
    prefix = "{\n  \"text\": \"" + text + "\",\n  \"sentiment\": \""

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "system",
                "content": "You are a sentiment analysis expert. Always respond in valid JSON format."
            },
            {
                "role": "user",
                "content": f"Analyze the sentiment of this text: '{text}'"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=200,
        temperature=0.3
    )
    
    # The API returns only the continuation, so prepend the prefix to get the complete JSON
    return prefix + response.choices[0].message.content

# Usage
result = generate_structured_analysis("The weather is absolutely beautiful today!")
print(result)

XML Format Responses

python
def generate_xml_summary(article: str):
    """Generate article summary in XML format"""
    
    prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<summary>\n  <title>"

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Summarize this article: {article}"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=300
    )
    
    # Prepend the prefix so the caller receives a complete XML document
    return prefix + response.choices[0].message.content

Step-by-Step Reasoning

python
def guided_problem_solving(problem: str):
    """Guide the AI through step-by-step problem solving"""
    
    prefix = "Let me solve this step by step:\n\nStep 1:"

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Solve this problem: {problem}"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=500
    )
    
    # Prepend the prefix so the returned solution starts with "Let me solve this step by step:"
    return prefix + response.choices[0].message.content

# Example usage
problem = "If a train travels 120 km in 2 hours, what is its average speed?"
solution = guided_problem_solving(problem)
print(solution)

Code Generation with Prefixes

python
def generate_function_with_prefix(description: str, language: str):
    """Generate code with specific formatting"""
    
    if language.lower() == "python":
        prefix = "def "
    elif language.lower() == "javascript":
        prefix = "function "
    elif language.lower() == "java":
        prefix = "public static "
    else:
        prefix = ""
    
    response = client.chat.completions.create(
        model="deepseek-coder",
        messages=[
            {
                "role": "user",
                "content": f"Write a {language} function that {description}"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=300
    )
    
    return prefix + response.choices[0].message.content

# Usage
code = generate_function_with_prefix(
    "calculates the factorial of a number", 
    "python"
)
print(code)
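
A related pattern, mirroring the example in DeepSeek's prefix-completion documentation, is to make an opening markdown fence the prefix and pass a stop sequence so generation ends at the closing fence; the continuation is then clean code with nothing to strip. A sketch (generate_fenced_code is a hypothetical helper):

python
def generate_fenced_code(description: str):
    """Ask for code inside a markdown fence, then return just the code (sketch)"""
    prefix = "```python\n"
    response = client.chat.completions.create(
        model="deepseek-coder",
        messages=[
            {"role": "user", "content": f"Write a Python function that {description}"},
            {"role": "assistant", "content": prefix}
        ],
        max_tokens=300,
        stop=["```"]  # end generation when the model closes the fence
    )
    # The continuation is the code body itself, since the opening fence was the prefix
    return response.choices[0].message.content

# Usage
print(generate_fenced_code("reverses a string"))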

Use Cases and Patterns

1. Data Extraction

python
def extract_contact_info(text: str):
    """Extract contact information in structured format"""
    
    prefix = "Contact Information:\n- Name: "

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Extract contact information from this text: {text}"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=200
    )
    
    # Prepend the prefix so the returned text includes the "Contact Information:" header
    return prefix + response.choices[0].message.content

# Example
text = "John Smith can be reached at john.smith@email.com or call him at (555) 123-4567"
contacts = extract_contact_info(text)
print(contacts)

2. Report Generation

python
def generate_report_section(data: dict, section: str):
    """Generate specific report sections with consistent formatting"""
    
    prefixes = {
        "executive_summary": "## Executive Summary\n\nThis report presents ",
        "methodology": "## Methodology\n\nThe analysis was conducted using ",
        "findings": "## Key Findings\n\n1. ",
        "recommendations": "## Recommendations\n\nBased on our analysis, we recommend "
    }
    
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Generate a {section} section for a report based on this data: {data}"
            },
            {
                "role": "assistant",
                "content": prefixes.get(section, "")
            }
        ],
        max_tokens=400
    )
    
    # Prepend the section prefix so the returned text includes its heading
    return prefixes.get(section, "") + response.choices[0].message.content

3. API Response Formatting

python
def format_api_response(query: str, response_type: str):
    """Format responses for different API endpoints"""
    
    prefixes = {
        "success": '{\n  "status": "success",\n  "data": ',
        "error": '{\n  "status": "error",\n  "error_code": ',
        "paginated": '{\n  "status": "success",\n  "page": 1,\n  "total_pages": ',
        "list": '{\n  "status": "success",\n  "items": ['
    }
    
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Create an API response for: {query}"
            },
            {
                "role": "assistant",
                "content": prefixes.get(response_type, "")
            }
        ],
        max_tokens=300
    )
    
    # Prepend the prefix so the returned string is the complete response body
    return prefixes.get(response_type, "") + response.choices[0].message.content

4. Educational Content

python
def create_lesson_plan(topic: str, grade_level: str):
    """Create structured lesson plans"""
    
    prefix = (
        f"# Lesson Plan: {topic}\n\n**Grade Level:** {grade_level}\n\n"
        "## Learning Objectives\nBy the end of this lesson, students will be able to:\n1. "
    )

    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": f"Create a lesson plan about {topic} for {grade_level} students"
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=500
    )
    
    # Prepend the prefix so the plan includes its title and objectives header
    return prefix + response.choices[0].message.content

Best Practices

1. Choose Appropriate Prefixes

python
# Good prefixes - specific and helpful
good_prefixes = [
    "Based on the data provided, ",
    "{\n  \"result\": ",
    "Step 1: Identify the problem\n",
    "## Summary\n\nThe main points are:\n1. "
]

# Avoid vague prefixes
avoid_prefixes = [
    "Well, ",
    "I think ",
    "Maybe ",
    "It seems like "
]

2. Maintain Consistency

python
class ResponseFormatter:
    """Consistent response formatting using prefixes"""
    
    def __init__(self, client):
        self.client = client
        self.prefixes = {
            "analysis": "## Analysis Results\n\n**Key Findings:**\n- ",
            "summary": "## Summary\n\nThis document covers ",
            "recommendation": "## Recommendation\n\nBased on the analysis, I recommend ",
            "explanation": "## Explanation\n\nTo understand this concept, let's break it down:\n\n1. "
        }
    
    def format_response(self, content: str, format_type: str):
        prefix = self.prefixes.get(format_type, "")
        
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {
                    "role": "user",
                    "content": content
                },
                {
                    "role": "assistant",
                    "content": prefix
                }
            ],
            max_tokens=400
        )
        
        return prefix + response.choices[0].message.content
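
Usage is analogous to the earlier helpers (the prompt is illustrative):

python
# Usage
formatter = ResponseFormatter(client)
analysis = formatter.format_response(
    "Review the Q3 sales figures and highlight notable trends",
    "analysis"
)
print(analysis)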

3. Handle Edge Cases

python
def safe_prefix_completion(prompt: str, prefix: str, max_retries: int = 3):
    """Safely handle prefix completion with retries"""
    
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    },
                    {
                        "role": "assistant",
                        "content": prefix
                    }
                ],
                max_tokens=300,
                temperature=0.3
            )
            
            result = prefix + response.choices[0].message.content
            
            # Validate the result
            if len(result.strip()) > len(prefix.strip()):
                return result
            else:
                print(f"Attempt {attempt + 1}: Response too short, retrying...")
                
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                raise
    
    return None

4. Validate Output Format

python
import json
import re

def validate_json_response(response: str) -> bool:
    """Validate that response is valid JSON"""
    try:
        json.loads(response)
        return True
    except json.JSONDecodeError:
        return False

def validate_markdown_structure(response: str) -> bool:
    """Validate markdown structure"""
    # Check for headers
    if not re.search(r'^#+\s', response, re.MULTILINE):
        return False
    return True

def validate_xml_structure(response: str) -> bool:
    """Validate XML structure"""
    import xml.etree.ElementTree as ET
    try:
        ET.fromstring(response)
        return True
    except ET.ParseError:
        return False
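
These validators slot in directly after a completion call, for example to decide whether a result can be used as-is (generate_structured_analysis is the helper defined earlier in this guide):

python
# Usage: check a prefix-completed JSON analysis before consuming it
result = generate_structured_analysis("The service was slow and the staff seemed uninterested.")

if validate_json_response(result):
    print("Valid JSON:", result)
else:
    print("Malformed JSON; consider retrying or lowering temperature")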

Advanced Patterns

1. Multi-Step Prefixes

python
def multi_step_analysis(data: str):
    """Perform analysis with multiple guided steps"""
    
    steps = [
        "## Data Analysis Report\n\n### Step 1: Data Overview\n",
        "### Step 2: Pattern Identification\n",
        "### Step 3: Statistical Analysis\n",
        "### Step 4: Conclusions and Recommendations\n"
    ]
    
    full_response = ""
    conversation = [
        {
            "role": "user",
            "content": f"Analyze this data comprehensively: {data}"
        }
    ]
    
    for i, step_prefix in enumerate(steps):
        conversation.append({
            "role": "assistant",
            "content": step_prefix
        })
        
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=conversation,
            max_tokens=200
        )
        
        step_content = step_prefix + response.choices[0].message.content
        full_response += step_content + "\n\n"
        
        # Update conversation with the complete step
        conversation[-1]["content"] = step_content
    
    return full_response
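
Example usage (the data string is illustrative):

python
# Example usage
data = "Monthly sales (units): Jan 120, Feb 135, Mar 128, Apr 160, May 172"
report = multi_step_analysis(data)
print(report)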

2. Conditional Prefixes

python
def conditional_response_format(query: str, user_type: str):
    """Use different prefixes based on user type"""
    
    prefixes = {
        "technical": "## Technical Analysis\n\n**Implementation Details:**\n",
        "business": "## Business Impact Summary\n\n**Key Points:**\n- ",
        "beginner": "## Simple Explanation\n\nLet me explain this in simple terms:\n",
        "expert": "## Advanced Analysis\n\n**Technical Specifications:**\n"
    }
    
    prefix = prefixes.get(user_type, "")
    
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": query
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=400
    )
    
    return prefix + response.choices[0].message.content
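
For example, the same question can be framed for different audiences (the query is illustrative):

python
# Usage: same query, different audiences
query = "How does database indexing speed up queries?"
print(conditional_response_format(query, "beginner"))
print(conditional_response_format(query, "technical"))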

3. Template-Based Generation

python
class TemplateGenerator:
    """Generate content using predefined templates with prefixes"""
    
    def __init__(self, client):
        self.client = client
        self.templates = {
            "email": {
                "formal": "Subject: {subject}\n\nDear {recipient},\n\nI hope this email finds you well. ",
                "casual": "Hi {recipient}!\n\nHope you're doing great! ",
                "follow_up": "Subject: Follow-up on {subject}\n\nHi {recipient},\n\nI wanted to follow up on "
            },
            "report": {
                "executive": "# Executive Report: {title}\n\n## Executive Summary\n\n",
                "technical": "# Technical Report: {title}\n\n## Overview\n\nThis technical analysis covers ",
                "progress": "# Progress Report: {title}\n\n## Current Status\n\nAs of {date}, the project status is "
            }
        }
    
    def generate_content(self, template_type: str, template_style: str, 
                        content_request: str, **kwargs):
        """Generate content using templates"""
        
        template = self.templates.get(template_type, {}).get(template_style, "")
        prefix = template.format(**kwargs)
        
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {
                    "role": "user",
                    "content": content_request
                },
                {
                    "role": "assistant",
                    "content": prefix
                }
            ],
            max_tokens=500
        )
        
        return prefix + response.choices[0].message.content

# Usage
generator = TemplateGenerator(client)
email = generator.generate_content(
    "email", "formal",
    "Write an email about the quarterly meeting",
    subject="Q4 Planning Meeting",
    recipient="Team"
)

Troubleshooting

Common Issues

  1. Incomplete responses: Increase max_tokens
  2. Format inconsistency: Use more specific prefixes
  3. Repetitive content: Adjust temperature settings
  4. Invalid JSON/XML: Add validation and retry logic (see the sketch below)
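
For the last item, the retry idea from "Handle Edge Cases" can be combined with the validators from "Validate Output Format"; generate_valid_json below is a hypothetical name for that combination:

python
def generate_valid_json(prompt: str, prefix: str, max_retries: int = 3):
    """Retry a JSON prefix completion until the reassembled output validates (sketch)"""
    for attempt in range(max_retries):
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": prefix}
            ],
            max_tokens=300,
            temperature=0.3
        )
        candidate = prefix + response.choices[0].message.content
        if validate_json_response(candidate):  # validator from "Validate Output Format"
            return candidate
        print(f"Attempt {attempt + 1}: invalid JSON, retrying...")
    return None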

Debugging Tips

python
def debug_prefix_completion(prompt: str, prefix: str):
    """Debug prefix completion issues"""
    
    print(f"Prompt: {prompt}")
    print(f"Prefix: {repr(prefix)}")
    
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {
                "role": "user",
                "content": prompt
            },
            {
                "role": "assistant",
                "content": prefix
            }
        ],
        max_tokens=200,
        temperature=0.3
    )
    
    result = prefix + response.choices[0].message.content
    print(f"Full response: {repr(result)}")
    print(f"Response length: {len(result)}")
    
    return result

Performance Considerations

Token Efficiency

python
def optimize_prefix_tokens(prefix: str, max_prefix_tokens: int = 50):
    """Optimize prefix length for token efficiency"""
    
    # Estimate tokens (rough approximation: 1 token ≈ 4 characters)
    estimated_tokens = len(prefix) // 4
    
    if estimated_tokens > max_prefix_tokens:
        # Truncate prefix while maintaining structure
        truncated = prefix[:max_prefix_tokens * 4]
        
        # Try to end at a natural break point
        for break_point in ['\n', '. ', ', ', ' ']:
            last_break = truncated.rfind(break_point)
            if last_break > len(truncated) * 0.8:  # Keep at least 80%
                return truncated[:last_break + len(break_point)]
        
        return truncated
    
    return prefix
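
For example, a verbose report prefix is trimmed back to a natural break point (the prefix is illustrative):

python
# Usage
long_prefix = "## Detailed Quarterly Performance Analysis\n\nThis section examines revenue, costs, and margins across all regions, beginning with "
print(optimize_prefix_tokens(long_prefix, max_prefix_tokens=20))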

Caching Strategies

python
import hashlib

class PrefixCompletionCache:
    """Cache prefix completion results"""
    
    def __init__(self, client, cache_size: int = 100):
        self.client = client
        self.cache = {}
        self.cache_size = cache_size
    
    def _get_cache_key(self, prompt: str, prefix: str) -> str:
        """Generate cache key"""
        content = f"{prompt}|{prefix}"
        return hashlib.md5(content.encode()).hexdigest()
    
    def get_completion(self, prompt: str, prefix: str, **kwargs):
        """Get completion with caching"""
        cache_key = self._get_cache_key(prompt, prefix)
        
        if cache_key in self.cache:
            return self.cache[cache_key]
        
        # Generate new completion
        response = self.client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": prefix}
            ],
            **kwargs
        )
        
        result = prefix + response.choices[0].message.content
        
        # Cache result
        if len(self.cache) >= self.cache_size:
            # Remove oldest entry
            oldest_key = next(iter(self.cache))
            del self.cache[oldest_key]
        
        self.cache[cache_key] = result
        return result
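
Usage (the prompt and prefix are illustrative; the second call is served from the cache without another API request):

python
# Usage
cache = PrefixCompletionCache(client)

prompt = "Summarize the benefits of unit testing."
prefix = "## Summary\n\nThe main benefits are:\n1. "

first = cache.get_completion(prompt, prefix, max_tokens=200)
second = cache.get_completion(prompt, prefix, max_tokens=200)  # cache hit
print(first == second)  # True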
