本文介绍 LLM 上下文窗口超出错误的处理策略,包括文本摘要压缩、滑动窗口、分块处理等方法,帮助开发者处理长文本场景。
LLM 有固定的上下文窗口限制(例如 GPT-4 Turbo 为 128K tokens,早期的 GPT-4 为 8K tokens),输入超出限制时 API 会返回错误,或者内容被截断。本文介绍处理长文本的几种策略。
import openai
# Demonstrates detecting a context-window overflow reported by the API.
# NOTE(review): `openai.ChatCompletion` / `openai.error` look like the
# pre-1.0 SDK interface — confirm the installed openai version before reuse.
try:
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=long_messages,  # may exceed the model's context limit
    )
except openai.error.InvalidRequestError as e:
    # The overflow case is recognised by "maximum context" in the error text.
    if "maximum context" in str(e).lower():
        print("超出上下文窗口限制")
    else:
        # Any other invalid-request error is unrelated to context length;
        # re-raise it instead of silently discarding it.
        raise
from langchain.text_splitter import RecursiveCharacterTextSplitter
def summarize_long_text(
    text: str,
    max_length: int = 4000,
    max_input_chars: int = 10000,
) -> str:
    """Return *text* unchanged if it is short enough, else an LLM summary.

    Args:
        text: The text to shorten.
        max_length: Maximum length, in characters, of the returned string.
        max_input_chars: Number of leading characters of ``text`` that are
            sent to the model; anything beyond that is not summarized.

    Returns:
        ``text`` itself when ``len(text) <= max_length``; otherwise the
        model's summary, cut to at most ``max_length`` characters.
    """
    if len(text) <= max_length:
        return text

    excerpt = text[:max_input_chars]
    summary_prompt = (
        f"将以下文本摘要到 {max_length} 字符以内,保留关键信息:\n{excerpt}"
    )
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": summary_prompt}],
    )
    # The prompt only *asks* for the limit, so the reply can be longer (or
    # have no content at all); enforce the documented contract here.
    summary = response.choices[0].message.content or ""
    return summary[:max(max_length, 0)]
def sliding_window_search(
    query: str,
    document: str,
    window_size: int = 2000,
    step: int = 500,
    max_chunks: int = 3,
) -> list[str]:
    """Collect the first windows of *document* that are relevant to *query*.

    A window of ``window_size`` characters is moved over the document in
    increments of ``step``; each window is tested with ``is_relevant`` and
    kept when the test passes.

    Args:
        query: The search query.
        document: The text to scan.
        window_size: Length of each window, in characters.
        step: Distance between the starts of consecutive windows.
        max_chunks: Stop after this many relevant windows have been found.

    Returns:
        The relevant windows, in document order (at most ``max_chunks``).

    Raises:
        ValueError: If ``window_size`` or ``step`` is not positive.
    """
    if window_size <= 0 or step <= 0:
        raise ValueError("window_size and step must be positive integers")
    if max_chunks <= 0:
        return []

    chunks: list[str] = []
    doc_length = len(document)
    for start in range(0, doc_length, step):
        chunk = document[start:start + window_size]
        if is_relevant(query, chunk):
            chunks.append(chunk)
            if len(chunks) >= max_chunks:
                break
        if start + window_size >= doc_length:
            # This window already reaches the end of the document; every
            # later window would only be a suffix of it, i.e. a duplicate.
            break
    return chunks
def is_relevant(query: str, chunk: str) -> bool:
    """Return True when *chunk* shares enough words with *query*.

    Both strings are lower-cased and split on whitespace. Two shared words
    are required, or every query word when the query has fewer than two
    distinct words, so a single-word query can still match. An empty query
    is never relevant.

    NOTE: because words are found by whitespace splitting, text written
    without spaces is treated as one long word.
    """
    query_words = set(query.lower().split())
    if not query_words:
        return False
    chunk_words = set(chunk.lower().split())
    required = min(2, len(query_words))
    return len(query_words & chunk_words) >= required
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Shared splitter that breaks long documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,  # overlap keeps context across chunk boundaries
    # Separators are tried in order. The trailing "" lets the splitter fall
    # back to character-level splitting, so a piece that contains none of
    # the other separators cannot end up larger than chunk_size.
    separators=["\n\n", "\n", "。", " ", ""],
)
def _cosine_similarity(vec_a, vec_b) -> float:
    """Return the cosine similarity of two equal-length numeric vectors.

    Returns 0.0 when either vector has zero magnitude.
    """
    import math

    dot = sum(a * b for a, b in zip(vec_a, vec_b))
    norm_a = math.sqrt(sum(a * a for a in vec_a))
    norm_b = math.sqrt(sum(b * b for b in vec_b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)


def process_long_document(
    document: str,
    query: str,
    llm,
    *,
    similarity_threshold: float = 0.5,
    top_k: int = 3,
) -> str:
    """Answer *query* from the most relevant chunks of a long *document*.

    The document is split with the module-level ``text_splitter``, each
    chunk is embedded and compared with the query embedding, and the best
    chunks are passed to ``llm`` together with the question.

    Args:
        document: The full text to search.
        query: The question to answer.
        llm: Object exposing ``invoke(prompt)``.
        similarity_threshold: A chunk is kept only if its cosine similarity
            to the query is strictly greater than this value.
        top_k: Maximum number of chunks included in the prompt.

    Returns:
        Whatever ``llm.invoke`` returns.
        NOTE(review): annotated as ``str``; some model wrappers may return a
        message object instead — confirm against the caller.
    """
    from langchain_openai import OpenAIEmbeddings

    # 1. Split the document into chunks.
    chunks = text_splitter.split_text(document)

    # 2. Score every chunk against the query. The query is embedded once and
    #    the chunks in a single batch, rather than one request per chunk.
    embeddings = OpenAIEmbeddings()
    scored = []
    if chunks:
        query_emb = embeddings.embed_query(query)
        chunk_embs = embeddings.embed_documents(chunks)
        for index, (chunk, chunk_emb) in enumerate(zip(chunks, chunk_embs)):
            score = _cosine_similarity(query_emb, chunk_emb)
            if score > similarity_threshold:
                scored.append((score, index, chunk))

    # 3. Keep the top_k most similar chunks, then restore document order so
    #    the combined context reads naturally.
    best = sorted(scored, key=lambda item: item[0], reverse=True)
    best = best[:max(top_k, 0)]
    best.sort(key=lambda item: item[1])
    combined = "\n".join(chunk for _, _, chunk in best)

    # 4. Generate the answer.
    response = llm.invoke(f"基于以下内容回答:\n{combined}\n\n问题:{query}")
    return response
Auto-repair applied, but unresolved findings remain.
策略说明准确
代码示例验证通过