tirea_agentos/runtime/context/
mod.rs

1//! Context management: logical compression + hard truncation.
2//!
3//! Combines two concerns into a single plugin:
4//! 1. **Compaction** — LLM-based summarization of old messages and artifact
5//!    compaction via [`ContextState`].
6//! 2. **Truncation** — hard token-budget enforcement via [`truncate_to_budget`].
7//!
8//! Registers a single [`ContextTransform`](transform::ContextTransform) that
9//! first replaces pre-boundary messages with a summary and swaps large artifact
10//! content with compact views, then truncates history to fit the token budget.
11
12mod compaction;
13mod plugin;
14mod state;
15mod transform;
16
17#[cfg(test)]
18mod tests;
19
20use tirea_contract::runtime::inference::{ContextCompactionMode, ContextWindowPolicy};
21
22pub use plugin::ContextPlugin;
23// State types re-exported for sibling modules (tests, etc.) within the crate.
24pub(crate) use compaction::trim_thread_to_latest_boundary;
25#[allow(unused_imports)]
26pub(crate) use state::{ArtifactRef, CompactBoundary, ContextAction, ContextState};
27
28/// Behavior ID used for registration.
29pub const CONTEXT_PLUGIN_ID: &str = "context";
30
31pub(super) const SUMMARY_MESSAGE_OPEN: &str = "<conversation-summary>";
32pub(super) const SUMMARY_MESSAGE_CLOSE: &str = "</conversation-summary>";
33
/// Computes the value used for a policy's `autocompact_threshold`.
///
/// The result is 70% (rounded down) of the input budget, where the input
/// budget is `max_context_tokens` minus `max_output_tokens`. If the output
/// reservation is at least as large as the context window, the budget — and
/// therefore the threshold — is zero.
fn auto_compact_threshold(max_context_tokens: usize, max_output_tokens: usize) -> usize {
    let input_budget = max_context_tokens.saturating_sub(max_output_tokens);
    // floor(input_budget * 7 / 10), split into quotient and remainder parts so
    // the intermediate product can never overflow (a saturating multiply would
    // silently clamp and under-report the threshold for very large budgets).
    (input_budget / 10) * 7 + (input_budget % 10) * 7 / 10
}
38
39pub(crate) fn policy_for_model(model: &str) -> ContextWindowPolicy {
40    match model {
41        m if m.contains("claude") => ContextWindowPolicy {
42            max_context_tokens: 200_000,
43            max_output_tokens: 16_384,
44            enable_prompt_cache: true,
45            autocompact_threshold: Some(auto_compact_threshold(200_000, 16_384)),
46            compaction_mode: ContextCompactionMode::KeepRecentRawSuffix,
47            ..ContextWindowPolicy::default()
48        },
49        m if m.contains("gpt-4o") => ContextWindowPolicy {
50            max_context_tokens: 128_000,
51            max_output_tokens: 16_384,
52            enable_prompt_cache: false,
53            autocompact_threshold: Some(auto_compact_threshold(128_000, 16_384)),
54            compaction_mode: ContextCompactionMode::KeepRecentRawSuffix,
55            ..ContextWindowPolicy::default()
56        },
57        m if m.contains("gpt-4") => ContextWindowPolicy {
58            max_context_tokens: 128_000,
59            max_output_tokens: 4_096,
60            enable_prompt_cache: false,
61            autocompact_threshold: Some(auto_compact_threshold(128_000, 4_096)),
62            compaction_mode: ContextCompactionMode::KeepRecentRawSuffix,
63            ..ContextWindowPolicy::default()
64        },
65        _ => ContextWindowPolicy::default(),
66    }
67}