Compare commits
6 Commits
a3ced1f246
...
rust-v0.1.
| Author | SHA1 | Date | |
|---|---|---|---|
| 66e0649b01 | |||
| 84bc98a66b | |||
| 3bc152029e | |||
| ffbd2e38ec | |||
| 0841ba05a8 | |||
| 44dc7a3bed |
363
llmx-rs/Cargo.lock
generated
363
llmx-rs/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -43,7 +43,7 @@ members = [
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "0.1.7"
|
version = "0.1.9"
|
||||||
# Track the edition for all workspace crates in one place. Individual
|
# Track the edition for all workspace crates in one place. Individual
|
||||||
# crates can still override this value, but keeping it here means new
|
# crates can still override this value, but keeping it here means new
|
||||||
# crates created with `cargo new -w ...` automatically inherit the 2024
|
# crates created with `cargo new -w ...` automatically inherit the 2024
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ use thiserror::Error;
|
|||||||
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
|
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
|
||||||
const END_PATCH_MARKER: &str = "*** End Patch";
|
const END_PATCH_MARKER: &str = "*** End Patch";
|
||||||
const ADD_FILE_MARKER: &str = "*** Add File: ";
|
const ADD_FILE_MARKER: &str = "*** Add File: ";
|
||||||
|
const CREATE_FILE_MARKER: &str = "*** Create File: "; // Alias for Add File
|
||||||
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
|
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
|
||||||
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
|
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
|
||||||
const MOVE_TO_MARKER: &str = "*** Move to: ";
|
const MOVE_TO_MARKER: &str = "*** Move to: ";
|
||||||
@@ -245,8 +246,8 @@ fn check_start_and_end_lines_strict(
|
|||||||
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
|
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
|
||||||
// Be tolerant of case mismatches and extra padding around marker strings.
|
// Be tolerant of case mismatches and extra padding around marker strings.
|
||||||
let first_line = lines[0].trim();
|
let first_line = lines[0].trim();
|
||||||
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
|
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER).or_else(|| first_line.strip_prefix(CREATE_FILE_MARKER)) {
|
||||||
// Add File
|
// Add File (also accepts Create File as alias)
|
||||||
let mut contents = String::new();
|
let mut contents = String::new();
|
||||||
let mut parsed_lines = 1;
|
let mut parsed_lines = 1;
|
||||||
for add_line in &lines[1..] {
|
for add_line in &lines[1..] {
|
||||||
@@ -331,7 +332,7 @@ fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), P
|
|||||||
|
|
||||||
Err(InvalidHunkError {
|
Err(InvalidHunkError {
|
||||||
message: format!(
|
message: format!(
|
||||||
"'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
|
"'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Create File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
|
||||||
),
|
),
|
||||||
line_number,
|
line_number,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
provider: &ModelProviderInfo,
|
provider: &ModelProviderInfo,
|
||||||
otel_event_manager: &OtelEventManager,
|
otel_event_manager: &OtelEventManager,
|
||||||
session_source: &SessionSource,
|
session_source: &SessionSource,
|
||||||
|
model_max_output_tokens: Option<i64>,
|
||||||
) -> Result<ResponseStream> {
|
) -> Result<ResponseStream> {
|
||||||
if prompt.output_schema.is_some() {
|
if prompt.output_schema.is_some() {
|
||||||
return Err(LlmxErr::UnsupportedOperation(
|
return Err(LlmxErr::UnsupportedOperation(
|
||||||
@@ -443,8 +444,10 @@ pub(crate) async fn stream_chat_completions(
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Add max_tokens - required by Anthropic Messages API
|
// Add max_tokens - required by Anthropic Messages API
|
||||||
// Use provider config value or default to 8192
|
// Priority: config model_max_output_tokens > provider max_tokens > default 20480
|
||||||
let max_tokens = provider.max_tokens.unwrap_or(8192);
|
let max_tokens = model_max_output_tokens
|
||||||
|
.or(provider.max_tokens)
|
||||||
|
.unwrap_or(20480);
|
||||||
if let Some(obj) = payload.as_object_mut() {
|
if let Some(obj) = payload.as_object_mut() {
|
||||||
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
obj.insert("max_tokens".to_string(), json!(max_tokens));
|
||||||
}
|
}
|
||||||
@@ -610,7 +613,9 @@ async fn process_chat_sse<S>(
|
|||||||
) where
|
) where
|
||||||
S: Stream<Item = Result<Bytes>> + Unpin,
|
S: Stream<Item = Result<Bytes>> + Unpin,
|
||||||
{
|
{
|
||||||
|
debug!("process_chat_sse started, idle_timeout={:?}", idle_timeout);
|
||||||
let mut stream = stream.eventsource();
|
let mut stream = stream.eventsource();
|
||||||
|
debug!("SSE stream initialized, waiting for first event");
|
||||||
|
|
||||||
// State to accumulate a function call across streaming chunks.
|
// State to accumulate a function call across streaming chunks.
|
||||||
// OpenAI may split the `arguments` string over multiple `delta` events
|
// OpenAI may split the `arguments` string over multiple `delta` events
|
||||||
@@ -645,7 +650,14 @@ async fn process_chat_sse<S>(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
// Stream closed gracefully – emit Completed with dummy id.
|
// Stream closed gracefully – emit any pending items first, then Completed
|
||||||
|
debug!("Stream closed gracefully (Ok(None)), emitting pending items");
|
||||||
|
if let Some(item) = assistant_item.take() {
|
||||||
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
|
}
|
||||||
|
if let Some(item) = reasoning_item.take() {
|
||||||
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
|
}
|
||||||
let _ = tx_event
|
let _ = tx_event
|
||||||
.send(Ok(ResponseEvent::Completed {
|
.send(Ok(ResponseEvent::Completed {
|
||||||
response_id: String::new(),
|
response_id: String::new(),
|
||||||
@@ -860,27 +872,46 @@ async fn process_chat_sse<S>(
|
|||||||
|
|
||||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
}
|
}
|
||||||
"stop" => {
|
"stop" | "length" => {
|
||||||
// Regular turn without tool-call. Emit the final assistant message
|
// Regular turn without tool-call, or hit max_tokens limit.
|
||||||
// as a single OutputItemDone so non-delta consumers see the result.
|
debug!("Processing finish_reason={}, assistant_item.is_some()={}, reasoning_item.is_some()={}",
|
||||||
|
finish_reason, assistant_item.is_some(), reasoning_item.is_some());
|
||||||
|
// Emit the final assistant message as a single OutputItemDone so non-delta consumers see the result.
|
||||||
|
if let Some(item) = assistant_item.take() {
|
||||||
|
debug!("Emitting assistant_item: {:?}", item);
|
||||||
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
|
} else {
|
||||||
|
debug!("No assistant_item to emit");
|
||||||
|
}
|
||||||
|
// Also emit a terminal Reasoning item so UIs can finalize raw reasoning.
|
||||||
|
if let Some(item) = reasoning_item.take() {
|
||||||
|
debug!("Emitting reasoning_item");
|
||||||
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
|
} else {
|
||||||
|
debug!("No reasoning_item to emit");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// Unknown finish_reason - still emit pending items to avoid hanging
|
||||||
|
debug!("Unknown finish_reason: {}, emitting pending items", finish_reason);
|
||||||
if let Some(item) = assistant_item.take() {
|
if let Some(item) = assistant_item.take() {
|
||||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
}
|
}
|
||||||
// Also emit a terminal Reasoning item so UIs can finalize raw reasoning.
|
|
||||||
if let Some(item) = reasoning_item.take() {
|
if let Some(item) = reasoning_item.take() {
|
||||||
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit Completed regardless of reason so the agent can advance.
|
// Emit Completed regardless of reason so the agent can advance.
|
||||||
|
debug!("Sending Completed event after finish_reason={}", finish_reason);
|
||||||
let _ = tx_event
|
let _ = tx_event
|
||||||
.send(Ok(ResponseEvent::Completed {
|
.send(Ok(ResponseEvent::Completed {
|
||||||
response_id: String::new(),
|
response_id: String::new(),
|
||||||
token_usage: token_usage.clone(),
|
token_usage: token_usage.clone(),
|
||||||
}))
|
}))
|
||||||
.await;
|
.await;
|
||||||
|
debug!("Completed event sent, returning from SSE processor");
|
||||||
|
|
||||||
// Prepare for potential next turn (should not happen in same stream).
|
// Prepare for potential next turn (should not happen in same stream).
|
||||||
// fn_call_state = FunctionCallState::default();
|
// fn_call_state = FunctionCallState::default();
|
||||||
|
|||||||
@@ -152,6 +152,7 @@ impl ModelClient {
|
|||||||
&self.provider,
|
&self.provider,
|
||||||
&self.otel_event_manager,
|
&self.otel_event_manager,
|
||||||
&self.session_source,
|
&self.session_source,
|
||||||
|
self.config.model_max_output_tokens,
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
|||||||
@@ -973,6 +973,8 @@ impl Config {
|
|||||||
|
|
||||||
let mut model_providers = built_in_model_providers();
|
let mut model_providers = built_in_model_providers();
|
||||||
// Merge user-defined providers into the built-in list.
|
// Merge user-defined providers into the built-in list.
|
||||||
|
// Note: This uses or_insert() so built-in providers take precedence.
|
||||||
|
// For custom max_tokens, use model_max_output_tokens config instead.
|
||||||
for (key, provider) in cfg.model_providers.into_iter() {
|
for (key, provider) in cfg.model_providers.into_iter() {
|
||||||
model_providers.entry(key).or_insert(provider);
|
model_providers.entry(key).or_insert(provider);
|
||||||
}
|
}
|
||||||
|
|||||||
198
test_system_message.json
Normal file
198
test_system_message.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user