+ * Also used as the wire model when {@link #getWireModel()} is not set. Falls + * back to {@link SessionConfig#getModel()}. + * + * @return the model ID, or {@code null} if not set + */ + public String getModelId() { + return modelId; + } + + /** + * Sets the well-known model name used by the runtime to look up agent + * configuration (tools, prompts, reasoning behavior) and default token limits. + *
+ * Also used as the wire model when {@link #setWireModel(String)} is not set. + * Falls back to {@link SessionConfig#getModel()}. + * + * @param modelId + * the model ID + * @return this config for method chaining + */ + public ProviderConfig setModelId(String modelId) { + this.modelId = modelId; + return this; + } + + /** + * Gets the model name sent to the provider API for inference. + *
+ * Use this when the provider's model name (e.g. an Azure deployment name or a + * custom fine-tune name) differs from {@link #getModelId()}. Falls back to + * {@link #getModelId()}, then {@link SessionConfig#getModel()}. + * + * @return the wire model name, or {@code null} if not set + */ + public String getWireModel() { + return wireModel; + } + + /** + * Sets the model name sent to the provider API for inference. + *
+ * Use this when the provider's model name (e.g. an Azure deployment name or a + * custom fine-tune name) differs from {@link #getModelId()}. Falls back to + * {@link #getModelId()}, then {@link SessionConfig#getModel()}. + * + * @param wireModel + * the wire model name + * @return this config for method chaining + */ + public ProviderConfig setWireModel(String wireModel) { + this.wireModel = wireModel; + return this; + } + + /** + * Gets the override for the resolved model's default max prompt tokens. + *
+ * The runtime triggers conversation compaction before sending a request when + * the prompt (system message, history, tool definitions, user message) would + * exceed this limit. + * + * @return the max input tokens, or {@code null} if not set + */ + public Integer getMaxInputTokens() { + return maxInputTokens; + } + + /** + * Sets the override for the resolved model's default max prompt tokens. + *
+ * The runtime triggers conversation compaction before sending a request when + * the prompt (system message, history, tool definitions, user message) would + * exceed this limit. + * + * @param maxInputTokens + * the max input tokens + * @return this config for method chaining + */ + public ProviderConfig setMaxInputTokens(Integer maxInputTokens) { + this.maxInputTokens = maxInputTokens; + return this; + } + + /** + * Gets the override for the resolved model's default max output tokens. + *
+ * When hit, the model stops generating and returns a truncated response. + * + * @return the max output tokens, or {@code null} if not set + */ + public Integer getMaxOutputTokens() { + return maxOutputTokens; + } + + /** + * Sets the override for the resolved model's default max output tokens. + *
+ * When hit, the model stops generating and returns a truncated response.
+ *
+ * @param maxOutputTokens
+ * the max output tokens
+ * @return this config for method chaining
+ */
+ public ProviderConfig setMaxOutputTokens(Integer maxOutputTokens) {
+ this.maxOutputTokens = maxOutputTokens;
+ return this;
+ }
}
diff --git a/src/site/markdown/advanced.md b/src/site/markdown/advanced.md
index e1e08a275..cf11d5d1f 100644
--- a/src/site/markdown/advanced.md
+++ b/src/site/markdown/advanced.md
@@ -421,17 +421,36 @@ foundry service status
When using BYOK, be aware of these limitations:
-#### Identity Limitations
+#### Model and Token Limit Overrides
-BYOK authentication uses **static credentials only**. The following identity providers are NOT supported:
+You can override the model name and token limits used by the provider:
-- ❌ **Microsoft Entra ID (Azure AD)** - No support for Entra managed identities or service principals
-- ❌ **Third-party identity providers** - No OIDC, SAML, or other federated identity
-- ❌ **Managed identities** - Azure Managed Identity is not supported
+```java
+var session = client.createSession(
+ new SessionConfig().setOnPermissionRequest(PermissionHandler.APPROVE_ALL)
+ .setProvider(new ProviderConfig()
+ .setType("openai")
+ .setBaseUrl("https://api.openai.com/v1")
+ .setApiKey("sk-...")
+ .setModelId("gpt-4o") // Runtime model for config lookup
+ .setWireModel("my-finetune-v3") // Actual model name sent to provider API
+ .setMaxInputTokens(100_000) // Override max prompt tokens
+ .setMaxOutputTokens(4096)) // Override max output tokens
+).get();
+```
-You must use an API key or static bearer token that you manage yourself.
+| Property | Description |
+|---|---|
+| `modelId` | Well-known model name for runtime config lookup (tools, prompts, reasoning). Also used as wire model when `wireModel` is not set. Falls back to `SessionConfig.model`. |
+| `wireModel` | Model name sent to the provider API. Use when the provider's model name (e.g. Azure deployment name or fine-tune) differs from `modelId`. Falls back to `modelId`, then `SessionConfig.model`. |
+| `maxInputTokens` | Override max prompt tokens. The runtime compacts the conversation before exceeding this limit. |
+| `maxOutputTokens` | Override max output tokens. The model stops generating when this limit is hit. |
-**Why not Entra ID?** While Entra ID does issue bearer tokens, these tokens are short-lived (typically 1 hour) and require automatic refresh via the Azure Identity SDK. The `bearerToken` option only accepts a static string—there is no callback mechanism for the SDK to request fresh tokens. For long-running workloads requiring Entra authentication, you would need to implement your own token refresh logic and create new sessions with updated tokens.
+#### Identity Limitations
+
+BYOK authentication uses **static credentials only**.
+
+You must use an API key or static bearer token that you manage yourself.
---
diff --git a/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java b/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java
index d3e18010b..b59af09f3 100644
--- a/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java
+++ b/src/test/java/com/github/copilot/sdk/ProviderConfigTest.java
@@ -46,6 +46,10 @@ void testDefaultsAreNull() {
assertNull(provider.getApiKey());
assertNull(provider.getBearerToken());
assertNull(provider.getAzure());
+ assertNull(provider.getModelId());
+ assertNull(provider.getWireModel());
+ assertNull(provider.getMaxInputTokens());
+ assertNull(provider.getMaxOutputTokens());
}
@Test
@@ -232,7 +236,8 @@ void testSerializeCustomWireApi() throws Exception {
void testSerializeAllFields() throws Exception {
var provider = new ProviderConfig().setType("azure-openai").setWireApi("completions")
.setBaseUrl("https://my-resource.openai.azure.com").setApiKey("my-api-key")
- .setBearerToken("my-bearer-token").setAzure(new AzureOptions().setApiVersion("2024-02-01"));
+ .setBearerToken("my-bearer-token").setAzure(new AzureOptions().setApiVersion("2024-02-01"))
+ .setModelId("gpt-4o").setWireModel("my-deployment").setMaxInputTokens(50_000).setMaxOutputTokens(2048);
JsonNode json = MAPPER.valueToTree(provider);
@@ -242,7 +247,11 @@ void testSerializeAllFields() throws Exception {
assertEquals("my-api-key", json.get("apiKey").asText());
assertEquals("my-bearer-token", json.get("bearerToken").asText());
assertEquals("2024-02-01", json.get("azure").get("apiVersion").asText());
- assertEquals(6, json.size(), "Expected exactly 6 JSON fields");
+ assertEquals("gpt-4o", json.get("modelId").asText());
+ assertEquals("my-deployment", json.get("wireModel").asText());
+ assertEquals(50_000, json.get("maxPromptTokens").asInt());
+ assertEquals(2048, json.get("maxOutputTokens").asInt());
+ assertEquals(10, json.size(), "Expected exactly 10 JSON fields");
}
@Test
@@ -285,6 +294,30 @@ void testRoundTripProviderConfig() throws Exception {
assertEquals(original.getAzure().getApiVersion(), deserialized.getAzure().getApiVersion());
}
+ @Test
+ void testSerializeProviderModelAndTokenOverrides() throws Exception {
+ var provider = new ProviderConfig().setType("openai").setBaseUrl("https://example.com/provider")
+ .setHeaders(java.util.Map.of("Authorization", "Bearer provider-token")).setModelId("gpt-4o")
+ .setWireModel("my-finetune-v3").setMaxInputTokens(100_000).setMaxOutputTokens(4096);
+
+ JsonNode json = MAPPER.valueToTree(provider);
+
+ assertEquals("https://example.com/provider", json.get("baseUrl").asText());
+ assertEquals("Bearer provider-token", json.get("headers").get("Authorization").asText());
+ assertEquals("gpt-4o", json.get("modelId").asText());
+ assertEquals("my-finetune-v3", json.get("wireModel").asText());
+ assertEquals(100_000, json.get("maxPromptTokens").asInt());
+ assertEquals(4096, json.get("maxOutputTokens").asInt());
+
+ ProviderConfig deserialized = MAPPER.treeToValue(json, ProviderConfig.class);
+ assertNotNull(deserialized);
+ assertEquals("https://example.com/provider", deserialized.getBaseUrl());
+ assertEquals("gpt-4o", deserialized.getModelId());
+ assertEquals("my-finetune-v3", deserialized.getWireModel());
+ assertEquals(100_000, deserialized.getMaxInputTokens());
+ assertEquals(4096, deserialized.getMaxOutputTokens());
+ }
+
@Test
void testForwardCompatibilityIgnoresUnknownFields() throws Exception {
String json = """
diff --git a/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java b/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java
index 5dcb36604..578f7a3a5 100644
--- a/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java
+++ b/src/test/java/com/github/copilot/sdk/SessionConfigE2ETest.java
@@ -127,4 +127,54 @@ private static String getSystemMessage(Map