diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt index 238d6e882a..b0f3d96f87 100644 --- a/extensions/inference-cortex-extension/bin/version.txt +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -1 +1 @@ -1.0.7 +1.0.8 diff --git a/extensions/inference-cortex-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json index 1e5ec8db68..6a0dcd4a0b 100644 --- a/extensions/inference-cortex-extension/resources/default_settings.json +++ b/extensions/inference-cortex-extension/resources/default_settings.json @@ -18,6 +18,16 @@ "placeholder": "4" } }, + { + "key": "cpu_threads", + "title": "CPU Threads", + "description": "The number of CPU threads to use (when in CPU mode)", + "controllerType": "input", + "controllerProps": { + "value": "", + "placeholder": "Number of CPU threads" + } + }, { "key": "flash_attn", "title": "Flash Attention enabled", diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 537b3bc622..03c094ec13 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -42,6 +42,7 @@ export enum Settings { flash_attn = 'flash_attn', cache_type = 'cache_type', use_mmap = 'use_mmap', + cpu_threads = 'cpu_threads', } /** @@ -65,6 +66,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { flash_attn: boolean = true use_mmap: boolean = true cache_type: string = 'f16' + cpu_threads?: number /** * The URL for making inference requests. @@ -104,6 +106,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.flash_attn = await this.getSetting(Settings.flash_attn, true) this.use_mmap = await this.getSetting(Settings.use_mmap, true) this.cache_type = await this.getSetting(Settings.cache_type, 'f16') + const threads_number = Number( + await this.getSetting(Settings.cpu_threads, '') + ) + if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number this.queue.add(() => this.clean()) @@ -139,6 +145,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { this.cache_type = value as string } else if (key === Settings.use_mmap && typeof value === 'boolean') { this.use_mmap = value as boolean + } else if (key === Settings.cpu_threads && typeof value === 'string') { + const threads_number = Number(value) + if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number } } @@ -196,6 +205,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { flash_attn: this.flash_attn, cache_type: this.cache_type, use_mmap: this.use_mmap, + ...(this.cpu_threads ? { cpu_threads: this.cpu_threads } : {}), }, timeout: false, signal, diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index b573843448..2e12543626 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -98,7 +98,7 @@ export const useCreateNewThread = () => { // Use ctx length by default const overriddenParameters = { max_tokens: !isLocalEngine(defaultModel?.engine) - ? (defaultModel?.parameters.token_limit ?? 8192) + ? (defaultModel?.parameters.max_tokens ?? 8192) : defaultContextLength, } diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx index 52aafba834..3dbb56a86f 100644 --- a/web/screens/Settings/Advanced/index.tsx +++ b/web/screens/Settings/Advanced/index.tsx @@ -417,7 +417,7 @@ const Advanced = () => { )} {/* Vulkan for AMD GPU/ APU and Intel Arc GPU */} - {!isMac && gpuList.length > 0 && experimentalEnabled && ( + {!isMac && experimentalEnabled && (
diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx index 3257a194fd..dab50aba78 100644 --- a/web/screens/Thread/ThreadRightPanel/index.tsx +++ b/web/screens/Thread/ThreadRightPanel/index.tsx @@ -257,7 +257,7 @@ const ThreadRightPanel = () => { id="assistant-instructions" placeholder="Eg. You are a helpful assistant." value={activeAssistant?.instructions ?? ''} - // autoResize + autoResize onChange={onAssistantInstructionChanged} />