From cfbdcbcc50873e75f4dd6e98c2b792883ac243a4 Mon Sep 17 00:00:00 2001
From: Louis
Date: Mon, 6 Jan 2025 10:35:40 +0700
Subject: [PATCH] fix: populate engines, models and legacy settings (#4403)

* fix: populate engines, models and legacy settings

* chore: legacy logics update configured remote engine
---
 .../browser/extensions/enginesManagement.ts | 17 +-
 core/src/node/helper/config.ts | 108 +---
 core/src/node/helper/resource.ts | 7 +-
 core/src/types/engine/index.ts | 3 +-
 core/src/types/message/messageEntity.ts | 18 +-
 .../engine-management-extension/engines.mjs | 27 +
 .../models/anthropic.json} | 45 +-
 .../models/cohere.json} | 36 +-
 .../models/groq.json} | 217 +-------
 .../models/martian.json} | 17 +-
 .../models/mistral.json} | 45 +-
 .../models/nvidia.json} | 13 +-
 .../models/openai.json} | 102 +---
 .../models/openrouter.json} | 13 +-
 .../resources/anthropic.json | 22 +
 .../resources/cohere.json | 22 +
 .../resources/groq.json | 22 +
 .../resources/martian.json | 22 +
 .../resources/mistral.json | 22 +
 .../resources/nvidia.json | 22 +
 .../resources/openai.json | 22 +
 .../resources/openrouter.json | 22 +
 .../rolldown.config.mjs | 3 +
 .../src/@types/global.d.ts | 6 +
 .../engine-management-extension/src/index.ts | 149 +++--
 .../engine-management-extension/tsconfig.json | 3 +-
 .../inference-anthropic-extension/README.md | 79 ---
 .../jest.config.js | 9 -
 .../package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../src/anthropic.test.ts | 77 ---
 .../src/env.d.ts | 2 -
 .../src/index.ts | 150 -----
 .../tsconfig.json | 15 -
 .../inference-cohere-extension/README.md | 79 ---
 .../inference-cohere-extension/package.json | 46 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../inference-cohere-extension/src/env.d.ts | 2 -
 .../inference-cohere-extension/src/index.ts | 117 ----
 .../inference-cohere-extension/tsconfig.json | 14 -
 extensions/inference-groq-extension/README.md | 75 ---
 .../inference-groq-extension/package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../inference-groq-extension/src/env.d.ts | 2 -
 .../inference-groq-extension/src/index.ts | 64 ---
 .../inference-groq-extension/tsconfig.json | 14 -
 .../inference-martian-extension/README.md | 79 ---
 .../inference-martian-extension/package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../inference-martian-extension/src/env.d.ts | 2 -
 .../inference-martian-extension/src/index.ts | 63 ---
 .../inference-martian-extension/tsconfig.json | 14 -
 .../inference-mistral-extension/README.md | 79 ---
 .../inference-mistral-extension/package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../inference-mistral-extension/src/env.d.ts | 2 -
 .../inference-mistral-extension/src/index.ts | 63 ---
 .../inference-mistral-extension/tsconfig.json | 14 -
 .../inference-nvidia-extension/README.md | 79 ---
 .../inference-nvidia-extension/package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../inference-nvidia-extension/src/env.d.ts | 2 -
 .../inference-nvidia-extension/src/index.ts | 63 ---
 .../inference-nvidia-extension/tsconfig.json | 14 -
 .../inference-openai-extension/README.md | 79 ---
 .../inference-openai-extension/jest.config.js | 9 -
 .../inference-openai-extension/package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 18 -
 .../src/OpenAIExtension.test.ts | 54 --
 .../inference-openai-extension/src/env.d.ts | 2 -
 .../inference-openai-extension/src/index.ts | 90 ---
 .../inference-openai-extension/tsconfig.json | 15 -
 .../inference-openrouter-extension/README.md | 79 ---
 .../package.json | 42 --
 .../resources/settings.json | 34 --
 .../rolldown.config.mjs | 18 -
 .../src/env.d.ts | 2 -
 .../src/index.ts | 85 ---
 .../tsconfig.json | 14 -
 .../README.md | 75 ---
 .../package.json | 42 --
 .../resources/settings.json | 24 -
 .../rolldown.config.mjs | 16 -
 .../src/env.d.ts | 1 -
 .../src/index.ts | 66 ---
 .../tsconfig.json | 14 -
 extensions/model-extension/src/cortex.ts | 1 +
 extensions/yarn.lock | 254 +--------
 web/containers/ErrorMessage/index.tsx | 5 +-
 web/containers/ModelDropdown/index.tsx | 514 ++++++++----------
 web/containers/SetupRemoteModel/index.tsx | 14 +-
 web/hooks/useActiveModel.ts | 6 +-
 web/hooks/useCreateNewThread.ts | 19 +-
 web/hooks/useEngineManagement.ts | 21 +
 web/hooks/useSendChatMessage.ts | 5 +-
 .../Settings/Engines/ModalAddRemoteEngine.tsx | 5 +-
 .../Settings/Engines/RemoteEngineSettings.tsx | 23 +-
 web/screens/Settings/MyModels/index.tsx | 26 +-
 web/screens/Settings/SettingDetail/index.tsx | 10 +-
 .../ChatBody/EmptyThread/index.tsx | 8 +-
 web/utils/messageRequestBuilder.ts | 1 +
 108 files changed, 760 insertions(+), 3683 deletions(-)
 create mode 100644 extensions/engine-management-extension/engines.mjs
 rename extensions/{inference-anthropic-extension/resources/models.json => engine-management-extension/models/anthropic.json} (54%)
 rename extensions/{inference-cohere-extension/resources/models.json => engine-management-extension/models/cohere.json} (62%)
 rename extensions/{inference-groq-extension/resources/models.json => engine-management-extension/models/groq.json} (53%)
 rename extensions/{inference-martian-extension/resources/models.json => engine-management-extension/models/martian.json} (63%)
 rename extensions/{inference-mistral-extension/resources/models.json => engine-management-extension/models/mistral.json} (58%)
 rename extensions/{inference-nvidia-extension/resources/models.json => engine-management-extension/models/nvidia.json} (57%)
 rename extensions/{inference-openai-extension/resources/models.json => engine-management-extension/models/openai.json} (62%)
 rename extensions/{inference-openrouter-extension/resources/models.json => engine-management-extension/models/openrouter.json} (63%)
 create mode 100644 extensions/engine-management-extension/resources/anthropic.json
 create mode 100644 extensions/engine-management-extension/resources/cohere.json
 create mode 100644 extensions/engine-management-extension/resources/groq.json
 create mode 100644 extensions/engine-management-extension/resources/martian.json
 create mode 100644 extensions/engine-management-extension/resources/mistral.json
 create mode 100644 extensions/engine-management-extension/resources/nvidia.json
 create mode 100644 extensions/engine-management-extension/resources/openai.json
 create mode 100644 extensions/engine-management-extension/resources/openrouter.json
 delete mode 100644 extensions/inference-anthropic-extension/README.md
 delete mode 100644 extensions/inference-anthropic-extension/jest.config.js
 delete mode 100644 extensions/inference-anthropic-extension/package.json
 delete mode 100644 extensions/inference-anthropic-extension/resources/settings.json
 delete mode 100644 extensions/inference-anthropic-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-anthropic-extension/src/anthropic.test.ts
 delete mode 100644 extensions/inference-anthropic-extension/src/env.d.ts
 delete mode 100644 extensions/inference-anthropic-extension/src/index.ts
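This change set consolidates nine standalone inference extensions (anthropic, cohere, groq, martian, mistral, nvidia, openai, openrouter, and triton-trtllm) into the single engine-management-extension: each surviving provider is reduced to a data-only pair, an engine descriptor under resources/ and a default model list under models/. The sketch below illustrates the resulting registration flow in TypeScript; addRemoteEngine and addRemoteModel are the methods this patch introduces further down, while the seeding wrapper itself and its parameter shape are illustrative assumptions, not code from the patch.

import openai from './resources/openai.json' with { type: 'json' }
import openaiModels from './models/openai.json' with { type: 'json' }

// Hypothetical bootstrap: register one bundled engine descriptor, then
// seed its default models. Mirrors the POST /v1/engines and
// POST /v1/models/add calls the extension makes below.
async function seedOpenAI(extension: {
  addRemoteEngine(config: object): Promise<{ messages: string }>
  addRemoteModel(model: object): Promise<unknown>
}): Promise<void> {
  await extension.addRemoteEngine(openai)
  for (const model of openaiModels) {
    await extension.addRemoteModel(model)
  }
}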
 delete mode 100644 extensions/inference-anthropic-extension/tsconfig.json
 delete mode 100644 extensions/inference-cohere-extension/README.md
 delete mode 100644 extensions/inference-cohere-extension/package.json
 delete mode 100644 extensions/inference-cohere-extension/resources/settings.json
 delete mode 100644 extensions/inference-cohere-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-cohere-extension/src/env.d.ts
 delete mode 100644 extensions/inference-cohere-extension/src/index.ts
 delete mode 100644 extensions/inference-cohere-extension/tsconfig.json
 delete mode 100644 extensions/inference-groq-extension/README.md
 delete mode 100644 extensions/inference-groq-extension/package.json
 delete mode 100644 extensions/inference-groq-extension/resources/settings.json
 delete mode 100644 extensions/inference-groq-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-groq-extension/src/env.d.ts
 delete mode 100644 extensions/inference-groq-extension/src/index.ts
 delete mode 100644 extensions/inference-groq-extension/tsconfig.json
 delete mode 100644 extensions/inference-martian-extension/README.md
 delete mode 100644 extensions/inference-martian-extension/package.json
 delete mode 100644 extensions/inference-martian-extension/resources/settings.json
 delete mode 100644 extensions/inference-martian-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-martian-extension/src/env.d.ts
 delete mode 100644 extensions/inference-martian-extension/src/index.ts
 delete mode 100644 extensions/inference-martian-extension/tsconfig.json
 delete mode 100644 extensions/inference-mistral-extension/README.md
 delete mode 100644 extensions/inference-mistral-extension/package.json
 delete mode 100644 extensions/inference-mistral-extension/resources/settings.json
 delete mode 100644 extensions/inference-mistral-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-mistral-extension/src/env.d.ts
 delete mode 100644 extensions/inference-mistral-extension/src/index.ts
 delete mode 100644 extensions/inference-mistral-extension/tsconfig.json
 delete mode 100644 extensions/inference-nvidia-extension/README.md
 delete mode 100644 extensions/inference-nvidia-extension/package.json
 delete mode 100644 extensions/inference-nvidia-extension/resources/settings.json
 delete mode 100644 extensions/inference-nvidia-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-nvidia-extension/src/env.d.ts
 delete mode 100644 extensions/inference-nvidia-extension/src/index.ts
 delete mode 100644 extensions/inference-nvidia-extension/tsconfig.json
 delete mode 100644 extensions/inference-openai-extension/README.md
 delete mode 100644 extensions/inference-openai-extension/jest.config.js
 delete mode 100644 extensions/inference-openai-extension/package.json
 delete mode 100644 extensions/inference-openai-extension/resources/settings.json
 delete mode 100644 extensions/inference-openai-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-openai-extension/src/OpenAIExtension.test.ts
 delete mode 100644 extensions/inference-openai-extension/src/env.d.ts
 delete mode 100644 extensions/inference-openai-extension/src/index.ts
 delete mode 100644 extensions/inference-openai-extension/tsconfig.json
 delete mode 100644 extensions/inference-openrouter-extension/README.md
 delete mode 100644 extensions/inference-openrouter-extension/package.json
 delete mode 100644 extensions/inference-openrouter-extension/resources/settings.json
 delete mode 100644 extensions/inference-openrouter-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-openrouter-extension/src/env.d.ts
 delete mode 100644 extensions/inference-openrouter-extension/src/index.ts
 delete mode 100644 extensions/inference-openrouter-extension/tsconfig.json
 delete mode 100644 extensions/inference-triton-trtllm-extension/README.md
 delete mode 100644 extensions/inference-triton-trtllm-extension/package.json
 delete mode 100644 extensions/inference-triton-trtllm-extension/resources/settings.json
 delete mode 100644 extensions/inference-triton-trtllm-extension/rolldown.config.mjs
 delete mode 100644 extensions/inference-triton-trtllm-extension/src/env.d.ts
 delete mode 100644 extensions/inference-triton-trtllm-extension/src/index.ts
 delete mode 100644 extensions/inference-triton-trtllm-extension/tsconfig.json

diff --git a/core/src/browser/extensions/enginesManagement.ts b/core/src/browser/extensions/enginesManagement.ts
index 88120b563f..66dff87df6 100644
--- a/core/src/browser/extensions/enginesManagement.ts
+++ b/core/src/browser/extensions/enginesManagement.ts
@@ -55,7 +55,18 @@
    * @param name - Inference engine name.
    * @returns A Promise that resolves to install of engine.
    */
-  abstract installEngine(name: string, engineConfig: EngineConfig): Promise<{ messages: string }>
+  abstract installEngine(
+    name: string,
+    engineConfig: EngineConfig
+  ): Promise<{ messages: string }>
+
+  /**
+   * Add a new remote engine
+   * @returns A Promise that resolves to install of engine.
+   */
+  abstract addRemoteEngine(
+    engineConfig: EngineConfig
+  ): Promise<{ messages: string }>
 
   /**
    * @param name - Inference engine name.
@@ -70,7 +81,9 @@
    * @param name - Inference engine name.
    * @returns A Promise that resolves to an object of default engine.
    */
-  abstract getDefaultEngineVariant(name: InferenceEngine): Promise<DefaultEngineVariant>
+  abstract getDefaultEngineVariant(
+    name: InferenceEngine
+  ): Promise<DefaultEngineVariant>
 
   /**
    * @body variant - string
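The new abstract addRemoteEngine contract takes the same EngineConfig shape that the bundled resources/*.json descriptors use. A hedged example of a call site follows; the config literal mirrors resources/mistral.json added later in this patch, and the extension variable is assumed to be an instance of the class above.

// Sketch: registering a remote engine through the new abstract API.
// Field values are copied from resources/mistral.json below; `extension`
// is an assumed EngineManagementExtension instance.
const config = {
  engine: 'mistral',
  type: 'remote',
  url: 'https://api.mistral.ai',
  api_key: '', // filled in by the user at runtime
}
extension.addRemoteEngine(config).then(({ messages }) => {
  console.log('engine registered:', messages)
})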
diff --git a/core/src/node/helper/config.ts b/core/src/node/helper/config.ts
index 8bf48d629c..6fb28d01f1 100644
--- a/core/src/node/helper/config.ts
+++ b/core/src/node/helper/config.ts
@@ -1,8 +1,7 @@
-import { AppConfiguration, SettingComponentProps } from '../../types'
+import { AppConfiguration } from '../../types'
 import { join, resolve } from 'path'
 import fs from 'fs'
 import os from 'os'
-import childProcess from 'child_process'
 
 const configurationFileName = 'settings.json'
 /**
@@ -19,7 +18,9 @@ export const getAppConfigurations = (): AppConfiguration => {
   if (!fs.existsSync(configurationFile)) {
     // create default app config if we don't have one
-    console.debug(`App config not found, creating default config at ${configurationFile}`)
+    console.debug(
+      `App config not found, creating default config at ${configurationFile}`
+    )
     fs.writeFileSync(configurationFile, JSON.stringify(appDefaultConfiguration))
     return appDefaultConfiguration
   }
 
@@ -30,20 +31,28 @@
     )
     return appConfigurations
   } catch (err) {
-    console.error(`Failed to read app config, return default config instead! Err: ${err}`)
+    console.error(
+      `Failed to read app config, return default config instead! Err: ${err}`
+    )
     return defaultAppConfig()
   }
 }
 
 const getConfigurationFilePath = () =>
   join(
-    global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
+    global.core?.appPath() ||
+      process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
     configurationFileName
   )
 
-export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
+export const updateAppConfiguration = (
+  configuration: AppConfiguration
+): Promise<void> => {
   const configurationFile = getConfigurationFilePath()
-  console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
+  console.debug(
+    'updateAppConfiguration, configurationFile: ',
+    configurationFile
+  )
 
   fs.writeFileSync(configurationFile, JSON.stringify(configuration))
   return Promise.resolve()
@@ -69,86 +78,6 @@ export const getJanExtensionsPath = (): string => {
   return join(appConfigurations.data_folder, 'extensions')
 }
 
-/**
- * Utility function to physical cpu count
- *
- * @returns {number} The physical cpu count.
- */
-export const physicalCpuCount = async (): Promise<number> => {
-  const platform = os.platform()
-  try {
-    if (platform === 'linux') {
-      const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
-      return parseInt(output.trim(), 10)
-    } else if (platform === 'darwin') {
-      const output = await exec('sysctl -n hw.physicalcpu_max')
-      return parseInt(output.trim(), 10)
-    } else if (platform === 'win32') {
-      const output = await exec('WMIC CPU Get NumberOfCores')
-      return output
-        .split(os.EOL)
-        .map((line: string) => parseInt(line))
-        .filter((value: number) => !isNaN(value))
-        .reduce((sum: number, number: number) => sum + number, 1)
-    } else {
-      const cores = os.cpus().filter((cpu: any, index: number) => {
-        const hasHyperthreading = cpu.model.includes('Intel')
-        const isOdd = index % 2 === 1
-        return !hasHyperthreading || isOdd
-      })
-      return cores.length
-    }
-  } catch (err) {
-    console.warn('Failed to get physical CPU count', err)
-    // Divide by 2 to get rid of hyper threading
-    const coreCount = Math.ceil(os.cpus().length / 2)
-    console.debug('Using node API to get physical CPU count:', coreCount)
-    return coreCount
-  }
-}
-
-const exec = async (command: string): Promise<string> => {
-  return new Promise((resolve, reject) => {
-    childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
-      if (error) {
-        reject(error)
-      } else {
-        resolve(stdout)
-      }
-    })
-  })
-}
-
-// a hacky way to get the api key. we should comes up with a better
-// way to handle this
-export const getEngineConfiguration = async (engineId: string) => {
-  if (engineId !== 'openai' && engineId !== 'groq') return undefined
-
-  const settingDirectoryPath = join(
-    getJanDataFolderPath(),
-    'settings',
-    '@janhq',
-    engineId === 'openai' ? 'inference-openai-extension' : 'inference-groq-extension',
-    'settings.json'
-  )
-
-  const content = fs.readFileSync(settingDirectoryPath, 'utf-8')
-  const settings: SettingComponentProps[] = JSON.parse(content)
-  const apiKeyId = engineId === 'openai' ? 'openai-api-key' : 'groq-api-key'
-  const keySetting = settings.find((setting) => setting.key === apiKeyId)
-  let fullUrl = settings.find((setting) => setting.key === 'chat-completions-endpoint')
-    ?.controllerProps.value
-
-  let apiKey = keySetting?.controllerProps.value
-  if (typeof apiKey !== 'string') apiKey = ''
-  if (typeof fullUrl !== 'string') fullUrl = ''
-
-  return {
-    api_key: apiKey,
-    full_url: fullUrl,
-  }
-}
-
 /**
  * Default app configurations
  * App Data Folder default to Electron's userData
@@ -158,7 +87,10 @@ export const getEngineConfiguration = async (engineId: string) => {
  */
 export const defaultAppConfig = (): AppConfiguration => {
   const { app } = require('electron')
-  const defaultJanDataFolder = join(app?.getPath('userData') ?? os?.homedir() ?? '', 'data')
+  const defaultJanDataFolder = join(
+    app?.getPath('userData') ?? os?.homedir() ?? '',
+    'data'
+  )
   return {
     data_folder:
       process.env.CI === 'e2e'
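core no longer shells out to lscpu, sysctl, or WMIC to count physical cores, and resource.ts below now reports numCpuPhysicalCore as 0, leaving hardware detection to the Cortex server. For reference, the portable fallback the deleted physicalCpuCount helper relied on reduces to the following self-contained sketch; halving the logical count assumes 2-way SMT, exactly as the removed catch branch did.

import os from 'os'

// Rough physical-core estimate: logical cores divided by two to
// discount hyper-threading, as in the removed helper's fallback path.
const estimatePhysicalCores = (): number => Math.ceil(os.cpus().length / 2)

console.log('Estimated physical cores:', estimatePhysicalCores())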
diff --git a/core/src/node/helper/resource.ts b/core/src/node/helper/resource.ts
index c7bfbf20c7..dfcb123eca 100644
--- a/core/src/node/helper/resource.ts
+++ b/core/src/node/helper/resource.ts
@@ -1,13 +1,8 @@
 import { SystemResourceInfo } from '../../types'
-import { physicalCpuCount } from './config'
-import { log } from './logger'
 
 export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
-  const cpu = await physicalCpuCount()
-  log(`[CORTEX]::CPU information - ${cpu}`)
-
   return {
-    numCpuPhysicalCore: cpu,
+    numCpuPhysicalCore: 0,
     memAvailable: 0, // TODO: this should not be 0
   }
 }
diff --git a/core/src/types/engine/index.ts b/core/src/types/engine/index.ts
index 83be19d662..fc41caf7a0 100644
--- a/core/src/types/engine/index.ts
+++ b/core/src/types/engine/index.ts
@@ -1,7 +1,7 @@
 import { InferenceEngine } from '../../types'
 
 export type Engines = {
-  [key in InferenceEngine]: EngineVariant[]
+  [key in InferenceEngine]: (EngineVariant & EngineConfig)[]
 }
 
 export type EngineMetadata = {
@@ -40,6 +40,7 @@ export type EngineReleased = {
 }
 
 export type EngineConfig = {
+  engine?: string
   version?: string
   variant?: string
   type?: string
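The two type changes meet in the chat path: EngineConfig above gains an optional engine name, and MessageRequest in the diff that follows gains a matching engine field so a request can be routed to a configured remote engine. A hedged example of a request literal using the new field (values are invented; Partial<> skips unrelated required fields):

import { MessageRequest } from '@janhq/core'

// Illustrative values only; `engine` is the field added by this patch.
const request: Partial<MessageRequest> = {
  threadId: 'thread-1',
  type: 'Thread',
  engine: 'anthropic',
}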
diff --git a/core/src/types/message/messageEntity.ts b/core/src/types/message/messageEntity.ts
index 302b824ee5..edd253a57d 100644
--- a/core/src/types/message/messageEntity.ts
+++ b/core/src/types/message/messageEntity.ts
@@ -32,9 +32,8 @@ export type ThreadMessage = {
   completed_at: number
   /** The additional metadata of this message. **/
   metadata?: Record<string, unknown>
-
+  /** Type of the message */
   type?: string
-
   /** The error code which explain what error type. Used in conjunction with MessageStatus.Error */
   error_code?: ErrorCode
 }
@@ -72,6 +71,10 @@ export type MessageRequest = {
   // TODO: deprecate threadId field
   thread?: Thread
 
+  /** Engine name to process */
+  engine?: string
+
+  /** Message type */
   type?: string
 }
 
@@ -147,7 +150,9 @@
   /**
    * The tools to add this file to.
    */
-  tools?: Array<CodeInterpreterTool | Attachment.AssistantToolsFileSearchTypeOnly>
+  tools?: Array<
+    CodeInterpreterTool | Attachment.AssistantToolsFileSearchTypeOnly
+  >
 }
 
 export namespace Attachment {
@@ -166,5 +171,10 @@
   /**
    * The reason the message is incomplete.
    */
-  reason: 'content_filter' | 'max_tokens' | 'run_cancelled' | 'run_expired' | 'run_failed'
+  reason:
+    | 'content_filter'
+    | 'max_tokens'
+    | 'run_cancelled'
+    | 'run_expired'
+    | 'run_failed'
 }
diff --git a/extensions/engine-management-extension/engines.mjs b/extensions/engine-management-extension/engines.mjs
new file mode 100644
index 0000000000..9457582fb8
--- /dev/null
+++ b/extensions/engine-management-extension/engines.mjs
@@ -0,0 +1,27 @@
+import anthropic from './resources/anthropic.json' with { type: 'json' }
+import openai from './resources/openai.json' with { type: 'json' }
+import openrouter from './resources/openrouter.json' with { type: 'json' }
+import groq from './resources/groq.json' with { type: 'json' }
+import martian from './resources/martian.json' with { type: 'json' }
+import mistral from './resources/mistral.json' with { type: 'json' }
+import nvidia from './resources/nvidia.json' with { type: 'json' }
+
+import openaiModels from './models/openai.json' with { type: 'json' }
+import anthropicModels from './models/anthropic.json' with { type: 'json' }
+import openrouterModels from './models/openrouter.json' with { type: 'json' }
+import groqModels from './models/groq.json' with { type: 'json' }
+import martianModels from './models/martian.json' with { type: 'json' }
+import mistralModels from './models/mistral.json' with { type: 'json' }
+import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+
+const engines = [anthropic, openai, openrouter, groq, mistral, martian, nvidia]
+const models = [
+  ...openaiModels,
+  ...anthropicModels,
+  ...openrouterModels,
+  ...groqModels,
+  ...mistralModels,
+  ...martianModels,
+  ...nvidiaModels,
+]
+export { engines, models }
diff --git a/extensions/inference-anthropic-extension/resources/models.json b/extensions/engine-management-extension/models/anthropic.json
similarity index 54%
rename from extensions/inference-anthropic-extension/resources/models.json
rename to extensions/engine-management-extension/models/anthropic.json
index 59e41245b6..d35ba4c22e 100644
--- a/extensions/inference-anthropic-extension/resources/models.json
+++ b/extensions/engine-management-extension/models/anthropic.json
@@ -1,74 +1,41 @@
 [
   {
-    "sources": [
-      {
-        "url": "https://www.anthropic.com/"
-      }
-    ],
-    "id": "claude-3-opus-latest",
+    "model": "claude-3-opus-latest",
     "object": "model",
     "name": "Claude 3 Opus Latest",
     "version": "1.0",
     "description": "Claude 3 Opus is a powerful model, suitable for highly complex tasks.",
-    "format": "api",
-    "settings": {},
-    "parameters": {
+    "inference_params": {
       "max_tokens": 4096,
       "temperature": 0.7,
       "stream": false
     },
-    "metadata": {
-      "author": "Anthropic",
-      "tags": ["General", "Big Context Length"]
-    },
     "engine": "anthropic"
   },
   {
-    "sources": [
-      {
-        "url": "https://www.anthropic.com/"
-      }
-    ],
-    "id": "claude-3-5-haiku-latest",
+    "model": "claude-3-5-haiku-latest",
     "object": "model",
     "name": "Claude 3.5 Haiku Latest",
     "version": "1.0",
     "description": "Claude 3.5 Haiku is the fastest model, providing near-instant responsiveness.",
-    "format": "api",
-    "settings": {},
-    "parameters": {
+    "inference_params": {
       "max_tokens": 8192,
       "temperature": 0.7,
       "stream": false
     },
-    "metadata": {
-      "author": "Anthropic",
-      "tags": ["General", "Big Context Length"]
-    },
     "engine": "anthropic"
   },
   {
-    "sources": [
-      {
-        "url": "https://www.anthropic.com/"
-      }
-    ],
-    "id": "claude-3-5-sonnet-latest",
+    "model": "claude-3-5-sonnet-latest",
     "object": "model",
     "name": "Claude 3.5 Sonnet Latest",
     "version": "1.0",
"description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8192, "temperature": 0.7, "stream": true }, - "metadata": { - "author": "Anthropic", - "tags": ["General", "Big Context Length"] - }, "engine": "anthropic" } ] diff --git a/extensions/inference-cohere-extension/resources/models.json b/extensions/engine-management-extension/models/cohere.json similarity index 62% rename from extensions/inference-cohere-extension/resources/models.json rename to extensions/engine-management-extension/models/cohere.json index 2b4cc3e8e4..ecebb9016f 100644 --- a/extensions/inference-cohere-extension/resources/models.json +++ b/extensions/engine-management-extension/models/cohere.json @@ -1,56 +1,28 @@ [ { - "sources": [ - { - "url": "https://cohere.com" - } - ], - "id": "command-r-plus", + "model": "command-r-plus", "object": "model", "name": "Command R+", "version": "1.0", "description": "Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use.", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 128000, "temperature": 0.7, "stream": false }, - "metadata": { - "author": "Cohere", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "cohere" }, { - "sources": [ - { - "url": "https://cohere.com" - } - ], - "id": "command-r", + "model": "command-r", "object": "model", "name": "Command R", "version": "1.0", "description": "Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. 
It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 128000, "temperature": 0.7, "stream": false }, - "metadata": { - "author": "Cohere", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "cohere" } ] diff --git a/extensions/inference-groq-extension/resources/models.json b/extensions/engine-management-extension/models/groq.json similarity index 53% rename from extensions/inference-groq-extension/resources/models.json rename to extensions/engine-management-extension/models/groq.json index b4b013dad6..38a0f38350 100644 --- a/extensions/inference-groq-extension/resources/models.json +++ b/extensions/engine-management-extension/models/groq.json @@ -1,18 +1,11 @@ [ { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama3-70b-8192", + "model": "llama3-70b-8192", "object": "model", "name": "Groq Llama 3 70b", "version": "1.1", "description": "Groq Llama 3 70b with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8192, "temperature": 0.7, "top_p": 0.95, @@ -21,29 +14,15 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama3-8b-8192", + "model": "llama3-8b-8192", "object": "model", "name": "Groq Llama 3 8b", "version": "1.1", "description": "Groq Llama 3 8b with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8192, "temperature": 0.7, "top_p": 0.95, @@ -52,29 +31,15 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.1-8b-instant", + "model": "llama-3.1-8b-instant", "object": "model", "name": "Groq Llama 3.1 8b Instant", "version": "1.1", "description": "Groq Llama 3.1 8b with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8000, "temperature": 0.7, "top_p": 0.95, @@ -83,29 +48,15 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-11b-text-preview", + "model": "llama-3.2-11b-text-preview", "object": "model", "name": "Groq Llama 3.2 11b Text Preview", "version": "1.1", "description": "Groq Llama 3.2 11b Text Preview with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8192, "temperature": 0.7, "top_p": 0.95, @@ -114,29 +65,15 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-11b-vision-preview", + "model": "llama-3.2-11b-vision-preview", "object": "model", "name": "Groq Llama 3.2 11b Vision Preview", "version": "1.1", "description": "Groq Llama 3.2 11b Vision Preview with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 8192, "temperature": 
0.7, "top_p": 0.95, @@ -145,28 +82,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-1b-preview", + "model": "llama-3.2-1b-preview", "object": "model", "name": "Groq Llama 3.2 1b Preview", "version": "1.1", "description": "Groq Llama 3.2 1b Preview with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 8192, "temperature": 0.7, @@ -176,28 +99,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-3b-preview", + "model": "llama-3.2-3b-preview", "object": "model", "name": "Groq Llama 3.2 3b Preview", "version": "1.1", "description": "Groq Llama 3.2 3b Preview with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 8192, "temperature": 0.7, @@ -207,28 +116,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-90b-text-preview", + "model": "llama-3.2-90b-text-preview", "object": "model", "name": "Groq Llama 3.2 90b Text Preview", "version": "1.1", "description": "Groq Llama 3.2 90b Text Preview with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 8192, "temperature": 0.7, @@ -238,28 +133,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "llama-3.2-90b-vision-preview", + "model": "llama-3.2-90b-vision-preview", "object": "model", "name": "Groq Llama 3.2 90b Vision Preview", "version": "1.1", "description": "Groq Llama 3.2 90b Vision Preview with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 8192, "temperature": 0.7, @@ -269,58 +150,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Meta", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "gemma-7b-it", - "object": "model", - "name": "Groq Gemma 7B Instruct", - "version": "1.2", - "description": "Groq Gemma 7B Instruct with supercharged speed!", - "format": "api", - "settings": {}, - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": [ - "General" - ] - }, - "engine": "groq" - }, - { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "gemma2-9b-it", + "model": "gemma2-9b-it", "object": "model", "name": "Groq Gemma 9B Instruct", "version": "1.2", "description": "Groq Gemma 9b Instruct with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 8192, "temperature": 0.7, @@ -330,27 +167,14 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Google", - "tags": [ - "General" - ] - }, "engine": "groq" }, { - "sources": [ - { - "url": "https://groq.com" - } - ], - "id": "mixtral-8x7b-32768", + 
"model": "mixtral-8x7b-32768", "object": "model", "name": "Groq Mixtral 8x7B Instruct", "version": "1.2", "description": "Groq Mixtral 8x7B Instruct is Mixtral with supercharged speed!", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 32768, "temperature": 0.7, @@ -360,13 +184,6 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Mistral", - "tags": [ - "General", - "Big Context Length" - ] - }, "engine": "groq" } -] \ No newline at end of file +] diff --git a/extensions/inference-martian-extension/resources/models.json b/extensions/engine-management-extension/models/martian.json similarity index 63% rename from extensions/inference-martian-extension/resources/models.json rename to extensions/engine-management-extension/models/martian.json index cf59e958e7..b935587ccb 100644 --- a/extensions/inference-martian-extension/resources/models.json +++ b/extensions/engine-management-extension/models/martian.json @@ -1,17 +1,10 @@ [ { - "sources": [ - { - "url": "https://withmartian.com/" - } - ], - "id": "router", + "model": "router", "object": "model", "name": "Martian Model Router", "version": "1.0", "description": "Martian Model Router dynamically routes requests to the best LLM in real-time", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 4096, "temperature": 0.7, @@ -21,12 +14,6 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "Martian", - "tags": [ - "General" - ] - }, "engine": "martian" } -] \ No newline at end of file +] diff --git a/extensions/inference-mistral-extension/resources/models.json b/extensions/engine-management-extension/models/mistral.json similarity index 58% rename from extensions/inference-mistral-extension/resources/models.json rename to extensions/engine-management-extension/models/mistral.json index 23ecd6fdd4..47833a31c4 100644 --- a/extensions/inference-mistral-extension/resources/models.json +++ b/extensions/engine-management-extension/models/mistral.json @@ -1,83 +1,44 @@ [ { - "sources": [ - { - "url": "https://docs.mistral.ai/api/" - } - ], - "id": "mistral-small-latest", + "model": "mistral-small-latest", "object": "model", "name": "Mistral Small", "version": "1.1", "description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 32000, "temperature": 0.7, "top_p": 0.95, "stream": true }, - "metadata": { - "author": "Mistral", - "tags": [ - "General" - ] - }, "engine": "mistral" }, { - "sources": [ - { - "url": "https://docs.mistral.ai/api/" - } - ], - "id": "mistral-large-latest", + "model": "mistral-large-latest", "object": "model", "name": "Mistral Large", "version": "1.1", "description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 32000, "temperature": 0.7, "top_p": 0.95, "stream": true }, - "metadata": { - "author": "Mistral", - "tags": [ - "General" - ] - }, "engine": "mistral" }, { - "sources": [ - { - "url": "https://docs.mistral.ai/api/" - } - ], - "id": "open-mixtral-8x22b", + "model": "open-mixtral-8x22b", "object": "model", "name": "Mixtral 8x22B", "version": "1.1", "description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 32000, "temperature": 0.7, "top_p": 0.95, 
"stream": true }, - "metadata": { - "author": "Mistral", - "tags": [ - "General" - ] - }, "engine": "mistral" } ] diff --git a/extensions/inference-nvidia-extension/resources/models.json b/extensions/engine-management-extension/models/nvidia.json similarity index 57% rename from extensions/inference-nvidia-extension/resources/models.json rename to extensions/engine-management-extension/models/nvidia.json index b97644fc99..f2adac7798 100644 --- a/extensions/inference-nvidia-extension/resources/models.json +++ b/extensions/engine-management-extension/models/nvidia.json @@ -1,17 +1,10 @@ [ { - "sources": [ - { - "url": "https://integrate.api.nvidia.com/v1/chat/completions" - } - ], - "id": "mistralai/mistral-7b-instruct-v0.2", + "model": "mistralai/mistral-7b-instruct-v0.2", "object": "model", "name": "Mistral 7B", "version": "1.1", "description": "Mistral 7B with NVIDIA", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 1024, "temperature": 0.3, @@ -22,10 +15,6 @@ "stop": null, "seed": null }, - "metadata": { - "author": "NVIDIA", - "tags": ["General"] - }, "engine": "nvidia" } ] diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/engine-management-extension/models/openai.json similarity index 62% rename from extensions/inference-openai-extension/resources/models.json rename to extensions/engine-management-extension/models/openai.json index 0c822fde26..8f59b42ea5 100644 --- a/extensions/inference-openai-extension/resources/models.json +++ b/extensions/engine-management-extension/models/openai.json @@ -1,18 +1,12 @@ [ { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-4-turbo", + "model": "gpt-4-turbo", "object": "model", "name": "OpenAI GPT 4 Turbo", "version": "1.2", "description": "OpenAI GPT 4 Turbo model is extremely good", "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 4096, "temperature": 0.7, "top_p": 0.95, @@ -21,26 +15,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-3.5-turbo", + "model": "gpt-3.5-turbo", "object": "model", "name": "OpenAI GPT 3.5 Turbo", "version": "1.1", "description": "OpenAI GPT 3.5 Turbo model is extremely fast", "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 4096, "temperature": 0.7, "top_p": 0.95, @@ -49,28 +33,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-4o", + "model": "gpt-4o", "object": "model", "name": "OpenAI GPT 4o", "version": "1.1", "description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality", "format": "api", - "settings": { - "vision_model": true - }, - "parameters": { + "inference_params": { "max_tokens": 4096, "temperature": 0.7, "top_p": 0.95, @@ -79,28 +51,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "gpt-4o-mini", + "model": "gpt-4o-mini", "object": "model", "name": "OpenAI GPT 4o-mini", "version": "1.1", "description": "GPT-4o mini (“o” for “omni”) is a fast, affordable small model for focused tasks.", "format": "api", - "settings": { - "vision_model": true - }, - 
"parameters": { + "inference_params": { "max_tokens": 16384, "temperature": 0.7, "top_p": 0.95, @@ -109,26 +69,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "o1", + "model": "o1", "object": "model", "name": "OpenAI o1", "version": "1.0", "description": "OpenAI o1 is a new model with complex reasoning", "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 100000, "temperature": 1, "top_p": 1, @@ -136,26 +86,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "o1-preview", + "model": "o1-preview", "object": "model", "name": "OpenAI o1-preview", "version": "1.0", "description": "OpenAI o1-preview is a new model with complex reasoning", "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 32768, "temperature": 1, "top_p": 1, @@ -163,26 +103,16 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" }, { - "sources": [ - { - "url": "https://openai.com" - } - ], - "id": "o1-mini", + "model": "o1-mini", "object": "model", "name": "OpenAI o1-mini", "version": "1.0", "description": "OpenAI o1-mini is a lightweight reasoning model", "format": "api", - "settings": {}, - "parameters": { + "inference_params": { "max_tokens": 65536, "temperature": 1, "top_p": 1, @@ -190,10 +120,6 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenAI", - "tags": ["General"] - }, "engine": "openai" } ] diff --git a/extensions/inference-openrouter-extension/resources/models.json b/extensions/engine-management-extension/models/openrouter.json similarity index 63% rename from extensions/inference-openrouter-extension/resources/models.json rename to extensions/engine-management-extension/models/openrouter.json index 31dea8734a..5ac189a814 100644 --- a/extensions/inference-openrouter-extension/resources/models.json +++ b/extensions/engine-management-extension/models/openrouter.json @@ -1,17 +1,10 @@ [ { - "sources": [ - { - "url": "https://openrouter.ai" - } - ], - "id": "open-router-auto", + "model": "open-router-auto", "object": "model", "name": "OpenRouter", "version": "1.0", "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "format": "api", - "settings": {}, "parameters": { "max_tokens": 128000, "temperature": 0.7, @@ -19,10 +12,6 @@ "frequency_penalty": 0, "presence_penalty": 0 }, - "metadata": { - "author": "OpenRouter", - "tags": ["General", "Big Context Length"] - }, "engine": "openrouter" } ] diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json new file mode 100644 index 0000000000..a79c96493b --- /dev/null +++ b/extensions/engine-management-extension/resources/anthropic.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-anthropic-extension", + "type": "remote", + "engine": "anthropic", + "url": "https://api.anthropic.com", + "api_key": "", + "metadata": { + "get_models_url": "https://api.anthropic.com/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { 
+ "url": "https://api.anthropic.com/v1/messages", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/cohere.json b/extensions/engine-management-extension/resources/cohere.json new file mode 100644 index 0000000000..fc6730659b --- /dev/null +++ b/extensions/engine-management-extension/resources/cohere.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-cohere-extension", + "type": "remote", + "engine": "cohere", + "url": "https://api.cohere.ai", + "api_key": "", + "metadata": { + "get_models_url": "https://api.cohere.ai/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://api.cohere.ai/v1/chat", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/groq.json b/extensions/engine-management-extension/resources/groq.json new file mode 100644 index 0000000000..9e9a68759e --- /dev/null +++ b/extensions/engine-management-extension/resources/groq.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-groq-extension", + 
"type": "remote", + "engine": "groq", + "url": "https://api.groq.com", + "api_key": "", + "metadata": { + "get_models_url": "https://api.groq.com/openai/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://api.groq.com/openai/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/martian.json b/extensions/engine-management-extension/resources/martian.json new file mode 100644 index 0000000000..bd867d846d --- /dev/null +++ b/extensions/engine-management-extension/resources/martian.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-martian-extension", + "type": "remote", + "engine": "martian", + "url": "https://withmartian.com", + "api_key": "", + "metadata": { + "get_models_url": "https://withmartian.com/api/openai/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://withmartian.com/api/openai/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff 
--git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json new file mode 100644 index 0000000000..bcf3928436 --- /dev/null +++ b/extensions/engine-management-extension/resources/mistral.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-mistral-extension", + "type": "remote", + "engine": "mistral", + "url": "https://api.mistral.ai", + "api_key": "", + "metadata": { + "get_models_url": "https://api.mistral.ai/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://api.mistral.ai/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/nvidia.json b/extensions/engine-management-extension/resources/nvidia.json new file mode 100644 index 0000000000..b9fb80b8e4 --- /dev/null +++ b/extensions/engine-management-extension/resources/nvidia.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-nvidia-extension", + "type": "remote", + "engine": "nvidia", + "url": "https://integrate.api.nvidia.com", + "api_key": "", + "metadata": { + "get_models_url": "https://integrate.api.nvidia.com/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://integrate.api.nvidia.com/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% 
if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/openai.json b/extensions/engine-management-extension/resources/openai.json new file mode 100644 index 0000000000..19031792a0 --- /dev/null +++ b/extensions/engine-management-extension/resources/openai.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-openai-extension", + "type": "remote", + "engine": "openai", + "url": "https://api.openai.com", + "api_key": "", + "metadata": { + "get_models_url": "https://api.openai.com/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://api.openai.com/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/resources/openrouter.json b/extensions/engine-management-extension/resources/openrouter.json new file mode 100644 index 0000000000..8bb4b7d75b --- /dev/null +++ b/extensions/engine-management-extension/resources/openrouter.json @@ -0,0 +1,22 @@ +{ + "id": "@janhq/inference-openrouter-extension", + "type": "remote", + "engine": "openrouter", + "url": "https://openrouter.ai", + "api_key": "", + "metadata": { + "get_models_url": "https://openrouter.ai/api/v1/models", + "api_key_template": "Authorization: Bearer {{api_key}}", + "transform_req": { + "chat_completions": { + "url": "https://openrouter.ai/api/v1/chat/completions", + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == 
\"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + }, + "transform_resp": { + "chat_completions": { + "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" + } + } + } +} diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs index 038f23cc3e..b59d395d1d 100644 --- a/extensions/engine-management-extension/rolldown.config.mjs +++ b/extensions/engine-management-extension/rolldown.config.mjs @@ -1,4 +1,5 @@ import { defineConfig } from 'rolldown' +import { engines, models } from './engines.mjs' import pkgJson from './package.json' with { type: 'json' } export default defineConfig([ @@ -13,6 +14,8 @@ export default defineConfig([ API_URL: JSON.stringify('http://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.43'), + DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), + DEFAULT_REMOTE_MODELS: JSON.stringify(models), }, }, { diff --git a/extensions/engine-management-extension/src/@types/global.d.ts b/extensions/engine-management-extension/src/@types/global.d.ts index 8d0a94fef4..2d520d5f9e 100644 --- a/extensions/engine-management-extension/src/@types/global.d.ts +++ b/extensions/engine-management-extension/src/@types/global.d.ts @@ -3,6 +3,12 @@ declare const CORTEX_ENGINE_VERSION: string declare const SOCKET_URL: string declare const NODE: string +declare const DEFAULT_REMOTE_ENGINES: ({ + id: string + engine: string +} & EngineConfig)[] +declare const DEFAULT_REMOTE_MODELS: Model[] + interface Core { api: APIFunctions events: EventEmitter diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts index 079ca4400e..51bae634ef 100644 --- a/extensions/engine-management-extension/src/index.ts +++ b/extensions/engine-management-extension/src/index.ts @@ -8,10 +8,14 @@ import { EngineReleased, executeOnMain, systemInformation, + Model, + fs, + joinPath, } from '@janhq/core' import ky, { HTTPError } from 'ky' import PQueue from 'p-queue' import { EngineError } from './error' +import { getJanDataFolderPath } from '@janhq/core' /** * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides @@ -28,41 +32,11 @@ export default class JSONEngineManagementExtension extends EngineManagementExten await executeOnMain(NODE, 'symlinkEngines') // Run Healthcheck this.queue.add(() => this.healthz()) - try { - const variant = await this.getDefaultEngineVariant( - InferenceEngine.cortex_llamacpp - ) - const installedEngines = await this.getInstalledEngines( - InferenceEngine.cortex_llamacpp - ) - if ( - !installedEngines.some( - (e) => e.name === variant.variant && e.version === variant.version - ) - ) { - throw new EngineError( - 'Default engine is not available, use bundled version.' 
- ) - } - } catch (error) { - if ( - (error instanceof HTTPError && error.response.status === 400) || - error instanceof EngineError - ) { - const systemInfo = await systemInformation() - const variant = await executeOnMain( - NODE, - 'engineVariant', - systemInfo.gpuSetting - ) - await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { - variant: variant, - version: `${CORTEX_ENGINE_VERSION}`, - }) - } else { - console.error('An unexpected error occurred:', error) - } - } + // Update default local engine + this.updateDefaultEngine() + + // Populate default remote engines + this.populateDefaultRemoteEngines() } /** @@ -89,9 +63,10 @@ export default class JSONEngineManagementExtension extends EngineManagementExten return this.queue.add(() => ky .get(`${API_URL}/v1/models/remote/${name}`) - .json() + .json() .then((e) => e) - ) as Promise + .catch(() => []) + ) as Promise } /** @@ -156,6 +131,16 @@ export default class JSONEngineManagementExtension extends EngineManagementExten ) as Promise<{ messages: string }> } + /** + * Add a new remote engine + * @returns A Promise that resolves once the engine has been added. + */ + async addRemoteEngine(engineConfig: EngineConfig) { + return this.queue.add(() => + ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) + ) as Promise<{ messages: string }> + } + /** * @param name - Inference engine name. * @returns A Promise that resolves to uninstall of engine. @@ -168,6 +153,16 @@ export default class JSONEngineManagementExtension extends EngineManagementExten ) as Promise<{ messages: string }> } + /** + * Add a new remote model + * @param model - Remote model object. + */ + async addRemoteModel(model: Model) { + return this.queue.add(() => + ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) + ) + } + /** * @param name - Inference engine name. * @returns A Promise that resolves to an object of default engine. @@ -219,4 +214,84 @@ export default class JSONEngineManagementExtension extends EngineManagementExten }) .then(() => {}) } + + /** + * Update the default local engine + * Falls back to the built-in engine variant when no default engine is set + */ + async updateDefaultEngine() { + try { + const variant = await this.getDefaultEngineVariant( + InferenceEngine.cortex_llamacpp + ) + const installedEngines = await this.getInstalledEngines( + InferenceEngine.cortex_llamacpp + ) + if ( + !installedEngines.some( + (e) => e.name === variant.variant && e.version === variant.version + ) + ) { + throw new EngineError( + 'Default engine is not available, use bundled version.'
+ ) + } + } catch (error) { + if ( + (error instanceof HTTPError && error.response.status === 400) || + error instanceof EngineError + ) { + const systemInfo = await systemInformation() + const variant = await executeOnMain( + NODE, + 'engineVariant', + systemInfo.gpuSetting + ) + await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { + variant: variant, + version: `${CORTEX_ENGINE_VERSION}`, + }) + } else { + console.error('An unexpected error occurred:', error) + } + } + } + + /** + * Populates the default remote engines when no customized remote engine setting exists yet + */ + async populateDefaultRemoteEngines() { + const engines = await this.getEngines() + if ( + !Object.values(engines) + .flat() + .some((e) => e.type === 'remote') + ) { + DEFAULT_REMOTE_ENGINES.forEach(async (engine) => { + const { id, ...data } = engine + + /// BEGIN - Migrate legacy api key settings + let api_key = undefined + if (id) { + const apiKeyPath = await joinPath([ + await getJanDataFolderPath(), + 'settings', + id, + 'settings.json', + ]) + const settings = await fs.readFileSync(apiKeyPath, 'utf-8') + api_key = JSON.parse(settings).find( + (e) => e.key === `${data.engine}-api-key` + )?.controllerProps?.value + } + data.api_key = api_key + /// END - Migrate legacy api key settings + + await this.addRemoteEngine(data).catch(console.error) + }) + DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { + await this.addRemoteModel(data).catch(() => {}) + }) + } + } } diff --git a/extensions/engine-management-extension/tsconfig.json b/extensions/engine-management-extension/tsconfig.json index 891d28a605..72e1e1895a 100644 --- a/extensions/engine-management-extension/tsconfig.json +++ b/extensions/engine-management-extension/tsconfig.json @@ -8,7 +8,8 @@ "forceConsistentCasingInFileNames": true, "strict": false, "skipLibCheck": true, - "rootDir": "./src" + "rootDir": "./src", + "resolveJsonModule": true }, "include": ["./src"], "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"] diff --git a/extensions/inference-anthropic-extension/README.md b/extensions/inference-anthropic-extension/README.md deleted file mode 100644 index 1c0dcbd3d4..0000000000 --- a/extensions/inference-anthropic-extension/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Anthropic Engine Extension - -Created using Jan extension example - -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1.
:building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. - -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. - - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/janhq/jan/blob/main/core/README.md). - -So, what are you waiting for? Go ahead and start customizing your extension! diff --git a/extensions/inference-anthropic-extension/jest.config.js b/extensions/inference-anthropic-extension/jest.config.js deleted file mode 100644 index 3e32adceb2..0000000000 --- a/extensions/inference-anthropic-extension/jest.config.js +++ /dev/null @@ -1,9 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - transform: { - 'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest', - }, - transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'], -} diff --git a/extensions/inference-anthropic-extension/package.json b/extensions/inference-anthropic-extension/package.json deleted file mode 100644 index cb064d2aaa..0000000000 --- a/extensions/inference-anthropic-extension/package.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "@janhq/inference-anthropic-extension", - "productName": "Anthropic Inference Engine", - "version": "1.0.3", - "description": "This extension enables Anthropic chat completion API calls", - "main": "dist/index.js", - "engine": "anthropic", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "test": "jest test", - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "ts-loader": "^9.5.0", - "typescript": "^5.7.2" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "fetch-retry": "^5.0.6", - "ulidx": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "fetch-retry" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/inference-anthropic-extension/resources/settings.json b/extensions/inference-anthropic-extension/resources/settings.json deleted file mode 100644 index 9ca4405ac7..0000000000 --- a/extensions/inference-anthropic-extension/resources/settings.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - { - "key": 
"anthropic-api-key", - "title": "API Key", - "description": "The Anthropic API uses API keys for authentication. Visit your [API Keys](https://console.anthropic.com/settings/keys) page to retrieve the API key you'll use in your requests.", - "controllerType": "input", - "controllerProps": { - "placeholder": "Insert API Key", - "value": "", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - }, - { - "key": "chat-completions-endpoint", - "title": "Chat Completions Endpoint", - "description": "The endpoint to use for chat completions. See the [Anthropic API documentation](https://docs.anthropic.com/claude/docs/intro-to-claude) for more information.", - "controllerType": "input", - "controllerProps": { - "placeholder": "https://api.anthropic.com/v1/messages", - "value": "https://api.anthropic.com/v1/messages" - } - } -] diff --git a/extensions/inference-anthropic-extension/rolldown.config.mjs b/extensions/inference-anthropic-extension/rolldown.config.mjs deleted file mode 100644 index 9ebaace2e1..0000000000 --- a/extensions/inference-anthropic-extension/rolldown.config.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - SETTINGS: JSON.stringify(settingJson), - ENGINE: JSON.stringify(pkgJson.engine), - }, -}) diff --git a/extensions/inference-anthropic-extension/src/anthropic.test.ts b/extensions/inference-anthropic-extension/src/anthropic.test.ts deleted file mode 100644 index 703ead0fbe..0000000000 --- a/extensions/inference-anthropic-extension/src/anthropic.test.ts +++ /dev/null @@ -1,77 +0,0 @@ -// Import necessary modules -import JanInferenceAnthropicExtension, { Settings } from '.' 
-import { PayloadType, ChatCompletionRole } from '@janhq/core' - -// Mocks -jest.mock('@janhq/core', () => ({ - RemoteOAIEngine: jest.fn().mockImplementation(() => ({ - registerSettings: jest.fn(), - registerModels: jest.fn(), - getSetting: jest.fn(), - onChange: jest.fn(), - onSettingUpdate: jest.fn(), - onLoad: jest.fn(), - headers: jest.fn(), - })), - PayloadType: jest.fn(), - ChatCompletionRole: { - User: 'user' as const, - Assistant: 'assistant' as const, - System: 'system' as const, - }, -})) - -// Helper functions -const createMockPayload = (): PayloadType => ({ - messages: [ - { role: ChatCompletionRole.System, content: 'Meow' }, - { role: ChatCompletionRole.User, content: 'Hello' }, - { role: ChatCompletionRole.Assistant, content: 'Hi there' }, - ], - model: 'claude-v1', - stream: false, -}) - -describe('JanInferenceAnthropicExtension', () => { - let extension: JanInferenceAnthropicExtension - - beforeEach(() => { - extension = new JanInferenceAnthropicExtension('', '') - extension.apiKey = 'mock-api-key' - extension.inferenceUrl = 'mock-endpoint' - jest.clearAllMocks() - }) - - it('should initialize with correct settings', async () => { - await extension.onLoad() - expect(extension.apiKey).toBe('mock-api-key') - expect(extension.inferenceUrl).toBe('mock-endpoint') - }) - - it('should transform payload correctly', () => { - const payload = createMockPayload() - const transformedPayload = extension.transformPayload(payload) - - expect(transformedPayload).toEqual({ - max_tokens: 4096, - model: 'claude-v1', - stream: false, - system: 'Meow', - messages: [ - { role: 'user', content: 'Hello' }, - { role: 'assistant', content: 'Hi there' }, - ], - }) - }) - - it('should transform response correctly', () => { - const nonStreamResponse = { content: [{ text: 'Test response' }] } - const streamResponse = - 'data: {"type":"content_block_delta","delta":{"text":"Hello"}}' - - expect(extension.transformResponse(nonStreamResponse)).toBe('Test response') - expect(extension.transformResponse(streamResponse)).toBe('Hello') - expect(extension.transformResponse('')).toBe('') - expect(extension.transformResponse('event: something')).toBe('') - }) -}) diff --git a/extensions/inference-anthropic-extension/src/env.d.ts b/extensions/inference-anthropic-extension/src/env.d.ts deleted file mode 100644 index 40ca58094a..0000000000 --- a/extensions/inference-anthropic-extension/src/env.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -declare const SETTINGS: SettingComponentProps[] -declare const MODELS: Model[] diff --git a/extensions/inference-anthropic-extension/src/index.ts b/extensions/inference-anthropic-extension/src/index.ts deleted file mode 100644 index 8c286d7a5f..0000000000 --- a/extensions/inference-anthropic-extension/src/index.ts +++ /dev/null @@ -1,150 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
- * @version 1.0.0 - * @module inference-anthropic-extension/src/index - */ - -import { RemoteOAIEngine } from '@janhq/core' -import { PayloadType } from '@janhq/core' -import { ChatCompletionRole } from '@janhq/core' - -export enum Settings { - apiKey = 'anthropic-api-key', - chatCompletionsEndPoint = 'chat-completions-endpoint', -} - -type AnthropicPayloadType = { - stream: boolean - model?: string - max_tokens?: number - messages?: Array<{ role: string; content: string }> - system?: string -} - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceAnthropicExtension extends RemoteOAIEngine { - inferenceUrl: string = '' - provider: string = 'anthropic' - maxTokens: number = 4096 - - override async onLoad(): Promise { - super.onLoad() - - // Register Settings - this.registerSettings(SETTINGS) - this.registerModels(MODELS) - - this.apiKey = await this.getSetting(Settings.apiKey, '') - this.inferenceUrl = await this.getSetting( - Settings.chatCompletionsEndPoint, - '' - ) - - if (this.inferenceUrl.length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } - } - - // Override the headers method to include the x-API-key in the request headers - override async headers(): Promise { - return { - 'Content-Type': 'application/json', - 'x-api-key': this.apiKey, - 'anthropic-version': '2023-06-01', - } - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.apiKey) { - this.apiKey = value as string - } else if (key === Settings.chatCompletionsEndPoint) { - if (typeof value !== 'string') return - - if (value.trim().length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } else { - this.inferenceUrl = value - } - } - } - - // Override the transformPayload method to convert the payload to the required format - transformPayload = (payload: PayloadType): AnthropicPayloadType => { - if (!payload.messages || payload.messages.length === 0) { - return { - max_tokens: this.maxTokens, - messages: [], - model: payload.model, - stream: payload.stream, - } - } - - const convertedData: AnthropicPayloadType = { - max_tokens: this.maxTokens, - messages: [], - model: payload.model, - stream: payload.stream, - } - - payload.messages.forEach((item) => { - if (item.role === ChatCompletionRole.User) { - convertedData.messages.push({ - role: 'user', - content: item.content as string, - }) - } else if (item.role === ChatCompletionRole.Assistant) { - convertedData.messages.push({ - role: 'assistant', - content: item.content as string, - }) - } else if (item.role === ChatCompletionRole.System) { - // When using Claude, you can dramatically improve its performance by using the system parameter to give it a role. - // This technique, known as role prompting, is the most powerful way to use system prompts with Claude. 
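// Note (annotation, not part of the deleted file): for an OpenAI-style input of
// [{role:'system'}, {role:'user'}, {role:'assistant'}], transformPayload produced
// { system, max_tokens: 4096, model, stream, messages: [user, assistant] },
// i.e. the system message was hoisted into Anthropic's top-level `system`
// parameter rather than kept in `messages`.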
- convertedData.system = item.content as string - } - }) - - return convertedData - } - - // Sample returned stream data from anthropic - // {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } - // {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"} } - // {"type":"content_block_stop","index":0 } - // {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"output_tokens":12} } - - // Override the transformResponse method to convert the response to the required format - transformResponse = (data: any): string => { - // handling stream response - if (typeof data === 'string' && data.trim().length === 0) return '' - if (typeof data === 'string' && data.startsWith('event: ')) return '' - if (typeof data === 'string' && data.startsWith('data: ')) { - data = data.replace('data: ', '') - const parsedData = JSON.parse(data) - if (parsedData.type !== 'content_block_delta') return '' - return parsedData.delta?.text ?? '' - } - - // non stream response - if (data.content && data.content.length > 0 && data.content[0].text) { - return data.content[0].text - } - - console.error('Invalid response format:', data) - return '' - } -} diff --git a/extensions/inference-anthropic-extension/tsconfig.json b/extensions/inference-anthropic-extension/tsconfig.json deleted file mode 100644 index 6db951c9e4..0000000000 --- a/extensions/inference-anthropic-extension/tsconfig.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"], - "exclude": ["**/*.test.ts"] -} diff --git a/extensions/inference-cohere-extension/README.md b/extensions/inference-cohere-extension/README.md deleted file mode 100644 index 089a096e8b..0000000000 --- a/extensions/inference-cohere-extension/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Cohere Engine Extension - -Created using Jan extension example - -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. 
:white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. - -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. - - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/janhq/jan/blob/main/core/README.md). - -So, what are you waiting for? Go ahead and start customizing your extension! diff --git a/extensions/inference-cohere-extension/package.json b/extensions/inference-cohere-extension/package.json deleted file mode 100644 index 7058c0e531..0000000000 --- a/extensions/inference-cohere-extension/package.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "@janhq/inference-cohere-extension", - "productName": "Cohere Inference Engine", - "version": "1.0.0", - "description": "This extension enables Cohere chat completion API calls", - "main": "dist/index.js", - "module": "dist/module.js", - "engine": "cohere", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "ts-loader": "^9.5.0", - "typescript": "^5.7.2" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "fetch-retry": "^5.0.6", - "ulidx": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "fetch-retry" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/inference-cohere-extension/resources/settings.json b/extensions/inference-cohere-extension/resources/settings.json deleted file mode 100644 index 79150d7e5c..0000000000 --- a/extensions/inference-cohere-extension/resources/settings.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - { - "key": "cohere-api-key", - "title": "API Key", - "description": "The Cohere API uses API keys for authentication. Visit your [API Keys](https://dashboard.cohere.com/api-keys) page to retrieve the API key you'll use in your requests.", - "controllerType": "input", - "controllerProps": { - "placeholder": "Insert API Key", - "value": "", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - }, - { - "key": "chat-completions-endpoint", - "title": "Chat Completions Endpoint", - "description": "The endpoint to use for chat completions. 
See the [Cohere API documentation](https://docs.cohere.com/reference/chat) for more information.", - "controllerType": "input", - "controllerProps": { - "placeholder": "https://api.cohere.ai/v1/chat", - "value": "https://api.cohere.ai/v1/chat" - } - } -] diff --git a/extensions/inference-cohere-extension/rolldown.config.mjs b/extensions/inference-cohere-extension/rolldown.config.mjs deleted file mode 100644 index 9ebaace2e1..0000000000 --- a/extensions/inference-cohere-extension/rolldown.config.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - SETTINGS: JSON.stringify(settingJson), - ENGINE: JSON.stringify(pkgJson.engine), - }, -}) diff --git a/extensions/inference-cohere-extension/src/env.d.ts b/extensions/inference-cohere-extension/src/env.d.ts deleted file mode 100644 index 40ca58094a..0000000000 --- a/extensions/inference-cohere-extension/src/env.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -declare const SETTINGS: SettingComponentProps[] -declare const MODELS: Model[] diff --git a/extensions/inference-cohere-extension/src/index.ts b/extensions/inference-cohere-extension/src/index.ts deleted file mode 100644 index 018df60f99..0000000000 --- a/extensions/inference-cohere-extension/src/index.ts +++ /dev/null @@ -1,117 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-cohere-extension/src/index - */ - -import { RemoteOAIEngine } from '@janhq/core' -import { PayloadType } from '@janhq/core' -import { ChatCompletionRole } from '@janhq/core' - -enum Settings { - apiKey = 'cohere-api-key', - chatCompletionsEndPoint = 'chat-completions-endpoint', -} - -enum RoleType { - user = 'USER', - chatbot = 'CHATBOT', - system = 'SYSTEM', -} - -type CoherePayloadType = { - chat_history?: Array<{ role: RoleType; message: string }> - message?: string - preamble?: string -} - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
- */ -export default class JanInferenceCohereExtension extends RemoteOAIEngine { - inferenceUrl: string = '' - provider: string = 'cohere' - - override async onLoad(): Promise { - super.onLoad() - - // Register Settings - this.registerSettings(SETTINGS) - this.registerModels(MODELS) - - this.apiKey = await this.getSetting(Settings.apiKey, '') - this.inferenceUrl = await this.getSetting( - Settings.chatCompletionsEndPoint, - '' - ) - if (this.inferenceUrl.length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.apiKey) { - this.apiKey = value as string - } else if (key === Settings.chatCompletionsEndPoint) { - if (typeof value !== 'string') return - - if (value.trim().length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } else { - this.inferenceUrl = value - } - } - } - - transformPayload = (payload: PayloadType): CoherePayloadType => { - if (payload.messages.length === 0) { - return {} - } - - const { messages, ...params } = payload - const convertedData: CoherePayloadType = { - ...params, - chat_history: [], - message: '', - } - messages.forEach((item, index) => { - // Assign the message of the last item to the `message` property - if (index === messages.length - 1) { - convertedData.message = item.content as string - return - } - if (item.role === ChatCompletionRole.User) { - convertedData.chat_history.push({ - role: RoleType.user, - message: item.content as string, - }) - } else if (item.role === ChatCompletionRole.Assistant) { - convertedData.chat_history.push({ - role: RoleType.chatbot, - message: item.content as string, - }) - } else if (item.role === ChatCompletionRole.System) { - convertedData.preamble = item.content as string - } - }) - return convertedData - } - - transformResponse = (data: any) => { - return typeof data === 'object' - ? data.text - : (JSON.parse(data.replace('data: ', '').trim()).text ?? '') - } -} diff --git a/extensions/inference-cohere-extension/tsconfig.json b/extensions/inference-cohere-extension/tsconfig.json deleted file mode 100644 index 2477d58ce5..0000000000 --- a/extensions/inference-cohere-extension/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"] -} diff --git a/extensions/inference-groq-extension/README.md b/extensions/inference-groq-extension/README.md deleted file mode 100644 index f9690da09d..0000000000 --- a/extensions/inference-groq-extension/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. 
Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. - -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. - - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/janhq/jan/blob/main/core/README.md). - -So, what are you waiting for? Go ahead and start customizing your extension! 
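Stepping back to the resource files added at the top of this patch: every `transform_req` template there encodes the same rule, an allow-list filter over the incoming OpenAI-style request body. Below is a minimal TypeScript sketch of that behaviour; the key list is copied from the templates, but the real rendering is done by cortex's template engine, not by code like this.

```typescript
// Sketch of what the chat_completions transform_req templates effectively do:
// keep only the allow-listed keys, drop everything else, re-serialize.
const ALLOWED_REQUEST_KEYS = new Set([
  'messages', 'model', 'temperature', 'store', 'max_tokens', 'stream',
  'presence_penalty', 'metadata', 'frequency_penalty', 'tools', 'tool_choice',
  'logprobs', 'top_logprobs', 'logit_bias', 'n', 'modalities', 'prediction',
  'response_format', 'service_tier', 'seed', 'stop', 'stream_options',
  'top_p', 'parallel_tool_calls', 'user',
])

function transformRequest(inputRequest: Record<string, unknown>): string {
  const filtered = Object.entries(inputRequest).filter(([key]) =>
    ALLOWED_REQUEST_KEYS.has(key)
  )
  return JSON.stringify(Object.fromEntries(filtered))
}

// Provider-specific extras are silently dropped before the request is forwarded:
console.log(transformRequest({ model: 'gpt-4o', messages: [], custom_flag: true }))
// => {"model":"gpt-4o","messages":[]}
```

The `transform_resp` templates apply the same idea to the provider's reply (keeping `choices`, `created`, `model`, `usage`, and so on), which is how one generic engine-management extension can front several OpenAI-compatible providers at once.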
diff --git a/extensions/inference-groq-extension/package.json b/extensions/inference-groq-extension/package.json deleted file mode 100644 index e400c517b7..0000000000 --- a/extensions/inference-groq-extension/package.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "@janhq/inference-groq-extension", - "productName": "Groq Inference Engine", - "version": "1.0.1", - "description": "This extension enables fast Groq chat completion API calls", - "main": "dist/index.js", - "engine": "groq", - "module": "dist/module.js", - "author": "Carsen Klock & Jan", - "license": "AGPL-3.0", - "scripts": { - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "ts-loader": "^9.5.0", - "typescript": "^5.7.2" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "fetch-retry": "^5.0.6", - "ulidx": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "fetch-retry" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/inference-groq-extension/resources/settings.json b/extensions/inference-groq-extension/resources/settings.json deleted file mode 100644 index 767fec0ba4..0000000000 --- a/extensions/inference-groq-extension/resources/settings.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - { - "key": "groq-api-key", - "title": "API Key", - "description": "The Groq API uses API keys for authentication. Visit your [API Keys](https://console.groq.com/keys) page to retrieve the API key you'll use in your requests.", - "controllerType": "input", - "controllerProps": { - "placeholder": "Insert API Key", - "value": "", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - }, - { - "key": "chat-completions-endpoint", - "title": "Chat Completions Endpoint", - "description": "The endpoint to use for chat completions. 
See the [Groq documentation](https://console.groq.com/docs/openai) for more information.", - "controllerType": "input", - "controllerProps": { - "placeholder": "https://api.groq.com/openai/v1/chat/completions", - "value": "https://api.groq.com/openai/v1/chat/completions" - } - } -] diff --git a/extensions/inference-groq-extension/rolldown.config.mjs b/extensions/inference-groq-extension/rolldown.config.mjs deleted file mode 100644 index 9ebaace2e1..0000000000 --- a/extensions/inference-groq-extension/rolldown.config.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - SETTINGS: JSON.stringify(settingJson), - ENGINE: JSON.stringify(pkgJson.engine), - }, -}) diff --git a/extensions/inference-groq-extension/src/env.d.ts b/extensions/inference-groq-extension/src/env.d.ts deleted file mode 100644 index 40ca58094a..0000000000 --- a/extensions/inference-groq-extension/src/env.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -declare const SETTINGS: SettingComponentProps[] -declare const MODELS: Model[] diff --git a/extensions/inference-groq-extension/src/index.ts b/extensions/inference-groq-extension/src/index.ts deleted file mode 100644 index d6969b48fa..0000000000 --- a/extensions/inference-groq-extension/src/index.ts +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-groq-extension/src/index - */ - -import { RemoteOAIEngine } from '@janhq/core' - -enum Settings { - apiKey = 'groq-api-key', - chatCompletionsEndPoint = 'chat-completions-endpoint', -} -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
- */ -export default class JanInferenceGroqExtension extends RemoteOAIEngine { - inferenceUrl: string = '' - provider = 'groq' - - override async onLoad(): Promise { - super.onLoad() - - // Register Settings - this.registerSettings(SETTINGS) - this.registerModels(MODELS) - - // Retrieve API Key Setting - this.apiKey = await this.getSetting(Settings.apiKey, '') - this.inferenceUrl = await this.getSetting( - Settings.chatCompletionsEndPoint, - '' - ) - - if (this.inferenceUrl.length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.apiKey) { - this.apiKey = value as string - } else if (key === Settings.chatCompletionsEndPoint) { - if (typeof value !== 'string') return - - if (value.trim().length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } else { - this.inferenceUrl = value - } - } - } -} diff --git a/extensions/inference-groq-extension/tsconfig.json b/extensions/inference-groq-extension/tsconfig.json deleted file mode 100644 index 2477d58ce5..0000000000 --- a/extensions/inference-groq-extension/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"] -} diff --git a/extensions/inference-martian-extension/README.md b/extensions/inference-martian-extension/README.md deleted file mode 100644 index 5b8e898d7c..0000000000 --- a/extensions/inference-martian-extension/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Martian Engine Extension - -Created using Jan extension example - -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. 
- -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. - - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/janhq/jan/blob/main/core/README.md). - -So, what are you waiting for? Go ahead and start customizing your extension! diff --git a/extensions/inference-martian-extension/package.json b/extensions/inference-martian-extension/package.json deleted file mode 100644 index c1371917c7..0000000000 --- a/extensions/inference-martian-extension/package.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "@janhq/inference-martian-extension", - "productName": "Martian Inference Engine", - "version": "1.0.1", - "description": "This extension enables Martian chat completion API calls", - "main": "dist/index.js", - "module": "dist/module.js", - "engine": "martian", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "ts-loader": "^9.5.0", - "typescript": "^5.7.2" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "fetch-retry": "^5.0.6", - "ulidx": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "fetch-retry" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/inference-martian-extension/resources/settings.json b/extensions/inference-martian-extension/resources/settings.json deleted file mode 100644 index 6825099f5e..0000000000 --- a/extensions/inference-martian-extension/resources/settings.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - { - "key": "martian-api-key", - "title": "API Key", - "description": "The Martian API uses API keys for authentication. Visit your [API Keys](https://withmartian.com/dashboard) page to retrieve the API key you'll use in your requests.", - "controllerType": "input", - "controllerProps": { - "placeholder": "Insert API Key", - "value": "", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - }, - { - "key": "chat-completions-endpoint", - "title": "Chat Completions Endpoint", - "description": "The endpoint to use for chat completions. 
See the [Martian API documentation](https://docs.withmartian.com/martian-model-router/getting-started/quickstart-integrating-martian-into-your-codebase) for more information.", - "controllerType": "input", - "controllerProps": { - "placeholder": "https://withmartian.com/api/openai/v1/chat/completions", - "value": "https://withmartian.com/api/openai/v1/chat/completions" - } - } -] diff --git a/extensions/inference-martian-extension/rolldown.config.mjs b/extensions/inference-martian-extension/rolldown.config.mjs deleted file mode 100644 index 9ebaace2e1..0000000000 --- a/extensions/inference-martian-extension/rolldown.config.mjs +++ /dev/null @@ -1,18 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - SETTINGS: JSON.stringify(settingJson), - ENGINE: JSON.stringify(pkgJson.engine), - }, -}) diff --git a/extensions/inference-martian-extension/src/env.d.ts b/extensions/inference-martian-extension/src/env.d.ts deleted file mode 100644 index 40ca58094a..0000000000 --- a/extensions/inference-martian-extension/src/env.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -declare const SETTINGS: SettingComponentProps[] -declare const MODELS: Model[] diff --git a/extensions/inference-martian-extension/src/index.ts b/extensions/inference-martian-extension/src/index.ts deleted file mode 100644 index 8cbe4e52db..0000000000 --- a/extensions/inference-martian-extension/src/index.ts +++ /dev/null @@ -1,63 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-martian-extension/src/index - */ - -import { RemoteOAIEngine } from '@janhq/core' - -enum Settings { - apiKey = 'martian-api-key', - chatCompletionsEndPoint = 'chat-completions-endpoint', -} - -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
- */ -export default class JanInferenceMartianExtension extends RemoteOAIEngine { - inferenceUrl: string = '' - provider: string = 'martian' - - override async onLoad(): Promise { - super.onLoad() - - // Register Settings - this.registerSettings(SETTINGS) - this.registerModels(MODELS) - - this.apiKey = await this.getSetting(Settings.apiKey, '') - this.inferenceUrl = await this.getSetting( - Settings.chatCompletionsEndPoint, - '' - ) - if (this.inferenceUrl.length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.apiKey) { - this.apiKey = value as string - } else if (key === Settings.chatCompletionsEndPoint) { - if (typeof value !== 'string') return - - if (value.trim().length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } else { - this.inferenceUrl = value - } - } - } -} diff --git a/extensions/inference-martian-extension/tsconfig.json b/extensions/inference-martian-extension/tsconfig.json deleted file mode 100644 index 2477d58ce5..0000000000 --- a/extensions/inference-martian-extension/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"] -} diff --git a/extensions/inference-mistral-extension/README.md b/extensions/inference-mistral-extension/README.md deleted file mode 100644 index adb36558cf..0000000000 --- a/extensions/inference-mistral-extension/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Mistral Engine Extension - -Created using Jan extension example - -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. 
-
-When you copy this repository, update `package.json` with the name and description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/inference-mistral-extension/package.json b/extensions/inference-mistral-extension/package.json
deleted file mode 100644
index 504a3d6133..0000000000
--- a/extensions/inference-mistral-extension/package.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "name": "@janhq/inference-mistral-extension",
-  "productName": "MistralAI Inference Engine",
-  "version": "1.0.1",
-  "description": "This extension enables Mistral chat completion API calls",
-  "main": "dist/index.js",
-  "module": "dist/module.js",
-  "engine": "mistral",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "scripts": {
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "devDependencies": {
-    "cpx": "^1.5.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "ts-loader": "^9.5.0",
-    "typescript": "^5.7.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "fetch-retry": "^5.0.6",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "fetch-retry"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
diff --git a/extensions/inference-mistral-extension/resources/settings.json b/extensions/inference-mistral-extension/resources/settings.json
deleted file mode 100644
index 963674b02e..0000000000
--- a/extensions/inference-mistral-extension/resources/settings.json
+++ /dev/null
@@ -1,24 +0,0 @@
-[
-  {
-    "key": "mistral-api-key",
-    "title": "API Key",
-    "description": "The Mistral API uses API keys for authentication. Visit your [API Keys](https://console.mistral.ai/api-keys/) page to retrieve the API key you'll use in your requests.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "Insert API Key",
-      "value": "",
-      "type": "password",
-      "inputActions": ["unobscure", "copy"]
-    }
-  },
-  {
-    "key": "chat-completions-endpoint",
-    "title": "Chat Completions Endpoint",
-    "description": "The endpoint to use for chat completions. See the [Mistral API documentation](https://docs.mistral.ai/api/#operation/createChatCompletion) for more information.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "https://api.mistral.ai/v1/chat/completions",
-      "value": "https://api.mistral.ai/v1/chat/completions"
-    }
-  }
-]
diff --git a/extensions/inference-mistral-extension/rolldown.config.mjs b/extensions/inference-mistral-extension/rolldown.config.mjs
deleted file mode 100644
index 9ebaace2e1..0000000000
--- a/extensions/inference-mistral-extension/rolldown.config.mjs
+++ /dev/null
@@ -1,18 +0,0 @@
-import { defineConfig } from 'rolldown'
-import pkgJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-import modelsJson from './resources/models.json' with { type: 'json' }
-
-export default defineConfig({
-  input: 'src/index.ts',
-  output: {
-    format: 'esm',
-    file: 'dist/index.js',
-  },
-  platform: 'browser',
-  define: {
-    MODELS: JSON.stringify(modelsJson),
-    SETTINGS: JSON.stringify(settingJson),
-    ENGINE: JSON.stringify(pkgJson.engine),
-  },
-})
diff --git a/extensions/inference-mistral-extension/src/env.d.ts b/extensions/inference-mistral-extension/src/env.d.ts
deleted file mode 100644
index 40ca58094a..0000000000
--- a/extensions/inference-mistral-extension/src/env.d.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-declare const SETTINGS: SettingComponentProps[]
-declare const MODELS: Model[]
diff --git a/extensions/inference-mistral-extension/src/index.ts b/extensions/inference-mistral-extension/src/index.ts
deleted file mode 100644
index 22b977c5a1..0000000000
--- a/extensions/inference-mistral-extension/src/index.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-mistral-extension/src/index
- */
-
-import { RemoteOAIEngine } from '@janhq/core'
-
-enum Settings {
-  apiKey = 'mistral-api-key',
-  chatCompletionsEndPoint = 'chat-completions-endpoint',
-}
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- */
-export default class JanInferenceMistralExtension extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  provider: string = 'mistral'
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-    this.registerModels(MODELS)
-
-    this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
-    this.inferenceUrl = await this.getSetting<string>(
-      Settings.chatCompletionsEndPoint,
-      ''
-    )
-
-    if (this.inferenceUrl.length === 0) {
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.chatCompletionsEndPoint) {
-          this.inferenceUrl = setting.controllerProps.value as string
-        }
-      })
-    }
-  }
-
-  onSettingUpdate<T>(key: string, value: T): void {
-    if (key === Settings.apiKey) {
-      this.apiKey = value as string
-    } else if (key === Settings.chatCompletionsEndPoint) {
-      if (typeof value !== 'string') return
-
-      if (value.trim().length === 0) {
-        SETTINGS.forEach((setting) => {
-          if (setting.key === Settings.chatCompletionsEndPoint) {
-            this.inferenceUrl = setting.controllerProps.value as string
-          }
-        })
-      } else {
-        this.inferenceUrl = value
-      }
-    }
-  }
-}
diff --git a/extensions/inference-mistral-extension/tsconfig.json b/extensions/inference-mistral-extension/tsconfig.json
deleted file mode 100644
index 2477d58ce5..0000000000
--- a/extensions/inference-mistral-extension/tsconfig.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
-    "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
-  },
-  "include": ["./src"]
-}
diff --git a/extensions/inference-nvidia-extension/README.md b/extensions/inference-nvidia-extension/README.md
deleted file mode 100644
index 65a1b2b593..0000000000
--- a/extensions/inference-nvidia-extension/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# Nvidia Engine Extension
-
-Created using Jan extension example
-
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name and description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/inference-nvidia-extension/package.json b/extensions/inference-nvidia-extension/package.json
deleted file mode 100644
index 771ccec4f7..0000000000
--- a/extensions/inference-nvidia-extension/package.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "name": "@janhq/inference-nvidia-extension",
-  "productName": "NVIDIA NIM Inference Engine",
-  "version": "1.0.1",
-  "description": "This extension enables NVIDIA chat completion API calls",
-  "main": "dist/index.js",
-  "module": "dist/module.js",
-  "engine": "nvidia",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "scripts": {
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "devDependencies": {
-    "cpx": "^1.5.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "ts-loader": "^9.5.0",
-    "typescript": "^5.7.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "fetch-retry": "^5.0.6",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "fetch-retry"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
diff --git a/extensions/inference-nvidia-extension/resources/settings.json b/extensions/inference-nvidia-extension/resources/settings.json
deleted file mode 100644
index 6b26526532..0000000000
--- a/extensions/inference-nvidia-extension/resources/settings.json
+++ /dev/null
@@ -1,24 +0,0 @@
-[
-  {
-    "key": "nvidia-api-key",
-    "title": "API Key",
-    "description": "The NVIDIA API uses API keys for authentication. Visit your [API Keys](https://org.ngc.nvidia.com/setup/personal-keys) page to retrieve the API key you'll use in your requests.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "Insert API Key",
-      "value": "",
-      "type": "password",
-      "inputActions": ["unobscure", "copy"]
-    }
-  },
-  {
-    "key": "chat-completions-endpoint",
-    "title": "Chat Completions Endpoint",
-    "description": "The endpoint to use for chat completions. See the [NVIDIA API documentation](https://www.nvidia.com/en-us/ai/) for more information.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "https://integrate.api.nvidia.com/v1/chat/completions",
-      "value": "https://integrate.api.nvidia.com/v1/chat/completions"
-    }
-  }
-]
diff --git a/extensions/inference-nvidia-extension/rolldown.config.mjs b/extensions/inference-nvidia-extension/rolldown.config.mjs
deleted file mode 100644
index 9ebaace2e1..0000000000
--- a/extensions/inference-nvidia-extension/rolldown.config.mjs
+++ /dev/null
@@ -1,18 +0,0 @@
-import { defineConfig } from 'rolldown'
-import pkgJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-import modelsJson from './resources/models.json' with { type: 'json' }
-
-export default defineConfig({
-  input: 'src/index.ts',
-  output: {
-    format: 'esm',
-    file: 'dist/index.js',
-  },
-  platform: 'browser',
-  define: {
-    MODELS: JSON.stringify(modelsJson),
-    SETTINGS: JSON.stringify(settingJson),
-    ENGINE: JSON.stringify(pkgJson.engine),
-  },
-})
diff --git a/extensions/inference-nvidia-extension/src/env.d.ts b/extensions/inference-nvidia-extension/src/env.d.ts
deleted file mode 100644
index 40ca58094a..0000000000
--- a/extensions/inference-nvidia-extension/src/env.d.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-declare const SETTINGS: SettingComponentProps[]
-declare const MODELS: Model[]
diff --git a/extensions/inference-nvidia-extension/src/index.ts b/extensions/inference-nvidia-extension/src/index.ts
deleted file mode 100644
index 0e5bb81354..0000000000
--- a/extensions/inference-nvidia-extension/src/index.ts
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-nvidia-extension/src/index
- */
-
-import { RemoteOAIEngine } from '@janhq/core'
-
-enum Settings {
-  apiKey = 'nvidia-api-key',
-  chatCompletionsEndPoint = 'chat-completions-endpoint',
-}
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- */
-export default class JanNVIDIANIMInferenceEngine extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  provider: string = 'nvidia'
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-    this.registerModels(MODELS)
-
-    this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
-    this.inferenceUrl = await this.getSetting<string>(
-      Settings.chatCompletionsEndPoint,
-      ''
-    )
-
-    if (this.inferenceUrl.length === 0) {
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.chatCompletionsEndPoint) {
-          this.inferenceUrl = setting.controllerProps.value as string
-        }
-      })
-    }
-  }
-
-  onSettingUpdate<T>(key: string, value: T): void {
-    if (key === Settings.apiKey) {
-      this.apiKey = value as string
-    } else if (key === Settings.chatCompletionsEndPoint) {
-      if (typeof value !== 'string') return
-
-      if (value.trim().length === 0) {
-        SETTINGS.forEach((setting) => {
-          if (setting.key === Settings.chatCompletionsEndPoint) {
-            this.inferenceUrl = setting.controllerProps.value as string
-          }
-        })
-      } else {
-        this.inferenceUrl = value
-      }
-    }
-  }
-}
diff --git a/extensions/inference-nvidia-extension/tsconfig.json b/extensions/inference-nvidia-extension/tsconfig.json
deleted file mode 100644
index 2477d58ce5..0000000000
--- a/extensions/inference-nvidia-extension/tsconfig.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
-    "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
-  },
-  "include": ["./src"]
-}
diff --git a/extensions/inference-openai-extension/README.md b/extensions/inference-openai-extension/README.md
deleted file mode 100644
index c716c725c0..0000000000
--- a/extensions/inference-openai-extension/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# OpenAI Engine Extension
-
-Created using Jan extension example
-
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name and description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/inference-openai-extension/jest.config.js b/extensions/inference-openai-extension/jest.config.js
deleted file mode 100644
index 3e32adceb2..0000000000
--- a/extensions/inference-openai-extension/jest.config.js
+++ /dev/null
@@ -1,9 +0,0 @@
-/** @type {import('ts-jest').JestConfigWithTsJest} */
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  transform: {
-    'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest',
-  },
-  transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'],
-}
diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json
deleted file mode 100644
index f790b60ca1..0000000000
--- a/extensions/inference-openai-extension/package.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "name": "@janhq/inference-openai-extension",
-  "productName": "OpenAI Inference Engine",
-  "version": "1.0.5",
-  "description": "This extension enables OpenAI chat completion API calls",
-  "main": "dist/index.js",
-  "module": "dist/module.js",
-  "engine": "openai",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "scripts": {
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "devDependencies": {
-    "cpx": "^1.5.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "ts-loader": "^9.5.0",
-    "typescript": "^5.7.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "fetch-retry": "^5.0.6",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "fetch-retry"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
diff --git a/extensions/inference-openai-extension/resources/settings.json b/extensions/inference-openai-extension/resources/settings.json
deleted file mode 100644
index db2e80c9bf..0000000000
--- a/extensions/inference-openai-extension/resources/settings.json
+++ /dev/null
@@ -1,24 +0,0 @@
-[
-  {
-    "key": "openai-api-key",
-    "title": "API Key",
-    "description": "The OpenAI API uses API keys for authentication. Visit your [API Keys](https://platform.openai.com/account/api-keys) page to retrieve the API key you'll use in your requests.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "Insert API Key",
-      "value": "",
-      "type": "password",
-      "inputActions": ["unobscure", "copy"]
-    }
-  },
-  {
-    "key": "chat-completions-endpoint",
-    "title": "Chat Completions Endpoint",
-    "description": "The endpoint to use for chat completions. See the [OpenAI API documentation](https://platform.openai.com/docs/api-reference/chat/create) for more information.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "https://api.openai.com/v1/chat/completions",
-      "value": "https://api.openai.com/v1/chat/completions"
-    }
-  }
-]
diff --git a/extensions/inference-openai-extension/rolldown.config.mjs b/extensions/inference-openai-extension/rolldown.config.mjs
deleted file mode 100644
index 9ebaace2e1..0000000000
--- a/extensions/inference-openai-extension/rolldown.config.mjs
+++ /dev/null
@@ -1,18 +0,0 @@
-import { defineConfig } from 'rolldown'
-import pkgJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-import modelsJson from './resources/models.json' with { type: 'json' }
-
-export default defineConfig({
-  input: 'src/index.ts',
-  output: {
-    format: 'esm',
-    file: 'dist/index.js',
-  },
-  platform: 'browser',
-  define: {
-    MODELS: JSON.stringify(modelsJson),
-    SETTINGS: JSON.stringify(settingJson),
-    ENGINE: JSON.stringify(pkgJson.engine),
-  },
-})
diff --git a/extensions/inference-openai-extension/src/OpenAIExtension.test.ts b/extensions/inference-openai-extension/src/OpenAIExtension.test.ts
deleted file mode 100644
index 4d46bc0077..0000000000
--- a/extensions/inference-openai-extension/src/OpenAIExtension.test.ts
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * @jest-environment jsdom
- */
-jest.mock('@janhq/core', () => ({
-  ...jest.requireActual('@janhq/core/node'),
-  RemoteOAIEngine: jest.fn().mockImplementation(() => ({
-    onLoad: jest.fn(),
-    registerSettings: jest.fn(),
-    registerModels: jest.fn(),
-    getSetting: jest.fn(),
-    onSettingUpdate: jest.fn(),
-  })),
-}))
-import JanInferenceOpenAIExtension, { Settings } from '.'
-
-describe('JanInferenceOpenAIExtension', () => {
-  let extension: JanInferenceOpenAIExtension
-
-  beforeEach(() => {
-    // @ts-ignore
-    extension = new JanInferenceOpenAIExtension()
-  })
-
-  it('should initialize with settings and models', async () => {
-    await extension.onLoad()
-    // Assuming there are some default SETTINGS and MODELS being registered
-    expect(extension.apiKey).toBe(undefined)
-    expect(extension.inferenceUrl).toBe('')
-  })
-
-  it('should transform the payload for preview models', () => {
-    const payload: any = {
-      max_tokens: 100,
-      model: 'o1-mini',
-      // Add other required properties...
-    }
-
-    const transformedPayload = extension.transformPayload(payload)
-    expect(transformedPayload.max_completion_tokens).toBe(payload.max_tokens)
-    expect(transformedPayload).not.toHaveProperty('max_tokens')
-    expect(transformedPayload).toHaveProperty('max_completion_tokens')
-  })
-
-  it('should not transform the payload for non-preview models', () => {
-    const payload: any = {
-      max_tokens: 100,
-      model: 'non-preview-model',
-      // Add other required properties...
-    }
-
-    const transformedPayload = extension.transformPayload(payload)
-    expect(transformedPayload).toEqual(payload)
-  })
-})
diff --git a/extensions/inference-openai-extension/src/env.d.ts b/extensions/inference-openai-extension/src/env.d.ts
deleted file mode 100644
index 40ca58094a..0000000000
--- a/extensions/inference-openai-extension/src/env.d.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-declare const SETTINGS: SettingComponentProps[]
-declare const MODELS: Model[]
diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
deleted file mode 100644
index 0996c6bef7..0000000000
--- a/extensions/inference-openai-extension/src/index.ts
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-openai-extension/src/index
- */
-
-import { ModelRuntimeParams, PayloadType, RemoteOAIEngine } from '@janhq/core'
-
-export enum Settings {
-  apiKey = 'openai-api-key',
-  chatCompletionsEndPoint = 'chat-completions-endpoint',
-}
-type OpenAIPayloadType = PayloadType &
-  ModelRuntimeParams & { max_completion_tokens: number }
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- */
-export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  provider: string = 'openai'
-  previewModels = ['o1-mini', 'o1-preview']
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-    this.registerModels(MODELS)
-
-    this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
-    this.inferenceUrl = await this.getSetting<string>(
-      Settings.chatCompletionsEndPoint,
-      ''
-    )
-    if (this.inferenceUrl.length === 0) {
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.chatCompletionsEndPoint) {
-          this.inferenceUrl = setting.controllerProps.value as string
-        }
-      })
-    }
-  }
-
-  onSettingUpdate<T>(key: string, value: T): void {
-    if (key === Settings.apiKey) {
-      this.apiKey = value as string
-    } else if (key === Settings.chatCompletionsEndPoint) {
-      if (typeof value !== 'string') return
-
-      if (value.trim().length === 0) {
-        SETTINGS.forEach((setting) => {
-          if (setting.key === Settings.chatCompletionsEndPoint) {
-            this.inferenceUrl = setting.controllerProps.value as string
-          }
-        })
-      } else {
-        this.inferenceUrl = value
-      }
-    }
-  }
-
-  /**
-   * Transform the payload before sending it to the inference endpoint.
-   * Preview models such as o1-mini and o1-preview replaced the max_tokens
-   * parameter with max_completion_tokens; other models still use max_tokens.
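-   * Example (illustrative): a payload of { model: 'o1-mini', max_tokens: 100 }
-   * is sent as { model: 'o1-mini', max_completion_tokens: 100 }.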
-   * @param payload
-   * @returns
-   */
-  transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => {
-    // Remove empty stop words
-    if (payload.stop?.length === 0) {
-      const { stop, ...params } = payload
-      payload = params
-    }
-    // Transform the payload for preview models
-    if (this.previewModels.includes(payload.model)) {
-      const { max_tokens, stop, ...params } = payload
-      return {
-        ...params,
-        max_completion_tokens: max_tokens,
-      }
-    }
-    // Pass through for non-preview models
-    return payload
-  }
-}
diff --git a/extensions/inference-openai-extension/tsconfig.json b/extensions/inference-openai-extension/tsconfig.json
deleted file mode 100644
index 6db951c9e4..0000000000
--- a/extensions/inference-openai-extension/tsconfig.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
-    "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
-  },
-  "include": ["./src"],
-  "exclude": ["**/*.test.ts"]
-}
diff --git a/extensions/inference-openrouter-extension/README.md b/extensions/inference-openrouter-extension/README.md
deleted file mode 100644
index aab10755d4..0000000000
--- a/extensions/inference-openrouter-extension/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# Open Router Engine Extension
-
-Created using Jan extension example
-
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name and description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
diff --git a/extensions/inference-openrouter-extension/package.json b/extensions/inference-openrouter-extension/package.json
deleted file mode 100644
index fd53ad0f55..0000000000
--- a/extensions/inference-openrouter-extension/package.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "name": "@janhq/inference-openrouter-extension",
-  "productName": "OpenRouter Inference Engine",
-  "version": "1.0.0",
-  "description": "This extension enables Open Router chat completion API calls",
-  "main": "dist/index.js",
-  "module": "dist/module.js",
-  "engine": "openrouter",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "scripts": {
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "devDependencies": {
-    "cpx": "^1.5.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "ts-loader": "^9.5.0",
-    "typescript": "^5.7.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "fetch-retry": "^5.0.6",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "fetch-retry"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
diff --git a/extensions/inference-openrouter-extension/resources/settings.json b/extensions/inference-openrouter-extension/resources/settings.json
deleted file mode 100644
index 189aee0a0b..0000000000
--- a/extensions/inference-openrouter-extension/resources/settings.json
+++ /dev/null
@@ -1,34 +0,0 @@
-[
-  {
-    "key": "openrouter-api-key",
-    "title": "API Key",
-    "description": "The OpenRouter API uses API keys for authentication. Visit your [API Keys](https://openrouter.ai/keys) page to retrieve the API key you'll use in your requests.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "Insert API Key",
-      "value": "",
-      "type": "password",
-      "inputActions": ["unobscure", "copy"]
-    }
-  },
-  {
-    "key": "chat-completions-endpoint",
-    "title": "Chat Completions Endpoint",
-    "description": "The endpoint to use for chat completions. See the [OpenRouter API documentation](https://openrouter.ai/docs/requests) for more information.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "https://openrouter.ai/api/v1/chat/completions",
-      "value": "https://openrouter.ai/api/v1/chat/completions"
-    }
-  },
-  {
-    "key": "openrouter-model",
-    "title": "Model",
-    "description": "If the model parameter is omitted, the user or payer's default is used. Otherwise, remember to select a value for model from the [supported models](https://openrouter.ai/docs/models) or API, and include the organization prefix.",
-    "controllerType": "input",
-    "controllerProps": {
-      "placeholder": "Leave empty for default model",
-      "value": ""
-    }
-  }
-]
diff --git a/extensions/inference-openrouter-extension/rolldown.config.mjs b/extensions/inference-openrouter-extension/rolldown.config.mjs
deleted file mode 100644
index 9ebaace2e1..0000000000
--- a/extensions/inference-openrouter-extension/rolldown.config.mjs
+++ /dev/null
@@ -1,18 +0,0 @@
-import { defineConfig } from 'rolldown'
-import pkgJson from './package.json' with { type: 'json' }
-import settingJson from './resources/settings.json' with { type: 'json' }
-import modelsJson from './resources/models.json' with { type: 'json' }
-
-export default defineConfig({
-  input: 'src/index.ts',
-  output: {
-    format: 'esm',
-    file: 'dist/index.js',
-  },
-  platform: 'browser',
-  define: {
-    MODELS: JSON.stringify(modelsJson),
-    SETTINGS: JSON.stringify(settingJson),
-    ENGINE: JSON.stringify(pkgJson.engine),
-  },
-})
diff --git a/extensions/inference-openrouter-extension/src/env.d.ts b/extensions/inference-openrouter-extension/src/env.d.ts
deleted file mode 100644
index 40ca58094a..0000000000
--- a/extensions/inference-openrouter-extension/src/env.d.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-declare const SETTINGS: SettingComponentProps[]
-declare const MODELS: Model[]
diff --git a/extensions/inference-openrouter-extension/src/index.ts b/extensions/inference-openrouter-extension/src/index.ts
deleted file mode 100644
index a34c4c38bc..0000000000
--- a/extensions/inference-openrouter-extension/src/index.ts
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-openrouter-extension/src/index
- */
-
-import { RemoteOAIEngine } from '@janhq/core'
-import { PayloadType } from '@janhq/core'
-
-enum Settings {
-  apiKey = 'openrouter-api-key',
-  model = 'openrouter-model',
-  chatCompletionsEndPoint = 'chat-completions-endpoint',
-}
-
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
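- * When no model is configured, requests made with the 'open-router-auto'
- * placeholder fall through to OpenRouter's default model (see transformPayload
- * below).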
- */
-export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
-  inferenceUrl: string = ''
-  provider: string = 'openrouter'
-  model?: string | undefined
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    // Register Settings
-    this.registerSettings(SETTINGS)
-    this.registerModels(MODELS)
-
-    this.apiKey = await this.getSetting<string>(Settings.apiKey, '')
-    this.inferenceUrl = await this.getSetting<string>(
-      Settings.chatCompletionsEndPoint,
-      ''
-    )
-    this.model = await this.getSetting<string>(Settings.model, '')
-    // OpenRouter uses its default model when no model param is set
-    if (!this.model?.length) this.model = undefined
-    if (this.inferenceUrl.length === 0) {
-      SETTINGS.forEach((setting) => {
-        if (setting.key === Settings.chatCompletionsEndPoint) {
-          this.inferenceUrl = setting.controllerProps.value as string
-        }
-      })
-    }
-  }
-
-  override async headers(): Promise<HeadersInit> {
-    return {
-      'Content-Type': 'application/json',
-      'HTTP-Referer': 'https://jan.ai',
-      'Authorization': `Bearer ${this.apiKey}`,
-    }
-  }
-
-  onSettingUpdate<T>(key: string, value: T): void {
-    if (key === Settings.apiKey) {
-      this.apiKey = value as string
-    } else if (key === Settings.chatCompletionsEndPoint) {
-      if (typeof value !== 'string') return
-
-      if (value.trim().length === 0) {
-        SETTINGS.forEach((setting) => {
-          if (setting.key === Settings.chatCompletionsEndPoint) {
-            this.inferenceUrl = setting.controllerProps.value as string
-          }
-        })
-      } else {
-        this.inferenceUrl = value
-      }
-    } else if (key === Settings.model) {
-      this.model =
-        typeof value === 'string' && value.length > 0 ? value : undefined
-    }
-  }
-
-  transformPayload = (payload: PayloadType) => ({
-    ...payload,
-    model: payload.model !== 'open-router-auto' ? payload.model : this.model,
-  })
-}
diff --git a/extensions/inference-openrouter-extension/tsconfig.json b/extensions/inference-openrouter-extension/tsconfig.json
deleted file mode 100644
index 2477d58ce5..0000000000
--- a/extensions/inference-openrouter-extension/tsconfig.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
-    "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
-  },
-  "include": ["./src"]
-}
diff --git a/extensions/inference-triton-trtllm-extension/README.md b/extensions/inference-triton-trtllm-extension/README.md
deleted file mode 100644
index f9690da09d..0000000000
--- a/extensions/inference-triton-trtllm-extension/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a tgz file in your extension directory now
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as
-extension name, main entry, description and version.
-
-When you copy this repository, update `package.json` with the name and description for your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! This contains the
-source code that will be run when your extension functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function will return a `Promise`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
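Each remote-engine extension removed in this patch repeats the same endpoint fallback in `onLoad` and `onSettingUpdate`: when the persisted `chat-completions-endpoint` value is empty, the default is recovered from the bundled `settings.json` definition. Below is a minimal sketch of that shared pattern; the `SettingComponentProps` shape is an assumption modeled on the extensions' `env.d.ts` declarations, with only the fields the fallback actually reads.

```typescript
// Sketch of the endpoint fallback repeated across the deleted extensions.
// SettingComponentProps here is an assumption modeled on each extension's
// env.d.ts declaration; only the fields used by the fallback are included.
type SettingComponentProps = {
  key: string
  controllerProps: { value?: string | number | boolean }
}

const ENDPOINT_KEY = 'chat-completions-endpoint'

// Prefer the persisted value; otherwise fall back to the default that ships
// in the extension's resources/settings.json.
function resolveEndpoint(
  stored: string,
  settings: SettingComponentProps[]
): string {
  if (stored.trim().length > 0) return stored
  const fallback = settings.find((s) => s.key === ENDPOINT_KEY)
  return typeof fallback?.controllerProps.value === 'string'
    ? fallback.controllerProps.value
    : ''
}
```

Populating engines, models, and legacy settings centrally in the engine-management extension is what removes the need for each provider to carry its own copy of this boilerplate.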
diff --git a/extensions/inference-triton-trtllm-extension/package.json b/extensions/inference-triton-trtllm-extension/package.json deleted file mode 100644 index 9c1f5e05f4..0000000000 --- a/extensions/inference-triton-trtllm-extension/package.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "@janhq/inference-triton-trt-llm-extension", - "productName": "Triton-TRT-LLM Inference Engine", - "version": "1.0.0", - "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", - "main": "dist/index.js", - "engine": "triton_trtllm", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "ts-loader": "^9.5.0", - "typescript": "^5.7.2" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "fetch-retry": "^5.0.6", - "rxjs": "^7.8.1", - "ulidx": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "fetch-retry" - ], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/inference-triton-trtllm-extension/resources/settings.json b/extensions/inference-triton-trtllm-extension/resources/settings.json deleted file mode 100644 index 26b80a6863..0000000000 --- a/extensions/inference-triton-trtllm-extension/resources/settings.json +++ /dev/null @@ -1,24 +0,0 @@ -[ - { - "key": "tritonllm-api-key", - "title": "API Key", - "description": "The Triton LLM API uses API keys for authentication.", - "controllerType": "input", - "controllerProps": { - "placeholder": "Insert API Key", - "value": "", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - }, - { - "key": "chat-completions-endpoint", - "title": "Chat Completions Endpoint", - "description": "The endpoint to use for chat completions.", - "controllerType": "input", - "controllerProps": { - "placeholder": "http://localhost:8000/v2/models/tensorrt_llm_bls/generate", - "value": "http://localhost:8000/v2/models/tensorrt_llm_bls/generate" - } - } -] diff --git a/extensions/inference-triton-trtllm-extension/rolldown.config.mjs b/extensions/inference-triton-trtllm-extension/rolldown.config.mjs deleted file mode 100644 index e0659a4853..0000000000 --- a/extensions/inference-triton-trtllm-extension/rolldown.config.mjs +++ /dev/null @@ -1,16 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } -import settingJson from './resources/settings.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - SETTINGS: JSON.stringify(settingJson), - ENGINE: JSON.stringify(pkgJson.engine), - }, -}) diff --git a/extensions/inference-triton-trtllm-extension/src/env.d.ts b/extensions/inference-triton-trtllm-extension/src/env.d.ts deleted file mode 100644 index 4ff21449c1..0000000000 --- a/extensions/inference-triton-trtllm-extension/src/env.d.ts +++ /dev/null @@ -1 +0,0 @@ -declare const SETTINGS: SettingComponentProps[] diff --git a/extensions/inference-triton-trtllm-extension/src/index.ts b/extensions/inference-triton-trtllm-extension/src/index.ts deleted file mode 100644 index 2f351defee..0000000000 --- 
a/extensions/inference-triton-trtllm-extension/src/index.ts +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-nvidia-triton-trt-llm-extension/src/index - */ - -import { RemoteOAIEngine } from '@janhq/core' - -enum Settings { - apiKey = 'tritonllm-api-key', - chatCompletionsEndPoint = 'chat-completions-endpoint', -} -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceTritonTrtLLMExtension extends RemoteOAIEngine { - inferenceUrl: string = '' - provider: string = 'triton_trtllm' - - /** - * Subscribes to events emitted by the @janhq/core package. - */ - async onLoad() { - super.onLoad() - - // Register Settings - this.registerSettings(SETTINGS) - - // Retrieve API Key Setting - this.apiKey = await this.getSetting(Settings.apiKey, '') - this.inferenceUrl = await this.getSetting( - Settings.chatCompletionsEndPoint, - '' - ) - - if (this.inferenceUrl.length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } - } - - onSettingUpdate(key: string, value: T): void { - if (key === Settings.apiKey) { - this.apiKey = value as string - } else if (key === Settings.chatCompletionsEndPoint) { - if (typeof value !== 'string') return - - if (value.trim().length === 0) { - SETTINGS.forEach((setting) => { - if (setting.key === Settings.chatCompletionsEndPoint) { - this.inferenceUrl = setting.controllerProps.value as string - } - }) - } else { - this.inferenceUrl = value - } - } - } -} diff --git a/extensions/inference-triton-trtllm-extension/tsconfig.json b/extensions/inference-triton-trtllm-extension/tsconfig.json deleted file mode 100644 index 2477d58ce5..0000000000 --- a/extensions/inference-triton-trtllm-extension/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"] -} diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts index ca5763962d..7618e8170a 100644 --- a/extensions/model-extension/src/cortex.ts +++ b/extensions/model-extension/src/cortex.ts @@ -183,6 +183,7 @@ export class CortexAPI implements ICortexAPI { model.parameters = { ...extractInferenceParams(model), ...model.parameters, + ...model.inference_params, } model.settings = { ...extractModelLoadParams(model), diff --git a/extensions/yarn.lock b/extensions/yarn.lock index d139b917c7..24308d03b3 100644 --- a/extensions/yarn.lock +++ b/extensions/yarn.lock @@ -509,161 +509,71 @@ __metadata: "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension": version: 0.1.10 - resolution: 
"@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension" + resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension" dependencies: rxjs: "npm:^7.8.1" ulidx: "npm:^2.3.0" - checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2 + checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be languageName: node linkType: hard "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension": version: 0.1.10 - resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension" + resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension" dependencies: rxjs: "npm:^7.8.1" ulidx: "npm:^2.3.0" - checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2 + checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be languageName: node linkType: hard "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fengine-management-extension%40workspace%3Aengine-management-extension": version: 0.1.10 - resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Fengine-management-extension%40workspace%3Aengine-management-extension" + resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Fengine-management-extension%40workspace%3Aengine-management-extension" dependencies: rxjs: "npm:^7.8.1" ulidx: "npm:^2.3.0" - checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2 - languageName: node - linkType: hard - -"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-anthropic-extension%40workspace%3Ainference-anthropic-extension": - version: 0.1.10 - resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-anthropic-extension%40workspace%3Ainference-anthropic-extension" - dependencies: - rxjs: "npm:^7.8.1" - ulidx: "npm:^2.3.0" - checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2 - languageName: node - linkType: hard - -"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-cohere-extension%40workspace%3Ainference-cohere-extension": - version: 0.1.10 - resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-cohere-extension%40workspace%3Ainference-cohere-extension" - dependencies: - rxjs: "npm:^7.8.1" - ulidx: "npm:^2.3.0" - checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2 + checksum: 
10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be
   languageName: node
   linkType: hard
 
 "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-cortex-extension%40workspace%3Ainference-cortex-extension":
   version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-cortex-extension%40workspace%3Ainference-cortex-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-groq-extension%40workspace%3Ainference-groq-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-groq-extension%40workspace%3Ainference-groq-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-martian-extension%40workspace%3Ainference-martian-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-martian-extension%40workspace%3Ainference-martian-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-mistral-extension%40workspace%3Ainference-mistral-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-mistral-extension%40workspace%3Ainference-mistral-extension"
+  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Finference-cortex-extension%40workspace%3Ainference-cortex-extension"
   dependencies:
     rxjs: "npm:^7.8.1"
     ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-nvidia-extension%40workspace%3Ainference-nvidia-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-nvidia-extension%40workspace%3Ainference-nvidia-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-openai-extension%40workspace%3Ainference-openai-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-openai-extension%40workspace%3Ainference-openai-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-openrouter-extension%40workspace%3Ainference-openrouter-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-openrouter-extension%40workspace%3Ainference-openrouter-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
-  languageName: node
-  linkType: hard
-
-"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Finference-triton-trt-llm-extension%40workspace%3Ainference-triton-trtllm-extension":
-  version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Finference-triton-trt-llm-extension%40workspace%3Ainference-triton-trtllm-extension"
-  dependencies:
-    rxjs: "npm:^7.8.1"
-    ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
+  checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be
   languageName: node
   linkType: hard
 
 "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fmodel-extension%40workspace%3Amodel-extension":
   version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Fmodel-extension%40workspace%3Amodel-extension"
+  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Fmodel-extension%40workspace%3Amodel-extension"
   dependencies:
     rxjs: "npm:^7.8.1"
     ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
+  checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be
   languageName: node
   linkType: hard
 
 "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fmonitoring-extension%40workspace%3Amonitoring-extension":
   version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Fmonitoring-extension%40workspace%3Amonitoring-extension"
+  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Fmonitoring-extension%40workspace%3Amonitoring-extension"
   dependencies:
     rxjs: "npm:^7.8.1"
     ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
+  checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be
   languageName: node
   linkType: hard
 
 "@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Ftensorrt-llm-extension%40workspace%3Atensorrt-llm-extension":
   version: 0.1.10
-  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=91cd98&locator=%40janhq%2Ftensorrt-llm-extension%40workspace%3Atensorrt-llm-extension"
+  resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=0f7f72&locator=%40janhq%2Ftensorrt-llm-extension%40workspace%3Atensorrt-llm-extension"
   dependencies:
     rxjs: "npm:^7.8.1"
     ulidx: "npm:^2.3.0"
-  checksum: 10c0/af79c509b1ff8a2893f5fd6545cfa8b3bb6a2e2bc13acdd5963766a1caac635b8b69ab627bfb356e052f16542f2b7187b607bdaed6acec24cd7c9a6087e4abc2
+  checksum: 10c0/68338f54e6cceb00b1f35e4a34209cde33205f01ebc28a0cc945e106661a27041d8f5a991479013bf43c445bc4272564955cc3869e0136b5c7d58e9d6c65e8be
   languageName: node
   linkType: hard
 
@@ -683,36 +593,6 @@ __metadata:
   languageName: unknown
   linkType: soft
 
-"@janhq/inference-anthropic-extension@workspace:inference-anthropic-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-anthropic-extension@workspace:inference-anthropic-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-cohere-extension@workspace:inference-cohere-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-cohere-extension@workspace:inference-cohere-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
 "@janhq/inference-cortex-extension@workspace:inference-cortex-extension":
   version: 0.0.0-use.local
   resolution: "@janhq/inference-cortex-extension@workspace:inference-cortex-extension"
@@ -743,112 +623,6 @@ __metadata:
   languageName: unknown
   linkType: soft
 
-"@janhq/inference-groq-extension@workspace:inference-groq-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-groq-extension@workspace:inference-groq-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-martian-extension@workspace:inference-martian-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-martian-extension@workspace:inference-martian-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-mistral-extension@workspace:inference-mistral-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-mistral-extension@workspace:inference-mistral-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-nvidia-extension@workspace:inference-nvidia-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-nvidia-extension@workspace:inference-nvidia-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-openai-extension@workspace:inference-openai-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-openai-extension@workspace:inference-openai-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-openrouter-extension@workspace:inference-openrouter-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-openrouter-extension@workspace:inference-openrouter-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
-"@janhq/inference-triton-trt-llm-extension@workspace:inference-triton-trtllm-extension":
-  version: 0.0.0-use.local
-  resolution: "@janhq/inference-triton-trt-llm-extension@workspace:inference-triton-trtllm-extension"
-  dependencies:
-    "@janhq/core": ../../core/package.tgz
-    cpx: "npm:^1.5.0"
-    fetch-retry: "npm:^5.0.6"
-    rimraf: "npm:^3.0.2"
-    rolldown: "npm:1.0.0-beta.1"
-    rxjs: "npm:^7.8.1"
-    ts-loader: "npm:^9.5.0"
-    typescript: "npm:^5.7.2"
-    ulidx: "npm:^2.3.0"
-  languageName: unknown
-  linkType: soft
-
 "@janhq/model-extension@workspace:model-extension":
   version: 0.0.0-use.local
   resolution: "@janhq/model-extension@workspace:model-extension"
diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx
index e0705e6b6d..4c88da5ccd 100644
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@@ -1,6 +1,7 @@
 import {
   EngineManager,
   ErrorCode,
+  InferenceEngine,
   MessageStatus,
   ThreadMessage,
 } from '@janhq/core'
@@ -14,8 +15,6 @@ import ModalTroubleShooting, {
 
 import { MainViewState } from '@/constants/screens'
 
-import { isLocalEngine } from '@/utils/modelEngine'
-
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
@@ -82,7 +81,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
         >
           {message.content[0]?.text?.value === 'Failed to fetch' &&
           engine &&
-          !isLocalEngine(String(engine?.name)) ? (
+          engine?.name !== InferenceEngine.cortex_llamacpp ? (
             No internet connection.
             Switch to an on-device model or check connection.
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index b5abf89d7f..7bbb1b2646 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -2,7 +2,7 @@ import { useState, useMemo, useEffect, useCallback, useRef } from 'react'
 
 import Image from 'next/image'
 
-import { InferenceEngine, Model } from '@janhq/core'
+import { EngineConfig, InferenceEngine } from '@janhq/core'
 import {
   Badge,
   Button,
@@ -31,6 +31,8 @@ import SetupRemoteModel from '@/containers/SetupRemoteModel'
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
+import { useGetEngines } from '@/hooks/useEngineManagement'
+
 import useRecommendedModel from '@/hooks/useRecommendedModel'
 
 import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
@@ -38,23 +40,16 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
 
 import { formatDownloadPercentage, toGibibytes } from '@/utils/converter'
 import { manualRecommendationModel } from '@/utils/model'
 
-import {
-  getLogoEngine,
-  getTitleByEngine,
-  isLocalEngine,
-  priorityEngine,
-} from '@/utils/modelEngine'
-
-import { extensionManager } from '@/extension'
+import { getLogoEngine } from '@/utils/modelEngine'
 
 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
-import { inActiveEngineProviderAtom } from '@/helpers/atoms/Extension.atom'
 import {
   configuredModelsAtom,
   getDownloadingModelAtom,
   selectedModelAtom,
   showEngineListModelAtom,
 } from '@/helpers/atoms/Model.atom'
+
 import {
   activeThreadAtom,
   setThreadModelParamsAtom,
@@ -90,6 +85,7 @@ const ModelDropdown = ({
   const [dropdownOptions, setDropdownOptions] = useState(
     null
   )
+  const { engines } = useGetEngines()
 
   const downloadStates = useAtomValue(modelDownloadStateAtom)
   const setThreadModelParams = useSetAtom(setThreadModelParamsAtom)
@@ -105,6 +101,16 @@
   )
   const { updateThreadMetadata } = useCreateNewThread()
 
+  const engineList = useMemo(
+    () =>
+      Object.entries(engines ?? {}).flatMap((e) => ({
+        name: e[0],
+        type: e[1][0]?.type === 'remote' ? 'remote' : 'local',
+        engine: e[1][0],
+      })),
+    [engines]
+  )
+
   useClickOutside(() => handleChangeStateOpen(false), null, [
     dropdownOptions,
     toggle,
@@ -122,13 +128,6 @@
     [setModelDropdownState]
   )
 
-  const isModelSupportRagAndTools = useCallback((model: Model) => {
-    return (
-      model?.engine === InferenceEngine.openai ||
-      isLocalEngine(model?.engine as InferenceEngine)
-    )
-  }, [])
-
   const filteredDownloadedModels = useMemo(
     () =>
       configuredModels
         .filter((e) =>
           e.name.toLowerCase().includes(searchText.toLowerCase().trim())
         )
         .filter((e) => {
           if (searchFilter === 'local') {
-            return isLocalEngine(e.engine)
-          }
-          if (searchFilter === 'remote') {
-            return !isLocalEngine(e.engine)
+            return (
+              engineList.find((t) => t.engine.engine === e.engine)?.type ===
+              'local'
+            )
           }
+          return true
         })
         .sort((a, b) => a.name.localeCompare(b.name))
         .sort((a, b) => {
@@ -164,7 +164,7 @@
           return 0
         }
       }),
-    [configuredModels, searchText, searchFilter, downloadedModels]
+    [configuredModels, searchText, searchFilter, downloadedModels, engineList]
   )
 
   useEffect(() => {
@@ -179,6 +179,15 @@
     }
   }, [open])
 
+  useEffect(() => {
+    setShowEngineListModel((prev) => [
+      ...prev,
+      ...engineList
+        .filter((x) => (x.engine.api_key?.length ?? 0) > 0)
+        .map((e) => e.name),
+    ])
+  }, [setShowEngineListModel, engineList])
+
   useEffect(() => {
     if (!activeThread) return
     const modelId = activeAssistant?.model?.id
@@ -193,6 +202,14 @@
     activeAssistant?.model?.id,
   ])
 
+  const isLocalEngine = useCallback(
+    (engine?: string) => {
+      if (!engine) return false
+      return engineList.some((t) => t.name === engine && t.type === 'local')
+    },
+    [engineList]
+  )
+
   const onClickModelItem = useCallback(
     async (modelId: string) => {
       if (!activeAssistant) return
@@ -210,7 +227,7 @@
           tools: [
             {
               type: 'retrieval',
-              enabled: isModelSupportRagAndTools(model as Model),
+              enabled: model?.engine === InferenceEngine.cortex,
               settings: {
                 ...(activeAssistant.tools &&
                   activeAssistant.tools[0]?.settings),
@@ -225,13 +242,15 @@
         8192,
         model?.settings.ctx_len ?? 8192
       )
+
       const overriddenParameters = {
-        ctx_len: !isLocalEngine(model?.engine)
-          ? undefined
-          : defaultContextLength,
-        max_tokens: !isLocalEngine(model?.engine)
-          ? (model?.parameters.max_tokens ?? 8192)
-          : defaultContextLength,
+        ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined,
+        max_tokens: defaultContextLength
+          ? Math.min(
+              model?.parameters.token_limit ?? 8192,
+              defaultContextLength
+            )
+          : model?.parameters.token_limit,
       }
 
       const modelParams = {
@@ -258,95 +277,17 @@
       setSelectedModel,
       activeThread,
       updateThreadMetadata,
-      isModelSupportRagAndTools,
       setThreadModelParams,
       updateModelParameter,
     ]
   )
 
-  const [extensionHasSettings, setExtensionHasSettings] = useState<
-    { name?: string; setting: string; apiKey: string; provider: string }[]
-  >([])
-
-  const inActiveEngineProvider = useAtomValue(inActiveEngineProviderAtom)
-
-  useEffect(() => {
-    const getAllSettings = async () => {
-      const extensionsMenu: {
-        name?: string
-        setting: string
-        apiKey: string
-        provider: string
-      }[] = []
-      const extensions = extensionManager.getAll()
-
-      for (const extension of extensions) {
-        if (typeof extension.getSettings === 'function') {
-          const settings = await extension.getSettings()
-          if (
-            (settings && settings.length > 0) ||
-            (await extension.installationState()) !== 'NotRequired'
-          ) {
-            extensionsMenu.push({
-              name: extension.productName,
-              setting: extension.name,
-              apiKey:
-                'apiKey' in extension && typeof extension.apiKey === 'string'
-                  ? extension.apiKey
-                  : '',
-              provider:
-                'provider' in extension &&
                typeof extension.provider === 'string'
-                  ? extension.provider
-                  : '',
-            })
-          }
-        }
-      }
-      setExtensionHasSettings(extensionsMenu)
-    }
-    getAllSettings()
-  }, [])
-
-  const findByEngine = filteredDownloadedModels
-    .map((x) => {
-      // Legacy engine support - they will be grouped under Cortex LlamaCPP
-      if (x.engine === InferenceEngine.nitro)
-        return InferenceEngine.cortex_llamacpp
-      return x.engine
-    })
-    .filter((x) => !inActiveEngineProvider.includes(x))
-
-  const groupByEngine = findByEngine
-    .filter(function (item, index) {
-      if (findByEngine.indexOf(item) === index) return item
-    })
-    .sort((a, b) => {
-      if (priorityEngine.includes(a) && priorityEngine.includes(b)) {
-        return priorityEngine.indexOf(a) - priorityEngine.indexOf(b)
-      } else if (priorityEngine.includes(a)) {
-        return -1
-      } else if (priorityEngine.includes(b)) {
-        return 1
-      } else {
-        return 0 // Leave the rest in their original order
-      }
-    })
-
-  const getEngineStatusReady: InferenceEngine[] = extensionHasSettings
-    ?.filter((e) => e.apiKey.length > 0)
-    .map((x) => x.provider as InferenceEngine)
-
-  useEffect(() => {
-    setShowEngineListModel((prev) => [
-      ...prev,
-      ...(getEngineStatusReady as InferenceEngine[]),
-    ])
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [setShowEngineListModel, extensionHasSettings])
-
-  const isDownloadALocalModel = downloadedModels.some((x) =>
-    isLocalEngine(x.engine)
+  const isDownloadALocalModel = useMemo(
+    () =>
+      downloadedModels.some((x) =>
+        engineList.some((t) => t.name === x.engine && t.type === 'local')
+      ),
+    [downloadedModels, engineList]
   )
 
   if (strictedThread && !activeThread) {
@@ -434,85 +375,187 @@
           />
 
-          {groupByEngine.map((engine, i) => {
-            const apiKey = !isLocalEngine(engine)
-              ? extensionHasSettings.filter((x) => x.provider === engine)[0]
-                  ?.apiKey.length > 1
-              : true
-            const engineLogo = getLogoEngine(engine as InferenceEngine)
-            const showModel = showEngineListModel.includes(engine)
-            const onClickChevron = () => {
-              if (showModel) {
-                setShowEngineListModel((prev) =>
-                  prev.filter((item) => item !== engine)
-                )
-              } else {
-                setShowEngineListModel((prev) => [...prev, engine])
+          {engineList
+            .filter((e) => e.type === searchFilter)
+            .map((engine, i) => {
+              const isConfigured =
+                engine.type === 'local' ||
+                ((engine.engine as EngineConfig).api_key?.length ?? 0) > 1
+              const engineLogo = getLogoEngine(engine.name as InferenceEngine)
+              const showModel = showEngineListModel.includes(engine.name)
+              const onClickChevron = () => {
+                if (showModel) {
+                  setShowEngineListModel((prev) =>
+                    prev.filter((item) => item !== engine.name)
+                  )
+                } else {
+                  setShowEngineListModel((prev) => [...prev, engine.name])
+                }
              }
-            }
-            return (
-
-
-
-                  {engineLogo && (
-                    logo
-                  )}
-                  {getTitleByEngine(engine)}
-
-
-
-                  {!isLocalEngine(engine) && (
-                  )}
-                  {!showModel ? (
-                  ) : (
+
+                    {engine.type === 'remote' && (
+                          0
+                        }
+                      />
+                    )}
+                    {!showModel ? (
+                    ) : (
+                    )}
-
-                  {isLocalEngine(engine) &&
-                    !isDownloadALocalModel &&
-                    showModel &&
-                    !searchText.length && (
+                  {engine.type === 'local' &&
+                    !isDownloadALocalModel &&
+                    showModel &&
+                    !searchText.length && (
-                      {featuredModel.map((model) => {
+                        {featuredModel.map((model) => {
+                          const isDownloading = downloadingModels.some(
+                            (md) => md === model.id
+                          )
+                          return (
+                                {model.name}
+                                  {toGibibytes(model.metadata?.size)}
+                                {!isDownloading ? (
+                                    downloadModel(
+                                      model.sources[0].url,
+                                      model.id
+                                    )
+                                  }
+                                ) : (
+                                  Object.values(downloadStates)
+                                    .filter((x) => x.modelId === model.id)
+                                    .map((item) => (
+                                    ))
+                                )}
+                          )
+                        })}
+                    )}
+
+                    {filteredDownloadedModels
+                      .filter(
+                        (x) =>
+                          x.engine === engine.name ||
+                          (x.engine === InferenceEngine.nitro &&
+                            engine.name === InferenceEngine.cortex_llamacpp)
+                      )
+                      .filter((y) => {
+                        if (isLocalEngine(y.engine) && !searchText.length) {
+                          return downloadedModels.find((c) => c.id === y.id)
+                        } else {
+                          return y
+                        }
+                      })
+                      .map((model) => {
+                        if (!showModel) return null
+                        const isDownloading = downloadingModels.some(
+                          (md) => md === model.id
+                        )
+                        const isDownloaded = downloadedModels.some(
+                          (c) => c.id === model.id
+                        )
+                        return (
+                            onClick={() => {
+                              if (!isConfigured && engine.type === 'remote')
+                                return null
+                              if (isDownloaded) {
+                                onClickModelItem(model.id)
+                              }
+                            }}
+                          >
+                              {model.name}
+                              {!isDownloaded && (
+                                  {toGibibytes(model.metadata?.size)}
+                              )}
+                              {!isDownloading && !isDownloaded ? (
+                                  downloadModel(
+                                    model.sources[0].url,
+                                    model.id
+                                  )
+                                }
+                              ) : (
+                                Object.values(downloadStates)
+                                  .filter((x) => x.modelId === model.id)
+                                  .map((item) => (
+                                  ))
+                              )}
+                        )
+                      })}
-                    )}
-
-                    {filteredDownloadedModels
-                      .filter(
-                        (x) =>
-                          x.engine === engine ||
-                          (x.engine === InferenceEngine.nitro &&
-                            engine === InferenceEngine.cortex_llamacpp)
-                      )
-                      .filter((y) => {
-                        if (isLocalEngine(y.engine) && !searchText.length) {
-                          return downloadedModels.find((c) => c.id === y.id)
-                        } else {
-                          return y
-                        }
-                      })
-                      .map((model) => {
-                        if (!showModel) return null
-                        const isDownloading = downloadingModels.some(
-                          (md) => md === model.id
-                        )
-                        const isDownloaded = downloadedModels.some(
-                          (c) => c.id === model.id
-                        )
-                        return (
-                            onClick={() => {
-                              if (!apiKey && !isLocalEngine(model.engine))
-                                return null
-                              if (isDownloaded) {
-                                onClickModelItem(model.id)
-                              }
-                            }}
-                          >
-                              {model.name}
-                              {!isDownloaded && (
-                                  {toGibibytes(model.metadata?.size)}
-                              )}
-                              {!isDownloading && !isDownloaded ? (
-                                  downloadModel(
-                                    model.sources[0].url,
-                                    model.id
-                                  )
-                                }
-                              ) : (
-                                Object.values(downloadStates)
-                                  .filter((x) => x.modelId === model.id)
-                                  .map((item) => (
-                                  ))
-                              )}
-                        )
-                      })}
-
-            )
-          })}
+
+              )
+            })}
diff --git a/web/containers/SetupRemoteModel/index.tsx b/web/containers/SetupRemoteModel/index.tsx
index 1f5478d73a..25c0b3bd53 100644
--- a/web/containers/SetupRemoteModel/index.tsx
+++ b/web/containers/SetupRemoteModel/index.tsx
@@ -8,17 +8,16 @@ import { SettingsIcon, PlusIcon } from 'lucide-react'
 
 import { MainViewState } from '@/constants/screens'
 
-import { isLocalEngine } from '@/utils/modelEngine'
-
 import { extensionManager } from '@/extension'
 
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 
 type Props = {
   engine: InferenceEngine
+  isConfigured: boolean
 }
 
-const SetupRemoteModel = ({ engine }: Props) => {
+const SetupRemoteModel = ({ engine, isConfigured }: Props) => {
   const setSelectedSetting = useSetAtom(selectedSettingAtom)
   const setMainViewState = useSetAtom(mainViewStateAtom)
 
@@ -66,19 +65,14 @@
   }, [])
 
   const onSetupItemClick = (setting: InferenceEngine) => {
-    setMainViewState(MainViewState.Settings)
     setSelectedSetting(
       extensionHasSettings.filter((x) =>
         x.provider.toLowerCase().includes(setting)
       )[0]?.setting
     )
+    setMainViewState(MainViewState.Settings)
  }
 
-  const apiKey = !isLocalEngine(engine)
-    ? extensionHasSettings.filter((x) => x.provider === engine)[0]?.apiKey
-        .length > 1
-    : true
-
   return (