diff --git a/src/content/docs/en/pages/main-menu/reference/build/edge-application/edge-functions.mdx b/src/content/docs/en/pages/main-menu/reference/build/edge-application/edge-functions.mdx
index c7e5a2955a..141853d7b6 100644
--- a/src/content/docs/en/pages/main-menu/reference/build/edge-application/edge-functions.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/build/edge-application/edge-functions.mdx
@@ -22,6 +22,8 @@ With Edge Functions for **Edge Application**, you can:
 - Deploy functions easily.
 - Make use of pair programming with the Chat GPT integration.
 - Preview the outcome of the function live on the preview deployment.
+- Run AI agents directly on Azion's distributed infrastructure with ultra-low latency.
+- Build complete AI ecosystems.
 
 ---
 
@@ -48,6 +50,21 @@ By using the [Azion Runtime](/en/documentation/runtime/overview/) to develop you
 
 ---
 
+## AI framework support
+
+Edge Functions supports advanced AI workflows, enabling you to:
+
+- Run AI agents directly on Azion's distributed infrastructure with ultra-low latency, building enterprise-grade solutions that operate at the edge with reliability and scale.
+- Create sophisticated AI workflows using **LangGraph** and **LangChain** integrations, including advanced retrieval capabilities that combine vector and full-text search for complex use cases.
+- Leverage native [Edge SQL](/en/documentation/products/store/edge-sql/) integration for RAG implementations and vector storage, along with MCP Servers that enable agent-to-agent collaboration using Google's Agent2Agent (A2A) protocol.
+- Access state-of-the-art models like Mistral, Florence, Qwen, and others that follow OpenAI's API standard, while reducing complexity with a fully integrated AI infrastructure running at the edge of the network.
+
+Azion's **Edge AI** allows you to run AI models on Azion Runtime and can be combined with Edge Functions to create complex AI-powered applications.
+
+
+
+---
+
 ## JavaScript frameworks
 
 ### Next.js
diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx
new file mode 100644
index 0000000000..b79b971222
--- /dev/null
+++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx
@@ -0,0 +1,118 @@
+---
+title: BAAI/bge-reranker-v2-m3
+description: >-
+  BAAI/bge-reranker-v2-m3 is a lightweight reranker model with strong multilingual capabilities.
+meta_tags: 'edge ai, ai models, artificial intelligence, edge computing'
+namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3
+permalink: /documentation/products/ai/edge-ai/models/baai-bge-reranker-v2-m3/
+---
+
+**BAAI/bge-reranker-v2-m3** is a lightweight reranker model with strong multilingual capabilities. It's easy to deploy and offers fast inference.
+ +## Model details + +| Category | Details | +|----------|---------| +| **Model Name** | BAAI/bge-reranker-v2-m3 | +| **Version** | Original | +| **Model Category** | Reranker | +| **Size** | 568M parameters | +| **HuggingFace Model** | [BAAI/bge-reranker-v2-m3](https://huggingface.co/BAAI/bge-reranker-v2-m3) | +| **License** | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | + +## Capabilities + +| Feature | Details | +|---------|--------| +| Azion Long-term Support (LTS) | ✅ / ❌| +| Context Length | 8192 | +| Supports LoRA | ❌ | +| Input data | Text | + +## Usage + +### Rerank example + +This is an example of a basic rerank request using this model: + +```bash +curl -X POST \ + http://endpoint-url/rerank \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "BAAI/bge-reranker-v2-m3", + "query": "What is deep learning?", + "documents": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] +}' +``` + +### Score example + +This is an example of a basic score request using this model: + +```bash +curl -X POST \ + http://endpoint-url/score \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "BAAI/bge-reranker-v2-m3", + "text_1": "What is deep learning?", + "text_2": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] + }' +``` + +### Running with Edge Functions: + +This is an example of how to run this model using Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { + "query": "What is deep learning?", + "documents": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "query", + "documents" + ], + "properties": { + "query": { + "type": "string" + }, + "documents": { + "type": "array", + "items": { + "type": "string" + } + }, + "top_n": { + "type": "integer" + }, + "max_tokens_per_doc": { + "type": "integer" + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/e5-mistral-7b-instruct.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/e5-mistral-7b-instruct.mdx new file mode 100644 index 0000000000..bedade4023 --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/e5-mistral-7b-instruct.mdx @@ -0,0 +1,108 @@ +--- +title: E5 Mistral 7B Instruct +description: >- + E5 Mistral 7B Instruct is optimized for English text embedding, multilingual processing, customization, and long input sequences. +meta_tags: 'edge ai, ai models, artificial intelligence, edge computing, mistral' +namespace: docs_edge_ai_models_e5_mistral_7b_instruct +permalink: /documentation/products/ai/edge-ai/models/e5-mistral-7b-instruct/ +--- + +The **E5 Mistral 7B Instruct** model is optimized for English text embedding tasks, with capabilities for multilingual processing, flexible customization, and handling long input sequences, making it suitable for complex natural language processing applications. 
+
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | E5 Mistral 7B Instruct |
+| **Version** | Original |
+| **Model Category** | Embedding |
+| **Size** | 7B parameters |
+| **HuggingFace Model** | [e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct) |
+| **OpenAI Compatible Endpoint** | [Embeddings](https://platform.openai.com/docs/api-reference/embeddings/create) |
+| **License** | MIT |
+
+## Capabilities
+
+| Feature | Details |
+|---------|--------|
+| Azion Long-term Support (LTS) | ❌ |
+| Context Length | 32768 |
+| Supports LoRA | ✅ |
+| Input Data | Text |
+
+## Usage
+
+### Embedding
+
+This is an example of how to use this model to generate embeddings for text input:
+
+```bash
+curl http://endpoint-url/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": "The food was delicious and the waiter...",
+    "model": "intfloat/e5-mistral-7b-instruct",
+    "encoding_format": "float"
+  }'
+```
+
+### Running with Edge Functions:
+
+This is an example of how to use this model with Edge Functions:
+
+```ts
+const modelResponse = await Azion.AI.run("intfloat-e5-mistral-7b-instruct", {
+  "input": "The food was delicious and the waiter...",
+  "encoding_format": "float"
+})
+```
+
+## JSON schema
+
+```json
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "required": [
+    "input"
+  ],
+  "properties": {
+    "encoding_format": {
+      "type": "string",
+      "enum": [
+        "float",
+        "base64"
+      ]
+    },
+    "dimensions": {
+      "type": "integer"
+    },
+    "input": {
+      "oneOf": [
+        {
+          "type": "string"
+        },
+        {
+          "type": "array",
+          "items": {
+            "oneOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "integer"
+              },
+              {
+                "type": "array",
+                "items": {
+                  "type": "integer"
+                }
+              }
+            ]
+          }
+        }
+      ]
+    }
+  }
+}
+```
diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/florence-2.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/florence-2.mdx
new file mode 100644
index 0000000000..754d8be6c8
--- /dev/null
+++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/florence-2.mdx
@@ -0,0 +1,258 @@
+---
+title: Florence-2 Large
+description: >-
+  Florence 2 Large is an advanced vision foundation model that leverages a prompt-based approach and a massive dataset to excel in various vision and vision-language tasks.
+meta_tags: 'edge ai, ai models, artificial intelligence, edge computing'
+namespace: docs_edge_ai_models_florence_2
+permalink: /documentation/products/ai/edge-ai/models/florence-2/
+---
+
+**Florence 2** is an advanced vision foundation model that leverages a prompt-based approach and a massive dataset to excel in various vision and vision-language tasks.
+
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | Florence 2 |
+| **Version** | Large |
+| **Model Category** | VLM |
+| **Size** | 0.77B parameters |
+| **HuggingFace Model** | [microsoft/Florence-2-large](https://huggingface.co/microsoft/Florence-2-large) |
+| **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **License** | [MIT](https://huggingface.co/microsoft/Florence-2-large/resolve/main/LICENSE) |
+
+## Capabilities
+
+| Feature | Details |
+|---------|--------|
+| Tool Calling | ❌ |
+| Azion Long-term Support (LTS) | ❌ |
+| Context Length | 4096 |
+| Supports LoRA | ❌ |
+| Input data | Text + Image |
+
+## Usage
+
+Florence uses tags according to each task it will perform.
Below are all the tags with their corresponding tasks.
+
+### Tasks with no additional input
+
+- Whole image to natural language:
+
+| Tag | Description |
+|------|-------------|
+| `<CAPTION>` | Image level brief caption |
+| `<DETAILED_CAPTION>` | Image level detailed caption |
+| `<MORE_DETAILED_CAPTION>` | Image level very detailed caption |
+
+- Whole image or region to text:
+
+| Tag | Description |
+|------|-------------|
+| `<OCR>` | OCR for entire image |
+| `<OCR_WITH_REGION>` | OCR for entire image, with bounding boxes for individual text items |
+
+- Whole image to regions and categories or natural language labels:
+
+| Tag | Description |
+|------|-------------|
+| `<REGION_PROPOSAL>` | Proposes bounding boxes for salient objects (no labels) |
+| `<OD>` | Identifies objects via bounding boxes and gives categorical labels |
+| `<DENSE_REGION_CAPTION>` | Identifies objects via bounding boxes and gives natural language labels |
+
+### Tasks with region input
+
+- Region to segment:
+
+| Tag | Description |
+|------|-------------|
+| `<REGION_TO_SEGMENTATION>` | Segments salient object in a given region |
+
+- Region to text:
+
+| Tag | Description |
+|------|-------------|
+| `<REGION_TO_CATEGORY>` | Gets object classification for bounding box |
+| `<REGION_TO_DESCRIPTION>` | Gets natural language description for contents of bounding box |
+
+### Tasks with natural language input
+
+- Natural language to regions (one to many):
+
+| Tag | Description |
+|------|-------------|
+| `<CAPTION_TO_PHRASE_GROUNDING>` | Given a caption, provides bounding boxes to visually ground phrases in the caption |
+
+- Natural language to region (one to one):
+
+| Tag | Description |
+|------|-------------|
+| `<OPEN_VOCABULARY_DETECTION>` | Detects bounding box for objects and OCR text |
+
+- Natural language to segment (one to one):
+
+| Tag | Description |
+|------|-------------|
+| `<REFERRING_EXPRESSION_SEGMENTATION>` | Referring Expression Segmentation - given a natural language descriptor, identifies the corresponding segmented region |
+
+This is how a request using Florence tags should look:
+
+```bash
+curl -X POST http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "microsoft/Florence-2-large",
+    "messages": [
+      {
+        "role": "user",
+        "content": [
+          {
+            "type": "image_url",
+            "image_url": {
+              "url": "https://images.unsplash.com/photo-1543373014-cfe4f4bc1cdf?q=80&w=3148&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+            }
+          },
+          {"type": "text", "text": "<DETAILED_CAPTION>"}
+        ]
+      }
+    ]
+}'
+```
+
+### Running with Edge Functions:
+
+This is an example of how to run this model with Edge Functions:
+
+```ts
+const modelResponse = await Azion.AI.run("microsoft-florence-2-large", {
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "https://images.unsplash.com/photo-1543373014-cfe4f4bc1cdf?q=80&w=3148&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+          }
+        },
+        {
+          "type": "text",
+          "text": "<DETAILED_CAPTION>"
+        }
+      ]
+    }
+  ]
+})
+```
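+
+Tags that take natural language input receive the extra text in the same `text` field, appended right after the tag. This is a minimal sketch of that pattern using the `<CAPTION_TO_PHRASE_GROUNDING>` task; the phrase itself is a hypothetical example:
+
+```ts
+// Illustrative grounding request: the task tag is followed by the caption to ground.
+const groundingResponse = await Azion.AI.run("microsoft-florence-2-large", {
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "https://images.unsplash.com/photo-1543373014-cfe4f4bc1cdf?q=80&w=3148&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+          }
+        },
+        {
+          "type": "text",
+          // the task tag plus the phrase to locate in the image (hypothetical phrase)
+          "text": "<CAPTION_TO_PHRASE_GROUNDING>A busy city street"
+        }
+      ]
+    }
+  ]
+})
+```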
"uri" + } + } + } + } + }, + { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "const": "text" + }, + "text": { + "type": "string", + "enum": [ + "", + "", + "", + "", + "", + "", + "", + "" + ] + } + } + }, + { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "const": "text" + }, + "text": { + "type": "string", + "allOf": [ + { + "not": { + "pattern": "|||||||" + } + }, + { + "pattern": "|||||" + } + ] + } + } + } + ] + } + } + } + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/gemma-3.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/gemma-3.mdx new file mode 100644 index 0000000000..712d0d832e --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/gemma-3.mdx @@ -0,0 +1,313 @@ +--- +title: Gemma 3 +description: >- + Gemma 3 is a model designed for fast deployment on devices, offering advanced capabilities such as multilingual support, text and visual reasoning, expanded context windows, function calling, and quantized models for high performance. +meta_tags: 'edge ai, ai models, artificial intelligence, edge computing, gemma3' +namespace: docs_edge_ai_models_gemma_3 +permalink: /documentation/products/ai/edge-ai/models/gemma-3/ +--- + +**Gemma 3** is a model designed for fast deployment on devices, offering advanced capabilities such as multilingual support, text and visual reasoning, expanded context windows, function calling, and quantized models for high performance. + +## Model details + +| Category | Details | +|----------|---------| +| **Model Name** | Gemma3 | +| **Version** | 27B INT4 | +| **Model Category** | LLM | +| **Size** | 27B parameters | +| **HuggingFace Model** | [ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g](https://huggingface.co/ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g) | +| **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | +| **License** | [Gemma](https://ai.google.dev/gemma/terms) | + +## Capabilities + +| Feature | Details | +|---------|--------| +| Tool Calling | ❌ | +| Azion Long-term Support (LTS) | ✅ | +| Context Length | 128k | +| Supports LoRA | ✅ | +| Input data | Text + Image | + +## Usage + +### Basic chat completion + +This is a basic chat completion example using this model: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Name the european capitals"} + ] +}' +``` + +### Multimodal (text + image) example + +This is a multimodal request example using this model: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} + ]} + ] +}' +``` + +### Running with Edge Functions: + +This is an example of how to run this model with Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("ista-daslab-gemma-3-27b-it-gptq-4b-128g", { + "stream": true, + "messages": [ + { + "role": "system", + 
"content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + } + ] + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { + "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + } +} +``` + diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/mistral-3-small.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/mistral-3-small.mdx new file mode 100644 index 0000000000..04766d46c9 --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/mistral-3-small.mdx @@ -0,0 +1,446 @@ 
+---
+title: Mistral 3 Small (24B AWQ)
+description: >-
+  Mistral 3 Small is a compact language model for text generation, conversational agents, function calling, and more.
+meta_tags: 'edge ai, ai models, artificial intelligence, edge computing, mistral'
+namespace: docs_edge_ai_models_mistral_3_small
+permalink: /documentation/products/ai/edge-ai/models/mistral-3-small/
+---
+
+**Mistral 3 Small** is a language model that delivers capabilities comparable to larger models while being compact. It's ideal for conversational agents, function calling, fine-tuning, and local inference with sensitive data.
+
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | Mistral 3 Small |
+| **Version** | 24B AWQ |
+| **Model Category** | Large Language Model (LLM) |
+| **Size** | 24B parameters |
+| **HuggingFace Model** | [casperhansen/mistral-small-24b-instruct-2501-awq](https://huggingface.co/casperhansen/mistral-small-24b-instruct-2501-awq) |
+| **OpenAI Compatible Endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **License** | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) |
+
+## Capabilities
+
+| Feature | Details |
+|---------|--------|
+| Tool Calling | ✅ |
+| Azion Long-term Support (LTS) | ✅ |
+| Context Length | 32,000 |
+| Supports LoRA | ❌ |
+| Input Data | Text |
+
+## Performance
+
+The model has been quantized using AWQ, resulting in a 50% reduction in model size while maintaining performance. It supports a large context window of 32,000 tokens, making it suitable for processing long documents and maintaining context in conversations.
+
+## Usage
+
+### Basic chat completion
+
+This is an example of a basic chat completion request using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": "Name the european capitals"}
+    ]
+}'
+```
+
+### Tool Calling example
+
+This is an example of a tool calling request using this model; a sketch of how to handle the returned tool call follows the example:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant with access to tools."},
+      {"role": "user", "content": "What is the weather in London?"}
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "get_weather",
+          "description": "Get the current weather for a location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state"
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      }
+    ]
+}'
+```
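+
+When the model decides to call a tool, the reply carries a `tool_calls` array instead of plain text. This is a minimal sketch of one way to complete the round trip in an Edge Function; it assumes the OpenAI-style response shape (`choices[0].message.tool_calls`) and a hypothetical `getWeather` helper you would implement yourself:
+
+```ts
+// Minimal tool-call round trip (response shape assumed to follow the OpenAI standard).
+const first = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+  "stream": false,
+  "max_tokens": 1024,
+  "messages": [
+    { "role": "system", "content": "You are a helpful assistant with access to tools." },
+    { "role": "user", "content": "What is the weather in London?" }
+  ],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "get_weather",
+        "description": "Get the current weather for a location",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "location": { "type": "string", "description": "The city and state" }
+          },
+          "required": ["location"]
+        }
+      }
+    }
+  ]
+})
+
+const toolCall = first.choices?.[0]?.message?.tool_calls?.[0]
+if (toolCall) {
+  // Run the real lookup, then send the result back as a "tool" message.
+  const args = JSON.parse(toolCall.function.arguments)
+  const result = await getWeather(args.location) // hypothetical helper
+  const second = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", {
+    "stream": false,
+    "messages": [
+      { "role": "user", "content": "What is the weather in London?" },
+      { "role": "assistant", "tool_calls": [toolCall] },
+      { "role": "tool", "tool_call_id": toolCall.id, "content": JSON.stringify(result) }
+    ]
+  })
+  // second.choices?.[0]?.message?.content now holds the grounded reply.
+}
+```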
+ +### Running with Edge Functions: + +This is an example of how to use this model with Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { + "stream": true, + "max_tokens": 1024, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + 
"TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { + "type": "string" + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-3b.mdx new file mode 100644 index 0000000000..72091f1c32 --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-3b.mdx @@ -0,0 +1,473 @@ +--- +title: Qwen 2.5 VL AWQ 3B +description: >- + Qwen 2.5 VL AWQ 3B is a vision-language model that supports 3 bilion parameters and offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. +meta_tags: 'edge ai, ai models, artificial intelligence, edge computing, qwen' +namespace: docs_edge_ai_models_qwen_2_5_vl_3b +permalink: /documentation/products/ai/edge-ai/models/qwen-2-5-vl-3b/ +--- + +**Qwen 2.5 VL AWQ 3B** is a vision-language model that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. It supports 3 bilion parameters. 
+
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | Qwen2.5 VL |
+| **Version** | AWQ 3B |
+| **Model Category** | VLM |
+| **Size** | 3B params |
+| **HuggingFace Model** | [Qwen/Qwen2.5-VL-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) |
+| **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **License** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) |
+
+## Capabilities
+
+| Feature | Details |
+|---------|--------|
+| Tool Calling | ✅ |
+| Azion Long-term Support (LTS) | ✅ |
+| Context Length | 32k tokens |
+| Supports LoRA | ✅ |
+| Input data | Text + Image |
+
+## Usage
+
+### Basic chat completion
+
+This is a basic chat completion request example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": "Name the european capitals"}
+    ]
+}'
+```
+
+### Tool Calling example
+
+This is a tool calling request example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant with access to tools."},
+      {"role": "user", "content": "What is the weather in London?"}
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "get_weather",
+          "description": "Get the current weather for a location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state"
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      }
+    ]
+}'
+```
+
+### Multimodal (text + image) example
+
+This is a multimodal request example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": [
+        {"type": "text", "text": "What is in this image?"},
+        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+      ]}
+    ]
+}'
+```
+
+### Running with Edge Functions:
+
+This is an example of how to run this model with Edge Functions:
+
+```ts
+const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", {
+  "stream": true,
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
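+      // User messages can also pass an array of text and image_url items,
+      // as in the multimodal example above (see the JSON schema below).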
+ }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { 
+ "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-7b.mdx new file mode 100644 index 0000000000..b32c82d4f2 --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/ai-models/qwen-2-5-vl-7b.mdx @@ -0,0 +1,473 @@ +--- +title: Qwen 2.5 VL AWQ 7B +description: >- + Qwen 2.5 VL AWQ 7B is a vision-language model that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. +meta_tags: 'edge ai, ai models, artificial intelligence, edge computing, qwen' +namespace: docs_edge_ai_models_qwen_2_5_vl_7b +permalink: /documentation/products/ai/edge-ai/models/qwen-2-5-vl-7b/ +--- + +**Qwen 2.5 VL AWQ 7B** is a vision-language model that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation. 
+
+## Model details
+
+| Category | Details |
+|----------|---------|
+| **Model Name** | Qwen2.5 VL |
+| **Version** | AWQ 7B |
+| **Model Category** | VLM |
+| **Size** | 7B params |
+| **HuggingFace Model** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct-AWQ) |
+| **OpenAI Compatible endpoint** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **License** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) |
+
+## Capabilities
+
+| Feature | Details |
+|---------|--------|
+| Tool Calling | ✅ |
+| Azion Long-term Support (LTS) | ✅ |
+| Context Length | 32k tokens |
+| Supports LoRA | ✅ |
+| Input data | Text + Image |
+
+## Usage
+
+### Basic chat completion
+
+This is a basic chat completion example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": "Name the european capitals"}
+    ]
+}'
+```
+
+### Tool Calling example
+
+This is a tool calling example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant with access to tools."},
+      {"role": "user", "content": "What is the weather in London?"}
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "get_weather",
+          "description": "Get the current weather for a location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state"
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      }
+    ]
+}'
+```
+
+### Multimodal (text + image) example
+
+This is a multimodal example using this model:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ",
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": [
+        {"type": "text", "text": "What is in this image?"},
+        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
+      ]}
+    ]
+}'
+```
+
+### Running with Edge Functions:
+
+This is an example of running this model with Edge Functions:
+
+```ts
+const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", {
+  "stream": true,
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
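+      // User messages can also pass an array of text and image_url items,
+      // as in the multimodal example above (see the JSON schema below).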
+ }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { 
+ "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/edge-ai-reference.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/edge-ai-reference.mdx new file mode 100644 index 0000000000..151e906721 --- /dev/null +++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/edge-ai-reference.mdx @@ -0,0 +1,59 @@ +--- +title: Azion Edge AI +description: >- + Azion Edge AI empowers you to build and deploy intelligent applications that process data close to where it is generated. +meta_tags: 'edge ai, artificial intelligence, edge computing' +namespace: docs_edge_ai_reference +permalink: /documentation/products/ai/edge-ai/ +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +**Edge AI** empowers you to build and deploy intelligent applications that process data close to where it is generated. By combining artificial intelligence with edge computing, it eliminates the complexities of scaling and infrastructure management, enabling real-time decision-making and enhanced performance. + +With Azion Edge AI, you can seamlessly integrate AI capabilities into your applications, leveraging tools like Edge Functions, Edge Application, and the Azion API to create scalable, secure, and efficient solutions. + +Edge AI gives you access to: + +- **Run AI models on Edge Runtime**, enabling advanced AI architectures to execute directly at the edge for minimal latency and maximum performance. +- **Deploy autonomous AI agents** that analyze data and make decisions at the edge. +- **Real-time processing** with reduced latency and enhanced efficiency. +- All as part of a **complete platform**, including Edge Applications, Edge Functions, Edge SQL vector search, and more. + +--- + +## Features + +### Available Models + +Access our catalog of open-source AI models that you can run directly on Azion Runtime. These models are optimized for edge deployment with minimal resource requirements. + + + +### Model customization + +Edge AI allows you to fine-tune, train, and specialize models using **Low-Rank Adaptation (LoRA)**. 
This capability enables you to optimize models for specific tasks, ensuring they are both efficient and accurate for your business needs.
+
+### AI Agents
+
+Edge AI supports deploying AI agents like ReAct (Reasoning + Acting) at the edge, enabling advanced tasks such as context-aware responses, semantic search, and intelligent data processing.
+
+### Integration with Edge SQL
+
+Integrate with **Edge SQL** to enable vector search capabilities, allowing for semantic queries and hybrid search. This integration enhances AI-powered applications by providing precise, contextually relevant results and supporting efficient Retrieval-Augmented Generation (RAG) implementations.
+
+---
+
+## Related products
+
+- [Edge Application](/en/documentation/products/build/edge-application/): build applications that run directly on Azion's distributed network, delivering exceptional performance and customization options.
+- [Edge Functions](/en/documentation/products/build/edge-application/edge-functions/): execute code closer to end users, enhancing performance and enabling custom logic for handling requests and responses.
+- [Edge SQL](/en/documentation/products/store/edge-sql/): an edge-native SQL solution designed for serverless applications, providing data storage and querying capabilities at the edge.
+- [Vector Search](/en/documentation/products/store/edge-sql/vector-search/): enable semantic search engines and AI-powered recommendations through vector embeddings at the edge.
+
+---
+
+Explore practical examples of how to implement AI solutions with Azion:
+
+
+
\ No newline at end of file
diff --git a/src/content/docs/en/pages/main-menu/reference/edge-ai/models.mdx b/src/content/docs/en/pages/main-menu/reference/edge-ai/models.mdx
new file mode 100644
index 0000000000..c9a38b1fb4
--- /dev/null
+++ b/src/content/docs/en/pages/main-menu/reference/edge-ai/models.mdx
@@ -0,0 +1,58 @@
+---
+title: Azion Edge AI Models
+description: >-
+  Edge AI offers a diverse range of edge-optimized models for various AI domains, ensuring efficient deployment and performance.
+meta_tags: 'edge ai, ai models, artificial intelligence, edge computing'
+namespace: docs_edge_ai_models
+permalink: /documentation/products/ai/edge-ai/models/
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+Azion's edge-optimized models span multiple AI domains including text generation, image analysis, embeddings, and more. Each model is designed to balance performance and resource efficiency for edge deployment.
+
+This page provides a list of models available for use with **Edge AI**. To learn more about Edge AI, visit the [Edge AI Reference](/en/documentation/products/ai/edge-ai/).
+
+## Available Models
+
+### E5 Mistral 7B Instruct
+
+The E5 Mistral 7B Instruct model is optimized for English text embedding tasks, with capabilities for multilingual processing, flexible customization, and handling long input sequences, making it suitable for complex natural language processing applications.
+
+
+
+### Mistral 3 Small (24B AWQ)
+
+This is a language model that delivers capabilities comparable to larger models while being compact. It is ideal for conversational agents, function calling, fine-tuning, and local inference with sensitive data.
+
+
+
+### Gemma 3
+
+Gemma 3 is a model designed for fast deployment on devices, offering advanced capabilities such as multilingual support, text and visual reasoning, expanded context windows, function calling, and quantized models for high performance.
+
+
+### BAAI/bge-reranker-v2-m3
+
+A lightweight reranker model with strong multilingual capabilities. It's easy to deploy and offers fast inference.
+
+
+
+### Florence 2 Large
+
+Florence 2 is an advanced vision foundation model that leverages a prompt-based approach and a massive dataset to excel in various vision and vision-language tasks.
+
+
+
+### Qwen2.5 VL AWQ 3B
+
+A Vision Language Model (VLM) that offers advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation.
+
+
+
+### Qwen2.5 VL AWQ 7B
+
+A Vision Language Model (VLM) that supports 7 billion parameters, offering advanced capabilities such as visual analysis, agentic reasoning, long video comprehension, visual localization, and structured output generation.
+
+
diff --git a/src/content/docs/en/pages/main-menu/reference/store/edge-sql/edge-sql.mdx b/src/content/docs/en/pages/main-menu/reference/store/edge-sql/edge-sql.mdx
index 8ef82780fc..c0696d1424 100644
--- a/src/content/docs/en/pages/main-menu/reference/store/edge-sql/edge-sql.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/store/edge-sql/edge-sql.mdx
@@ -112,6 +112,8 @@ curl --location 'https://api.azion.com/v4/edge_sql/databases/{id_database}/query
 [Vector Search](/en/documentation/products/store/edge-sql/vector-search/) is an **Azion Edge SQL** feature that enables customers to implement semantic search engines. While traditional search models aim to find exact matches, such as keyword matches, vector search models use specialized algorithms to identify similar items based on their mathematical representations, or vector embeddings.
 
+This feature natively integrates with [Edge AI](/en/documentation/products/ai/edge-ai/) to power RAG implementations without additional infrastructure, combining SQL and vector capabilities in a single database. The system supports **LangChain Vector Store** integration for document storage and **LangChain Retriever** for advanced hybrid search combining vector and full-text search capabilities.
+
 ---
 
 ## Interacting with Edge SQL via Terminal
diff --git a/src/content/docs/en/pages/main-menu/reference/store/edge-sql/vector-search.mdx b/src/content/docs/en/pages/main-menu/reference/store/edge-sql/vector-search.mdx
index d9628bd549..bb6ca18b87 100644
--- a/src/content/docs/en/pages/main-menu/reference/store/edge-sql/vector-search.mdx
+++ b/src/content/docs/en/pages/main-menu/reference/store/edge-sql/vector-search.mdx
@@ -16,7 +16,7 @@ By using Vector Search, you can implement various use cases:
 
 - Enhancing search systems and offering personalized recommendations by finding items with similar characteristics or based on users' preferences, such as related products in ecommerce or content in streaming platforms.
 - Creating text embeddings to search for semantically similar text, where words or phrases are represented as vectors.
-- Building AI-based applications, leveraging Natural Language Processing (NLP) for voice assistants and chatbots.
+- Building AI-based applications that leverage Natural Language Processing (NLP) for voice assistants and chatbots. Integration with [Edge AI](/en/documentation/products/ai/edge-ai/) enables Retrieval-Augmented Generation (RAG) applications using frameworks like LangChain and LangGraph directly at the edge, as sketched below.
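+
+As a rough sketch of the retrieval step behind such a RAG flow — the table, column, and SQL function below are illustrative and depend on your own schema; the query endpoint is the one described in the [Edge SQL reference](/en/documentation/products/store/edge-sql/):
+
+```ts
+// 1) Embed the user question with an Edge AI embedding model.
+const embedding = await Azion.AI.run("intfloat-e5-mistral-7b-instruct", {
+  "input": "How do I cache API responses?",
+  "encoding_format": "float"
+})
+
+// 2) Retrieve the closest documents through the Edge SQL query API.
+//    The SQL statement is illustrative; serialize the embedding
+//    (e.g. embedding.data[0].embedding) into the vector literal.
+const result = await fetch("https://api.azion.com/v4/edge_sql/databases/{id_database}/query", {
+  method: "POST",
+  headers: {
+    "Accept": "application/json",
+    "Content-Type": "application/json",
+    "Authorization": "Token [TOKEN VALUE]"
+  },
+  body: JSON.stringify({
+    statements: [
+      "SELECT content FROM documents ORDER BY vector_distance_cos(embedding, vector('[...]')) LIMIT 3"
+    ]
+  })
+})
+```
+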
Distributed across the Azion global edge network, this feature enables more relevant search results, real-time recommendations, and insights, drastically reducing latency and improving user satisfaction. All of this while maintaining data locality and reducing dependence on the centralized database.
 
@@ -28,6 +28,7 @@ Distributed across the Azion global edge network, this feature enables more rele
 |---|---|
 | Implement Vector Search | Guide explaining the basics of [implementing Vector Search](/en/documentation/products/guides/edge-sql-vector-search/) |
 | Get to know Azion Edge SQL and its features | [Edge SQL reference](/en/documentation/products/store/edge-sql/) |
+| Integrate with Edge AI | Learn about [Edge AI capabilities](/en/documentation/products/ai/edge-ai/) |
 
 ---
 
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/build/edge-application/edge-functions-edge-app.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/build/edge-application/edge-functions-edge-app.mdx
index 9e0767202a..8c688c5dd9 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/build/edge-application/edge-functions-edge-app.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/build/edge-application/edge-functions-edge-app.mdx
@@ -22,6 +22,8 @@ Com o Edge Functions para **Edge Application**, você pode:
 - Implantar funções facilmente.
 - Fazer uso da programação em pares com a integração com o Chat GPT.
 - Visualizar o resultado da função em tempo real com o *preview deployment*.
+- Executar agentes de IA diretamente na infraestrutura distribuída da Azion com latência ultra-baixa.
+- Construir ecossistemas de IA completos.
 
 ---
 
@@ -49,6 +51,21 @@ Ao usar o [Azion Runtime](/pt-br/documentacao/runtime/visao-geral/) para desenvo
 
 ---
 
+## Suporte a frameworks de IA
+
+As Edge Functions oferecem suporte para fluxos de trabalho de IA avançados, permitindo que você:
+
+- Execute agentes de IA diretamente na infraestrutura distribuída da Azion com latência ultra-baixa, construindo soluções de nível empresarial que operam no edge com confiabilidade e escalabilidade.
+- Crie fluxos de trabalho de IA sofisticados usando integrações com **LangGraph** e **LangChain**, incluindo capacidades avançadas de recuperação que combinam busca vetorial e busca de texto completo para casos de uso complexos.
+- Aproveite a integração nativa do [Edge SQL](/pt-br/documentacao/produtos/store/edge-sql/) para implementações de RAG e armazenamento vetorial, juntamente com Servidores MCP que permitem a colaboração entre agentes usando o protocolo Agent2Agent (A2A) do Google.
+- Acesse modelos de ponta como Mistral, Florence, Qwen e outros que seguem o padrão de API da OpenAI, tudo isso reduzindo a complexidade com uma infraestrutura de IA completamente integrada e executada no edge da rede.
+
+O **Edge AI** da Azion permite que você execute modelos de IA no Azion Runtime e pode ser usado junto com as Edge Functions para criar aplicações complexas impulsionadas por IA.
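+
+Um esboço mínimo dessa combinação — a forma da resposta é presumida como compatível com o padrão Chat Completions da OpenAI e o tratamento de erros foi omitido:
+
+```ts
+// Esboço ilustrativo: uma edge function que consulta um modelo do Edge AI.
+async function handleRequest(request) {
+  const { prompt } = await request.json()
+  const modelResponse = await Azion.AI.run("ista-daslab-gemma-3-27b-it-gptq-4b-128g", {
+    "stream": false,
+    "messages": [
+      { "role": "system", "content": "Você é um assistente útil." },
+      { "role": "user", "content": prompt }
+    ]
+  })
+  // Campo de resposta presumido no formato da OpenAI.
+  return new Response(modelResponse.choices?.[0]?.message?.content)
+}
+```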
+ + + +--- + ## Frameworks JavaScript ### Next.js diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx new file mode 100644 index 0000000000..b5af115621 --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/baai-bge-reranker-v2-m3.mdx @@ -0,0 +1,118 @@ +--- +title: BAAI/bge-reranker-v2-m3 +description: >- + BAAI/bge-reranker-v2-m3 é um modelo de reranking leve com fortes capacidades multilíngues. +meta_tags: 'edge ai, ai models, artificial intelligence, edge computing' +namespace: docs_edge_ai_models_baai_bge_reranker_v2_m3 +permalink: /documentacao/produtos/ai/edge-ai/modelos/baai-bge-reranker-v2-m3/ +--- + +**BAAI/bge-reranker-v2-m3** é um modelo de reranking leve com fortes capacidades multilíngues. Ele é fácil de implantar e oferece inferência rápida. + +## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | BAAI/bge-reranker-v2-m3 | +| **Versão** | Original | +| **Categoria do modelo** | Reranker | +| **Tamanho** | 568M parâmetros | +| **Modelo HuggingFace** | [BAAI/bge-reranker-v2-m3](https://huggingface.co/BAAI/bge-reranker-v2-m3) | +| **Licença** | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | + +## Capacidades + +| Recurso | Detalhes | +|---------|--------| +| Suporte a Longo Prazo da Azion (LTS) | ✅ / ❌| +| Comprimento do Contexto | 8192 | +| Suporta LoRA | ❌ | +| Dados de entrada | Texto | + +## Uso + +### Exemplo de reranking + +Este é um exemplo de uma requisição básica de reranking usando este modelo: + +```bash +curl -X POST \ + http://endpoint-url/rerank \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "BAAI/bge-reranker-v2-m3", + "query": "What is deep learning?", + "documents": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] +}' +``` + +### Exemplo de pontuação + +Este é um exemplo de uma requisição básica de pontuação usando este modelo: + +```bash +curl -X POST \ + http://endpoint-url/score \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "BAAI/bge-reranker-v2-m3", + "text_1": "What is deep learning?", + "text_2": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] + }' +``` + +### Executando com Edge Functions: + +Este é um exemplo de como executar este modelo usando Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("baai-bge-reranker-v2-m3", { + "query": "What is deep learning?", + "documents": [ + "Deep learning is a subset of machine learning that uses neural networks with many layers", + "The weather is nice today", + "Deep learning enables computers to learn from large amounts of data", + "I like pizza" + ] +}) +``` + +## Schema JSON + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "query", + "documents" + ], + "properties": { + "query": { + "type": "string" + }, + "documents": { + "type": "array", + "items": { + "type": "string" + } + }, + "top_n": { + "type": "integer" + }, + "max_tokens_per_doc": { + "type": "integer" + } + } +} +``` \ No newline at end of file diff 
--git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/e5-mistral-7b-instruct.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/e5-mistral-7b-instruct.mdx new file mode 100644 index 0000000000..f0aeeae211 --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/e5-mistral-7b-instruct.mdx @@ -0,0 +1,108 @@ +--- +title: E5 Mistral 7B Instruct +description: >- + O E5 Mistral 7B Instruct é otimizado para embedding de texto em inglês, processamento multilíngue, customização e sequências de entrada longas. +meta_tags: 'edge ai, ai models, inteligência artificial, edge computing, mistral' +namespace: docs_edge_ai_models_e5_mistral_7b_instruct +permalink: /documentacao/produtos/ai/edge-ai/modelos/e5-mistral-7b-instruct/ +--- + +O modelo **E5 Mistral 7B Instruct** é otimizado para tarefas de embedding de texto em inglês, com capacidades para processamento multilíngue, customização flexível e manipulação de sequências de entrada longas, tornando-o adequado para aplicações complexas de processamento de linguagem natural. + +## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | E5 Mistral 7B Instruct | +| **Versão** | Original | +| **Categoria do modelo** | Embedding | +| **Tamanho** | 7B parâmetros | +| **Modelo HuggingFace** | [e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct) | +| **Endpoint Compatível com a OpenAI**| [Embeddings](https://platform.openai.com/docs/api-reference/embeddings/create) | +| **Licença** | MIT | + +## Capacidades + +| Recurso | Detalhes | +|---------|--------| +| Suporte a Longo Prazo da Azion (LTS) | ❌ | +| Comprimento do Contexto | 32728 | +| Suporta LoRA | ✅ | +| Dados de entrada | Texto | + +## Uso + +### Embedding + +Este é um exemplo de como usar este modelo para gerar embeddings para entrada de texto: + +```bash +curl http://endpoint-url/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{ + "input": "The food was delicious and the waiter...", + "model": "intfloat/e5-mistral-7b-instruct", + "encoding_format": "float" + }' +``` + +### Executando com Edge Functions: + +Este é um exemplo de como usar este modelo com Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("intfloat-e5-mistral-7b-instruct", { + "input": "The food was delicious and the waiter...", + "encoding_format": "float" +}) +``` + +## JSON schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "input" + ], + "properties": { + "encoding_format": { + "type": "string", + "enum": [ + "float", + "base64" + ] + }, + "dimensions": { + "type": "integer" + }, + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "array", + "items": { + "type": "integer" + } + } + ] + } + } + ] + } + } +} +``` diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/florence-2.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/florence-2.mdx new file mode 100644 index 0000000000..d769eaca63 --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/florence-2.mdx @@ -0,0 +1,257 @@ +--- +title: Florence 2 Large +description: >- + Florence 2 Large é um modelo fundacional de visão avançado que utiliza uma abordagem baseada em prompts e um conjunto de dados massivo para se destacar 
em diversas tarefas de visão e linguagem visual.
+meta_tags: 'edge ai, ai models, inteligência artificial, edge computing'
+namespace: docs_edge_ai_models_florence_2
+permalink: /documentacao/produtos/ai/edge-ai/modelos/florence-2/
+---
+
+**Florence 2** é um modelo fundacional de visão avançado que utiliza uma abordagem baseada em prompts e um conjunto de dados massivo para se destacar em diversas tarefas de visão e linguagem visual.
+
+## Detalhes do modelo
+
+| Categoria | Detalhes |
+|----------|---------|
+| **Nome do modelo** | Florence 2 |
+| **Versão** | Large |
+| **Categoria do modelo** | VLM |
+| **Tamanho** | 0.77B parâmetros |
+| **Modelo HuggingFace** | [microsoft/Florence-2-large](https://huggingface.co/microsoft/Florence-2-large) |
+| **Endpoint Compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **Licença** | [MIT](https://huggingface.co/microsoft/Florence-2-large/resolve/main/LICENSE) |
+
+## Capacidades
+
+| Recurso | Detalhes |
+|---------|--------|
+| Tool Calling | ❌ |
+| Suporte a longo prazo da Azion (LTS) | ❌ |
+| Tamanho do Contexto | 4096 |
+| Suporta LoRA | ❌ |
+| Dados de entrada | Texto + Imagem |
+
+## Uso
+
+Florence utiliza tags de acordo com cada tarefa que irá realizar. Abaixo estão todas as tags com suas tarefas correspondentes.
+
+### Tarefas sem entrada adicional
+
+- Imagem inteira para linguagem natural:
+
+| Tag | Descrição |
+|------|-------------|
+| `<CAPTION>` | Legenda breve ao nível da imagem |
+| `<DETAILED_CAPTION>` | Legenda detalhada ao nível da imagem |
+| `<MORE_DETAILED_CAPTION>` | Legenda muito detalhada ao nível da imagem |
+
+- Imagem inteira ou região para texto:
+
+| Tag | Descrição |
+|------|-------------|
+| `<OCR>` | OCR para imagem inteira |
+| `<OCR_WITH_REGION>` | OCR para imagem inteira, com caixas delimitadoras para itens de texto individuais |
+
+- Imagem inteira para regiões e categorias ou labels de linguagem natural:
+
+| Tag | Descrição |
+|------|-------------|
+| `<REGION_PROPOSAL>` | Propõe caixas delimitadoras para objetos salientes (sem rótulos) |
+| `<OD>` | Identifica objetos via caixas delimitadoras e fornece rótulos categóricos |
+| `<DENSE_REGION_CAPTION>` | Identifica objetos via caixas delimitadoras e fornece labels de linguagem natural |
+
+### Tarefas com entrada adicional de região
+
+- Região para segmentação:
+
+| Tag | Descrição |
+|------|-------------|
+| `<REGION_TO_SEGMENTATION>` | Segmenta objeto saliente em uma dada região |
+
+- Região para texto:
+
+| Tag | Descrição |
+|------|-------------|
+| `<REGION_TO_CATEGORY>` | Obtém classificação do objeto para caixa delimitadora |
+| `<REGION_TO_DESCRIPTION>` | Obtém descrição de linguagem natural para os conteúdos da caixa delimitadora |
+
+### Tarefas com entrada de linguagem natural
+
+- Linguagem natural para regiões (um para muitos):
+
+| Tag | Descrição |
+|------|-------------|
+| `<CAPTION_TO_PHRASE_GROUNDING>` | Dada uma legenda, fornece caixas delimitadoras para ancorar visualmente frases na legenda |
+
+- Linguagem natural para região (um para um):
+
+| Tag | Descrição |
+|------|-------------|
+| `<OPEN_VOCABULARY_DETECTION>` | Detecta caixa delimitadora para objetos e texto OCR |
+
+- Linguagem natural para segmento (um para um):
+
+| Tag | Descrição |
+|------|-------------|
+| `<REFERRING_EXPRESSION_SEGMENTATION>` | Segmentação de Expressão Referente - dada uma descrição em linguagem natural, identifica a região segmentada correspondente |
+
+Assim deve ser uma requisição usando as tags do Florence 2:
+
+```bash
+curl -X POST http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "microsoft/Florence-2-large",
+    "messages": [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://images.unsplash.com/photo-1543373014-cfe4f4bc1cdf?q=80&w=3148&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+                    }
+                },
+                {"type": "text", "text": "<DETAILED_CAPTION>"}
+            ]
+        }
+    ]
+}'
+```
+
+### Executando com Edge Functions:
+
+Este é um exemplo de como executar este modelo com Edge Functions:
+
+```ts
+const modelResponse = await Azion.AI.run("microsoft-florence-2-large", {
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "https://images.unsplash.com/photo-1543373014-cfe4f4bc1cdf?q=80&w=3148&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
+          }
+        },
+        {
+          "type": "text",
+          "text": "<DETAILED_CAPTION>"
+        }
+      ]
+    }
+  ]
+})
+```
+
+## Schema JSON
+
+```json
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "object",
+    "required": [
+        "messages"
+    ],
+    "properties": {
+        "messages": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "required": [
+                    "role",
+                    "content"
+                ],
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "const": "user"
+                    },
+                    "content": {
+                        "type": "array",
+                        "maxItems": 2,
+                        "items": {
+                            "oneOf": [
+                                {
+                                    "type": "object",
+                                    "required": [
+                                        "type",
+                                        "image_url"
+                                    ],
+                                    "properties": {
+                                        "type": {
+                                            "const": "image_url"
+                                        },
+                                        "image_url": {
+                                            "type": "object",
+                                            "required": [
+                                                "url"
+                                            ],
+                                            "properties": {
+                                                "url": {
+                                                    "type": "string",
+                                                    "format": "uri"
+                                                }
+                                            }
+                                        }
+                                    }
+                                },
+                                {
+                                    "type": "object",
+                                    "required": [
+                                        "type",
+                                        "text"
+                                    ],
+                                    "properties": {
+                                        "type": {
+                                            "const": "text"
+                                        },
+                                        "text": {
+                                            "type": "string",
+                                            "enum": [
+                                                "<CAPTION>",
+                                                "<DETAILED_CAPTION>",
+                                                "<MORE_DETAILED_CAPTION>",
+                                                "<OCR>",
+                                                "<OCR_WITH_REGION>",
+                                                "<REGION_PROPOSAL>",
+                                                "<OD>",
+                                                "<DENSE_REGION_CAPTION>"
+                                            ]
+                                        }
+                                    }
+                                },
+                                {
+                                    "type": "object",
+                                    "required": [
+                                        "type",
+                                        "text"
+                                    ],
+                                    "properties": {
+                                        "type": {
+                                            "const": "text"
+                                        },
+                                        "text": {
+                                            "type": "string",
+                                            "allOf": [
+                                                {
+                                                    "not": {
+                                                        "pattern": "<CAPTION>|<DETAILED_CAPTION>|<MORE_DETAILED_CAPTION>|<OCR>|<OCR_WITH_REGION>|<REGION_PROPOSAL>|<OD>|<DENSE_REGION_CAPTION>"
+                                                    }
+                                                },
+                                                {
+                                                    "pattern": "<REGION_TO_SEGMENTATION>|<REGION_TO_CATEGORY>|<REGION_TO_DESCRIPTION>|<CAPTION_TO_PHRASE_GROUNDING>|<OPEN_VOCABULARY_DETECTION>|<REFERRING_EXPRESSION_SEGMENTATION>"
+                                                }
+                                            ]
+                                        }
+                                    }
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+```
\ No newline at end of file
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/gemma-3.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/gemma-3.mdx
new file mode 100644
index 0000000000..d669832b3d
--- /dev/null
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/gemma-3.mdx
@@ -0,0 +1,313 @@
+---
+title: Gemma 3
+description: >-
+  O Gemma 3 é um modelo projetado para implantação rápida em dispositivos, oferecendo recursos avançados como suporte multilíngue, raciocínio textual e visual, janelas de contexto expandidas, chamada de função e modelos quantizados para alto desempenho.
+meta_tags: 'edge ai, ai models, inteligência artificial, edge computing, gemma3'
+namespace: docs_edge_ai_models_gemma_3
+permalink: /documentacao/produtos/ai/edge-ai/modelos/gemma-3/
+---
+
+O **Gemma 3** é um modelo projetado para implantação rápida em dispositivos, oferecendo recursos avançados como suporte multilíngue, raciocínio textual e visual, janelas de contexto expandidas, chamada de função e modelos quantizados para alto desempenho.
+ +## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | Gemma 3 | +| **Versão** | 27B INT4 | +| **Categoria do modelo** | LLM | +| **Tamanho** | 27B parâmetros | +| **Modelo HuggingFace** | [ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g](https://huggingface.co/ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g) | +| **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | +| **Licença** | [Gemma](https://ai.google.dev/gemma/terms) | + +## Capacidades + +| Recurso | Detalhes | +|---------|--------| +| Tool Calling | ❌ | +| Suporte a longo prazo da Azion (LTS) | ✅ | +| Tamanho do contexto | 128k | +| Suporta LoRA | ✅ | +| Dados de entrada | Texto + Imagem | + +## Uso + +### Exemplo básico de chat completion + +Este é um exemplo básico de chat completion usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Name the european capitals"} + ] +}' +``` + +### Exemplo multimodal (texto + imagem) + +Este é um exemplo de requisição multimodal usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} + ]} + ] +}' +``` + +### Executando com Edge Functions: + +Este é um exemplo de como executar este modelo com Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("ista-daslab-gemma-3-27b-it-gptq-4b-128g", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." 
+ }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## Schema JSON + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + } + ] + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { + "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + } +} +``` + diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/mistral-3-small.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/mistral-3-small.mdx new file mode 100644 index 0000000000..91a769fa72 --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/mistral-3-small.mdx @@ -0,0 +1,446 @@ +--- +title: 
Mistral 3 Small (24B AWQ)
+description: >-
+  O Mistral 3 Small é um modelo de linguagem compacto que oferece capacidades comparáveis às de modelos maiores, ideal para agentes conversacionais, chamada de função, ajuste fino e inferência local com dados sensíveis.
+meta_tags: 'edge ai, modelos ai, inteligência artificial, computação edge, mistral'
+namespace: docs_edge_ai_models_mistral_3_small
+permalink: /documentacao/produtos/ai/edge-ai/modelos/mistral-3-small/
+---
+
+**Mistral 3 Small** é um modelo de linguagem que, embora compacto, oferece capacidades comparáveis às de modelos maiores. Ele é ideal para agentes conversacionais, chamada de função, ajuste fino e inferência local com dados sensíveis.
+
+## Detalhes do modelo
+
+| Categoria | Detalhes |
+|----------|---------|
+| **Nome do modelo** | Mistral 3 Small |
+| **Versão** | 24B AWQ |
+| **Categoria do modelo** | Large Language Model (LLM) |
+| **Tamanho** | 24B parâmetros |
+| **Modelo HuggingFace** | [casperhansen/mistral-small-24b-instruct-2501-awq](https://huggingface.co/casperhansen/mistral-small-24b-instruct-2501-awq) |
+| **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) |
+| **Licença** | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) |
+
+## Capacidades
+
+| Recurso | Detalhes |
+|---------|--------|
+| Tool Calling | ✅ |
+| Suporte a longo prazo da Azion (LTS) | ✅ |
+| Tamanho do contexto | 32.000 |
+| Suporta LoRA | ❌ |
+| Dados de entrada | Texto |
+
+## Uso
+
+### Chat completion básico
+
+Este é um exemplo de uma requisição básica de chat completion usando este modelo:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+    "max_tokens": 1024,
+    "messages": [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Name the european capitals"}
+    ]
+}'
+```
+
+### Exemplo de Tool Calling
+
+Este é um exemplo de uma requisição de Tool Calling usando este modelo:
+
+```bash
+curl http://endpoint-url/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "stream": true,
+    "model": "casperhansen/mistral-small-24b-instruct-2501-awq",
+    "max_tokens": 1024,
+    "messages": [
+        {"role": "system", "content": "You are a helpful assistant with access to tools."},
+        {"role": "user", "content": "What is the weather in London?"}
+    ],
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather for a location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state"
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        }
+    ]
+}'
+```
+
+## Performance
+
+O modelo foi quantizado usando AWQ, resultando em uma redução de 50% no tamanho do modelo sem prejudicar o desempenho.
Ele suporta uma janela de contexto grande de 32.000 tokens, tornando-o adequado para processar documentos longos e manter o contexto em conversas. + +### Executando com Edge Functions: + +Este é um exemplo de código de como usar este modelo através de Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("casperhansen-mistral-small-24b-instruct-2501-awq", { + "stream": true, + "max_tokens": 1024, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## Schema JSON + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { + "type": "string" + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-3b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-3b.mdx new file mode 100644 index 0000000000..c77439f1ad --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-3b.mdx @@ -0,0 +1,473 @@ +--- +title: Qwen 2.5 VL AWQ 3B +description: >- + O Qwen 2.5 VL AWQ 3B é um modelo de linguagem e visão que suporta 3 bilhões de parâmetros e oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. +meta_tags: 'edge ai, modelos ai, inteligência artificial, edge computing, qwen' +namespace: docs_edge_ai_models_qwen_2_5_vl_3b +permalink: /documentacao/produtos/ai/edge-ai/modelos/qwen-2-5-vl-3b/ +--- + +O **Qwen 2.5 VL AWQ 3B** é um modelo de linguagem e visão que oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. Ele suporta 3 bilhões de parâmetros. 
+ +## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | Qwen2.5 VL | +| **Versão** | AWQ 3B | +| **Categoria do modelo** | VLM | +| **Tamanho** | 3B parâmetros | +| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-3B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) | +| **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | +| **Licença** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) | + +## Capacidades + +| Recurso | Detalhes | +|---------|--------| +| Tool Calling | ✅ | +| Suporte a longo prazo da Azion (LTS) | ✅ | +| Tamanho do contexto | 32k tokens | +| Suporta LoRA | ✅ | +| Dados de entrada | Texto + Imagem | + +## Uso + +### Exemplo básico de chat completion + +Este é um exemplo básico de uma requisição de chat completion usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Name the european capitals"} + ] +}' +``` + +### Exemplo de Tool Calling + +Este é um exemplo de uma requisição de Tool Calling usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant with access to tools."}, + {"role": "user", "content": "What is the weather in London?"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state" + } + }, + "required": ["location"] + } + } + } + ] +}' +``` + +### Exemplo multimodal (texto + imagem) + +Este é um exemplo de uma requisição multimodal usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-3B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} + ]} + ] +}' +``` +### Executando com Edge Functions: + +Este é um exemplo de como executar este modelo com Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("qwen-qwen25-vl-3b-instruct-awq", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." 
+ }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## Schema JSON + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { 
+ "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-7b.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-7b.mdx new file mode 100644 index 0000000000..e9b195a7ba --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/ai-models/qwen-2-5-vl-7b.mdx @@ -0,0 +1,473 @@ +--- +title: Qwen 2.5 VL AWQ 7B +description: >- + O Qwen 2.5 VL AWQ 7B é um modelo de linguagem e visão que suporta 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. +meta_tags: 'edge ai, modelos ai, inteligência artificial, computação edge, qwen' +namespace: docs_edge_ai_models_qwen_2_5_vl_7b +permalink: /documentacao/produtos/ai/edge-ai/modelos/qwen-2-5-vl-7b/ +--- + +O **Qwen 2.5 VL AWQ 7B** é um modelo de linguagem e visão que suporta 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada. 
+ +## Detalhes do modelo + +| Categoria | Detalhes | +|----------|---------| +| **Nome do modelo** | Qwen2.5 VL | +| **Versão** | AWQ 7B | +| **Categoria do modelo** | VLM | +| **Tamanho** | 7B parâmetros | +| **Modelo HuggingFace** | [Qwen/Qwen2.5-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct-AWQ) | +| **Endpoint compatível com a OpenAI** | [Chat Completions](https://platform.openai.com/docs/api-reference/chat/create) | +| **Licença** | [Apache 2.0](https://huggingface.co/datasets/choosealicense/licenses/resolve/main/markdown/apache-2.0.md) | + +## Capacidades + +| Recurso | Detalhes | +|---------|--------| +| Tool Calling | ✅ | +| Suporte a longo prazo da Azion (LTS) | ✅ | +| Tamanho do contexto | 32k tokens | +| Suporta LoRA | ✅ | +| Dados de entrada | Texto + Imagem | + +## Uso + +### Exemplo básico de chat completion + +Este é um exemplo básico de uma requisição de chat completion usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Name the european capitals"} + ] +}' +``` + +### Exemplo de Tool Calling + +Este é um exemplo de uma requisição de Tool Calling usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant with access to tools."}, + {"role": "user", "content": "What is the weather in London?"} + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state" + } + }, + "required": ["location"] + } + } + } + ] +}' +``` + +### Exemplo multimodal (texto + imagem) + +Este é um exemplo multimodal usando este modelo: + +```bash +curl http://endpoint-url/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "stream": true, + "model": "Qwen/Qwen2.5-VL-7B-Instruct-AWQ", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": [ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}} + ]} + ] +}' +``` +### Executando com Edge Functions: + +Este é um exemplo de como executar este modelo com Edge Functions: + +```ts +const modelResponse = await Azion.AI.run("qwen-qwen25-vl-7b-instruct-awq", { + "stream": true, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." 
+ }, + { + "role": "user", + "content": "Name the european capitals" + } + ] +}) +``` + +## Schema JSON + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "messages" + ], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + } + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 2 + }, + "top_p": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1 + }, + "n": { + "type": "integer", + "minimum": 1, + "default": 1 + }, + "stream": { + "type": "boolean", + "default": false + }, + "max_tokens": { + "type": "integer", + "minimum": 1 + }, + "presence_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "frequency_penalty": { + "type": "number", + "minimum": -2, + "maximum": 2, + "default": 0 + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + } + }, + "components": { + "schemas": { + "Message": { + "oneOf": [ + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/AssistantMessage" + }, + { + "$ref": "#/components/schemas/ToolMessage" + } + ] + }, + "SystemMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "system" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + } + }, + "UserMessage": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "user" + ] + }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/TextContentItem" + }, + { + "$ref": "#/components/schemas/ImageContentItem" + } + ] + } + } + ] + } + } + }, + "AssistantMessage": { + "oneOf": [ + { + "$ref": "#/components/schemas/AssistantMessageWithoutToolCalls" + }, + { + "$ref": "#/components/schemas/AssistantMessageWithToolCalls" + } + ] + }, + "ToolMessage": { + "type": "object", + "required": [ + "role", + "content", + "tool_call_id" + ], + "properties": { + "role": { + "enum": [ + "tool" + ] + }, + "content": { + "type": "string" + }, + "tool_call_id": { + "type": "string" + } + } + }, + "AssistantMessageWithoutToolCalls": { + "type": "object", + "required": [ + "role", + "content" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "content": { + "$ref": "#/components/schemas/TextContent" + } + }, + "not": { + "required": [ + "tool_calls" + ] + } + }, + "AssistantMessageWithToolCalls": { + "type": "object", + "required": [ + "role", + "tool_calls" + ], + "properties": { + "role": { + "type": "string", + "enum": [ + "assistant" + ] + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCalls" + } + } + } + }, + "TextContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/TextContentItem" + } + } + ], + "description": "Text content that can be provided either as a simple string or as an array of TextContentItem objects" + }, + "ImageContent": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ImageContentItem" + } + }, + "TextContentItem": { + "type": "object", + "required": [ + "type", + "text" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "text" + ] + }, + "text": { 
+ "type": "string" + } + } + }, + "ImageContentItem": { + "type": "object", + "required": [ + "type", + "image_url" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "image_url" + ] + }, + "image_url": { + "type": "object", + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "format": "uri" + } + } + } + } + }, + "ToolCalls": { + "type": "object", + "required": [ + "function", + "id", + "type" + ], + "properties": { + "function": { + "type": "object", + "required": [ + "name", + "arguments" + ], + "properties": { + "name": { + "type": "string" + }, + "arguments": { + "type": "string" + } + } + }, + "id": { + "type": "string" + }, + "type": { + "enum": [ + "function" + ] + } + }, + "description":"The name and arguments of a function that should be called, as generated by the model." + }, + "ToolDefinition": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "function" + ] + }, + "function": { + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": true + }, + "strict": { + "type": "boolean", + "default": false + } + } + } + }, + "description": "Definition of a tool that can be used by the model" + } + } + } +} +``` \ No newline at end of file diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/edge-ai-reference.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/edge-ai-reference.mdx new file mode 100644 index 0000000000..50bc5a2d2e --- /dev/null +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/edge-ai-reference.mdx @@ -0,0 +1,59 @@ +--- +title: Edge AI da Azion +description: >- + O Edge AI da Azion capacita você a construir e implantar aplicações inteligentes que processam dados perto de onde são gerados. +meta_tags: 'edge ai, inteligência artificial, edge computing' +namespace: docs_edge_ai_reference +permalink: /documentacao/produtos/ai/edge-ai/ +--- + +import LinkButton from 'azion-webkit/linkbutton'; + +O **Edge AI** capacita você a construir e implantar aplicações inteligentes que processam dados perto de onde são gerados. Ao combinar inteligência artificial com edge computing, o Edge AI elimina as complexidades de escalabilidade e gerenciamento de infraestrutura, permitindo tomadas de decisão em tempo real e desempenho aprimorado. + +Com o Edge AI da Azion, você pode integrar perfeitamente capacidades de AI em suas aplicações, aproveitando ferramentas como Edge Functions, Edge Application e a API da Azion para criar soluções escaláveis, seguras e eficientes. + +O Edge AI possibilita: + +- **Executar modelos de AI no Edge Runtime**, permitindo que arquiteturas avançadas de AI sejam executadas diretamente no edge para latência mínima e desempenho máximo. +- **Implantar agentes de AI autônomos** que analisam dados e tomam decisões no edge. +- **Processamento em tempo real** com latência reduzida e eficiência aprimorada. +- Tudo como parte de uma **plataforma completa**, incluindo Edge Application, Edge Functions, busca vetorial do Edge SQL e muito mais. + +--- + +## Recursos + +### Modelos disponíveis + +Acesse nosso catálogo de modelos de AI de código aberto que você pode executar diretamente no Runtime da Azion. Esses modelos são otimizados para implantação no edge com requisitos mínimos de recursos. 
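+
+Como referência rápida, um esboço (assumindo o binding `Azion.AI.run` documentado na página de cada modelo) de como invocar um modelo do catálogo, neste caso o reranker BAAI/bge-reranker-v2-m3:
+
+```ts
+// Esboço: reordena documentos por relevância com um modelo do catálogo.
+// O objeto global Azion é fornecido pelo Azion Runtime.
+const ranking = await Azion.AI.run("baai-bge-reranker-v2-m3", {
+  query: "What is deep learning?",
+  documents: [
+    "Deep learning is a subset of machine learning that uses neural networks with many layers",
+    "The weather is nice today"
+  ]
+});
+```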
+
+
+
+### Personalização de modelos
+
+O Edge AI permite que você ajuste, treine e especialize modelos usando **Low-Rank Adaptation (LoRA)**. Esse recurso permite que você otimize modelos para tarefas específicas, garantindo que sejam eficientes e precisos para as necessidades do seu negócio.
+
+### Agentes de AI
+
+O Edge AI suporta a implantação de agentes de AI como ReAct (Raciocínio + Ação) no edge, permitindo tarefas avançadas como respostas contextuais, pesquisa semântica e processamento inteligente de dados.
+
+### Integração com Edge SQL
+
+Integre o Edge AI com o **Edge SQL** para habilitar capacidades de busca vetorial, permitindo consultas semânticas e busca híbrida. Essa integração aprimora aplicações alimentadas por AI, fornecendo resultados precisos e contextualmente relevantes e suportando implementações eficientes de Retrieval Augmented Generation (RAG).
+
+---
+
+## Produtos relacionados
+
+- [Edge Application](/pt-br/documentacao/produtos/build/edge-application/): construa aplicações que executam diretamente na rede distribuída da Azion, oferecendo desempenho e opções de personalização excepcionais.
+- [Edge Functions](/pt-br/documentacao/produtos/build/edge-application/edge-functions/): execute código mais próximo dos usuários finais, melhorando o desempenho e permitindo lógica personalizada para lidar com requisições e respostas.
+- [Edge SQL](/pt-br/documentacao/produtos/store/edge-sql/): uma solução SQL edge-native projetada para aplicações serverless, fornecendo capacidades de armazenamento e consulta de dados no edge.
+- [Vector Search](/pt-br/documentacao/produtos/store/edge-sql/vector-search/): ative motores de busca semântica e recomendações impulsionadas por AI através de embeddings vetoriais no edge.
+
+---
+
+Explore exemplos práticos de como implementar soluções de AI com a Azion:
+
+
+
\ No newline at end of file
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/modelos.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/modelos.mdx
new file mode 100644
index 0000000000..e723dc9f90
--- /dev/null
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/edge-ai/modelos.mdx
@@ -0,0 +1,58 @@
+---
+title: Modelos do Edge AI da Azion
+description: >-
+  O Edge AI oferece uma variedade de modelos otimizados para o edge para vários domínios de AI, garantindo implantação e desempenho eficientes.
+meta_tags: 'edge ai, modelos ai, inteligência artificial, edge computing'
+namespace: docs_edge_ai_models
+permalink: /documentacao/produtos/ai/edge-ai/modelos/
+---
+
+import LinkButton from 'azion-webkit/linkbutton';
+
+Os modelos otimizados para o edge da Azion abrangem múltiplos domínios de AI, incluindo geração de texto, análise de imagem, embeddings e mais. Cada modelo é projetado para equilibrar o desempenho e a eficiência de recursos para implantação no edge.
+
+Esta página fornece uma lista de modelos disponíveis para uso no **Edge AI**. Para saber mais, visite a página de referência do [Edge AI](/pt-br/documentacao/produtos/ai/edge-ai/).
+
+## Modelos disponíveis
+
+### E5 Mistral 7B Instruct
+
+O modelo E5 Mistral 7B Instruct é otimizado para tarefas de embedding de texto em inglês, com capacidades para processamento multilíngue, customização flexível e manipulação de sequências de entrada longas, tornando-o adequado para aplicações complexas de processamento de linguagem natural.
+
+
+
+### Mistral 3 Small (24B AWQ)
+
+Este é um modelo de linguagem que, embora compacto, oferece capacidades comparáveis às de modelos maiores. É ideal para agentes conversacionais, function calling, ajuste fino e inferência local com dados sensíveis.
+
+
+
+### Gemma 3
+
+Gemma 3 é um modelo projetado para implantação rápida em dispositivos, oferecendo capacidades avançadas como suporte multilíngue, raciocínio textual e visual, janelas de contexto expandidas, chamada de função e modelos quantizados para alto desempenho.
+
+
+
+### BAAI/bge-reranker-v2-m3
+
+Um modelo de reranking leve com fortes capacidades multilíngues. Ele é fácil de implantar, com inferência rápida.
+
+
+
+### Florence 2 Large
+
+Florence 2 é um modelo fundacional de visão avançado que utiliza uma abordagem baseada em prompts e um conjunto de dados massivo para se destacar em várias tarefas de visão e linguagem visual.
+
+
+
+### Qwen2.5 VL AWQ 3B
+
+Um Vision Language Model (VLM) que oferece capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeos longos, localização visual e geração de saída estruturada. Conta com 3 bilhões de parâmetros.
+
+
+
+### Qwen2.5 VL AWQ 7B
+
+Um Vision Language Model (VLM) com 7 bilhões de parâmetros, oferecendo capacidades avançadas como análise visual, raciocínio de agente, compreensão de vídeo longo, localização visual e geração de saída estruturada.
+
+
diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/edge-sql.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/edge-sql.mdx
index c37f9c800d..65d47ff188 100644
--- a/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/edge-sql.mdx
+++ b/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/edge-sql.mdx
@@ -114,6 +114,8 @@ curl --location 'https://api.azion.com/v4/edge_sql/databases/{id_database}/query
 [Vector Search](/pt-br/documentacao/produtos/store/edge-sql/vector-search/) é um recurso do **Edge SQL da Azion** que permite aos clientes implementar mecanismos de busca semântica. Enquanto os modelos de busca tradicionais visam encontrar correspondências exatas, como correspondências de palavras-chave, os modelos de busca vetorial usam algoritmos especializados para identificar itens semelhantes com base em suas representações matemáticas, ou embeddings vetoriais.
 
+Esta funcionalidade se integra nativamente com [Edge AI](/pt-br/documentacao/produtos/ai/edge-ai/) para impulsionar implementações de RAG sem infraestrutura adicional, combinando capacidades SQL e vetoriais em um único banco de dados. O sistema suporta a integração do **LangChain Vector Store** para armazenamento de documentos e do **LangChain Retriever** para busca híbrida avançada que combina capacidades de busca vetorial e de texto completo, como ilustra o esboço a seguir.
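+
+A título de ilustração, um esboço em TypeScript de uma consulta de similaridade vetorial via API REST do Edge SQL. O corpo `statements` e as funções vetoriais (`vector_distance_cos`, `vector32`, sintaxe do libSQL) são premissas ilustrativas, e não uma referência definitiva da API:
+
+```ts
+// Esboço: consulta de similaridade vetorial no Edge SQL via API REST.
+// Premissas: corpo { statements: [...] } e funções vetoriais do libSQL.
+const response = await fetch(
+  "https://api.azion.com/v4/edge_sql/databases/{id_database}/query",
+  {
+    method: "POST",
+    headers: {
+      Authorization: "Token <personal_token>",
+      "Content-Type": "application/json"
+    },
+    body: JSON.stringify({
+      statements: [
+        "SELECT title, vector_distance_cos(embedding, vector32('[0.1, 0.2, 0.3]')) AS distance FROM documents ORDER BY distance ASC LIMIT 5;"
+      ]
+    })
+  }
+);
+const result = await response.json();
+```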
+ --- ## Interagindo com o Edge SQL via terminal diff --git a/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/vector-search.mdx b/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/vector-search.mdx index b80a7e80bf..212f986f2f 100644 --- a/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/vector-search.mdx +++ b/src/content/docs/pt-br/pages/menu-principal/referencia/store/edge-sql/vector-search.mdx @@ -16,7 +16,7 @@ Ao usar o Vector Search, você pode implementar vários casos de uso: - Melhorar sistemas de busca e oferecer recomendações personalizadas, encontrando itens com características semelhantes ou com base nas preferências dos usuários, como produtos relacionados em e-commerce ou conteúdo em plataformas de streaming. - Criar embeddings de texto para buscar texto semanticamente semelhante, onde palavras ou frases são representadas como vetores. -- Construir aplicações baseadas em IA, aproveitando o Processamento de Linguagem Natural (NLP) para assistentes de voz e chatbots. +- Construir aplicações baseadas em IA, aproveitando o Processamento de Linguagem Natural (NLP) para assistentes de voz e chatbots, com integração nativa ao [Edge AI](/pt-br/documentacao/produtos/ai/edge-ai/) que habilita aplicações poderosas de Retrieval-Augmented Generation (RAG) usando frameworks como LangChain e LangGraph diretamente no edge. Distribuído pela rede global de edge da Azion, esse recurso permite resultados de busca mais relevantes, recomendações em tempo real e insights, reduzindo drasticamente a latência e melhorando a satisfação do usuário. Tudo isso enquanto mantém a localidade dos dados e reduz a dependência do banco de dados centralizado. @@ -28,6 +28,7 @@ Distribuído pela rede global de edge da Azion, esse recurso permite resultados |---|---| | Implemente o Vector Search| Guia explicando os fundamentos da [implementação do Vector Search](/pt-br/documentacao/produtos/guias/edge-sql-vector-search/) | | Conheça o Edge SQL da Azion e suas funcionalidades | [Referência do Edge SQL](/pt-br/documentacao/produtos/store/edge-sql/) | +| Integre com Edge AI | Saiba mais sobre as [capacidades do Edge AI](/pt-br/documentacao/produtos/ai/edge-ai/) | --- diff --git a/src/i18n/en/nav.ts b/src/i18n/en/nav.ts index 36beca1171..b2cef58b29 100644 --- a/src/i18n/en/nav.ts +++ b/src/i18n/en/nav.ts @@ -174,6 +174,12 @@ export default [ ] }, + { + text: 'Artificial Intelligence', header: true, type: 'learn', key: 'aiRef', items: [ + { text: 'Edge AI', header: true, anchor: true, type: 'learn', key: 'reference/edgeAI', slug: '/documentation/products/ai/edge-ai/' }, + { text: 'Models', header: true, anchor: true, type: 'learn', key: 'reference/models', slug: '/documentation/products/ai/edge-ai/models/' }, + ] + }, { text: 'Work with domains', header: true, type: 'learn', key: 'menu.domains', items: [ { text: 'Configure a domain', header: true, anchor: true, type: 'learn', key: 'configureDomain', slug: '/documentation/products/guides/configure-a-domain/' }, diff --git a/src/i18n/pt-br/nav.ts b/src/i18n/pt-br/nav.ts index aabb71392a..a49ff41cd3 100644 --- a/src/i18n/pt-br/nav.ts +++ b/src/i18n/pt-br/nav.ts @@ -213,6 +213,10 @@ export default NavDictionary([ { text: 'Real-Time Metrics', key: 'reference/realTimeMetrics', slug: '/documentacao/produtos/observe/real-time-metrics/' }, { text: 'Real-Time Metrics Histórico', key: 'reference/historicalRealTimeMetrics', slug: '/documentacao/produtos/observe/real-time-metrics-historico/' }, + { text: 'Inteligência 
Artificial', key: 'aiRef'}, + { text: 'Edge AI', key: 'reference/edgeAI', slug: '/documentacao/produtos/ai/edge-ai/' }, + { text: 'Modelos', key: 'reference/models', slug: '/documentacao/produtos/ai/edge-ai/modelos/' }, + { text: 'Marketplace', key: 'mktpRef' }, { text: 'Marketplace', key: 'mktp', slug: '/documentacao/produtos/marketplace/' }, { text: 'Permissões', key: 'mktp/permissions', slug: '/documentacao/produtos/guias/permissoes-marketplace/' },