Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adds PDF support #11

Merged
merged 1 commit into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 10 additions & 11 deletions Playground/Playground/ViewModels/AppViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,25 @@ final class AppViewModel {
var temperature = 0.5

init() {
if let existingApiKey = UserDefaults.standard.string(forKey: "apiKey") {
self.apiKey = existingApiKey
}

fetchModels()
configureChat()
fetchModels()
}

/// Replaces the chat client with one configured with the given HTTP headers,
/// reusing the currently stored API key.
///
/// Screens that need an `anthropic-beta` feature (e.g. prompt caching or PDF
/// support) call this from `onAppear` to opt the shared client into that beta.
/// - Parameter headers: Extra HTTP headers to send with every request.
func setHeaders(_ headers: [String: String]) {
    chat = LLMChatAnthropic(apiKey: apiKey, headers: headers)
}

/// Persists the current API key to `UserDefaults` and rebuilds the chat
/// client so the new key takes effect immediately.
func saveSettings() {
    UserDefaults.standard.set(apiKey, forKey: "apiKey")

    // Reads the key straight back out of UserDefaults into `apiKey`.
    // NOTE(review): this round-trip appears redundant since the same value was
    // just written — presumably it guards against a failed write; TODO confirm.
    if let newApiKey = UserDefaults.standard.string(forKey: "apiKey") {
        self.apiKey = newApiKey
    }

    configureChat()
}

private func configureChat() {
chat = LLMChatAnthropic(apiKey: apiKey, headers: ["anthropic-beta": "prompt-caching-2024-07-31"])
if let apiKey = UserDefaults.standard.string(forKey: "apiKey") {
self.apiKey = apiKey
}

chat = LLMChatAnthropic(apiKey: apiKey)
}

private func fetchModels() {
Expand Down
8 changes: 7 additions & 1 deletion Playground/Playground/Views/AppView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@ struct AppView: View {
NavigationLink("Tool Use") {
ToolUseView()
}

}

Section("Beta") {
NavigationLink("Prompt Caching") {
PromptCachingView()
}

NavigationLink("PDF Support") {
PDFSupportView()
}
}
}
}
Expand Down
125 changes: 125 additions & 0 deletions Playground/Playground/Views/PDFSupportView.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//
// PDFSupportView.swift
// Playground
//
// Created by Kevin Hermawan on 11/3/24.
//

import SwiftUI
import LLMChatAnthropic

/// Playground screen for Anthropic's PDF-support beta.
///
/// Sends a text prompt together with a PDF document (given as a URL string or,
/// per `ChatMessage.Content.document`, a base64 string) and shows the model's
/// response plus token usage. On appear it opts the shared chat client into
/// the `pdfs-2024-09-25` beta via `AppViewModel.setHeaders(_:)`.
struct PDFSupportView: View {
    @Environment(AppViewModel.self) private var viewModel
    @State private var isPreferencesPresented: Bool = false

    // Default document is the "Attention Is All You Need" paper on arXiv.
    @State private var document: String = "https://arxiv.org/pdf/1706.03762"
    @State private var prompt: String = "Explain this document"

    // Last response text and the token counts reported by the API.
    @State private var response: String = ""
    @State private var inputTokens: Int = 0
    @State private var outputTokens: Int = 0
    @State private var totalTokens: Int = 0

    var body: some View {
        // NOTE(review): `viewModelBindable` is never referenced in this body —
        // presumably copied from a sibling view; TODO confirm it can be removed.
        @Bindable var viewModelBindable = viewModel

        VStack {
            Form {
                Section("Prompts") {
                    TextField("Document", text: $document)
                    TextField("Prompt", text: $prompt)
                }

                Section("Response") {
                    Text(response)
                }

                UsageSection(inputTokens: inputTokens, outputTokens: outputTokens, totalTokens: totalTokens)
            }

            VStack {
                SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
            }
        }
        .onAppear {
            // Required beta header for PDF documents; replaces any headers set
            // by a previously visited screen.
            viewModel.setHeaders(["anthropic-beta": "pdfs-2024-09-25"])
        }
        .toolbar {
            ToolbarItem(placement: .principal) {
                NavigationTitle("PDF Support")
            }

            ToolbarItem(placement: .primaryAction) {
                Button("Preferences", systemImage: "gearshape", action: { isPreferencesPresented.toggle() })
            }
        }
        .sheet(isPresented: $isPreferencesPresented) {
            PreferencesView()
        }
    }

    /// Sends the prompt + document as a single (non-streaming) completion
    /// request and stores the first text block and usage counts.
    private func onSend() {
        clear()

        let messages = [
            ChatMessage(role: .system, content: viewModel.systemPrompt),
            ChatMessage(role: .user, content: [.text(prompt), .document(document)])
        ]

        let options = ChatOptions(temperature: viewModel.temperature)

        Task {
            do {
                let completion = try await viewModel.chat.send(model: viewModel.selectedModel, messages: messages, options: options)

                if let text = completion.content.first?.text {
                    self.response = text
                }

                if let usage = completion.usage {
                    self.inputTokens = usage.inputTokens
                    self.outputTokens = usage.outputTokens
                    self.totalTokens = usage.totalTokens
                }
            } catch {
                // Errors are only logged; the UI shows no failure state.
                print(String(describing: error))
            }
        }
    }

    /// Same request as `onSend()`, but consumes the streaming API, appending
    /// each text delta to `response` as it arrives.
    private func onStream() {
        clear()

        let messages = [
            ChatMessage(role: .system, content: viewModel.systemPrompt),
            ChatMessage(role: .user, content: [.text(prompt), .document(document)])
        ]

        let options = ChatOptions(temperature: viewModel.temperature)

        Task {
            do {
                for try await chunk in viewModel.chat.stream(model: viewModel.selectedModel, messages: messages, options: options) {
                    if let text = chunk.delta?.text {
                        self.response += text
                    }

                    // Usage arrives on some chunks; later chunks overwrite earlier counts.
                    if let usage = chunk.usage {
                        self.inputTokens = usage.inputTokens
                        self.outputTokens = usage.outputTokens
                        self.totalTokens = usage.totalTokens
                    }
                }
            } catch {
                // Errors are only logged; the UI shows no failure state.
                print(String(describing: error))
            }
        }
    }

    /// Resets the response text and token counters before a new request.
    private func clear() {
        self.response = ""
        self.inputTokens = 0
        self.outputTokens = 0
        self.totalTokens = 0
    }
}
3 changes: 3 additions & 0 deletions Playground/Playground/Views/PromptCachingView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ struct PromptCachingView: View {
SendButton(stream: viewModel.stream, onSend: onSend, onStream: onStream)
}
}
.onAppear {
viewModel.setHeaders(["anthropic-beta": "prompt-caching-2024-07-31"])
}
.toolbar {
ToolbarItem(placement: .principal) {
NavigationTitle("Prompt Caching")
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,31 @@ let task = Task {

To learn more about prompt caching, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).

#### PDF Support (Beta)

```swift
let chat = LLMChatAnthropic(
apiKey: "<YOUR_ANTHROPIC_API_KEY>",
headers: ["anthropic-beta": "pdfs-2024-09-25"] // Required
)

let messages = [
ChatMessage(role: .user, content: [.text("Explain this document"), .document(document)])
]

let task = Task {
do {
let completion = try await chat.send(model: "claude-3-5-sonnet", messages: messages)

print(completion.content.first?.text ?? "No response")
} catch {
print(String(describing: error))
}
}
```

To learn more about PDF support, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support).

### Error Handling

`LLMChatAnthropic` provides structured error handling through the `LLMChatAnthropicError` enum. This enum contains three cases that represent different types of errors you might encounter:
Expand Down
34 changes: 26 additions & 8 deletions Sources/LLMChatAnthropic/ChatMessage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ public struct ChatMessage: Encodable, Sendable {
/// The role of the participant in the chat conversation.
public let role: Role

/// The content of the message, which can be text or image.
/// The content of the message, which can be text, image, or document.
public let content: [Content]

/// The cache control settings for the message. Only applicable when the role is `system`.
/// The cache control settings for the message.
public var cacheControl: CacheControl?

/// An enum that represents the role of a participant in the chat.
Expand All @@ -33,6 +33,9 @@ public struct ChatMessage: Encodable, Sendable {
/// A case that represents image content.
case image(String)

/// A case that represents document content.
case document(String)

public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)

Expand All @@ -45,7 +48,7 @@ public struct ChatMessage: Encodable, Sendable {
var sourceContainer = container.nestedContainer(keyedBy: SourceCodingKeys.self, forKey: .source)

if imageString.hasPrefix("http://") || imageString.hasPrefix("https://") {
let (base64String, mediaType) = Content.convertImageUrlToBase64(url: imageString)
let (base64String, mediaType) = Content.convertFileToBase64(url: imageString)
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(base64String, forKey: .data)
Expand All @@ -55,6 +58,20 @@ public struct ChatMessage: Encodable, Sendable {
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(imageString, forKey: .data)
}
case .document(let documentString):
try container.encode("document", forKey: .type)
var sourceContainer = container.nestedContainer(keyedBy: SourceCodingKeys.self, forKey: .source)

if documentString.hasPrefix("http://") || documentString.hasPrefix("https://") {
let (base64String, mediaType) = Content.convertFileToBase64(url: documentString)
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode(mediaType, forKey: .mediaType)
try sourceContainer.encode(base64String, forKey: .data)
} else {
try sourceContainer.encode("base64", forKey: .type)
try sourceContainer.encode("application/pdf", forKey: .mediaType)
try sourceContainer.encode(documentString, forKey: .data)
}
}
}

Expand All @@ -66,13 +83,13 @@ public struct ChatMessage: Encodable, Sendable {
case type, mediaType = "media_type", data
}

private static func convertImageUrlToBase64(url: String) -> (String, String) {
guard let imageUrl = URL(string: url), let imageData = try? Data(contentsOf: imageUrl) else {
private static func convertFileToBase64(url: String) -> (String, String) {
guard let fileUrl = URL(string: url), let fileData = try? Data(contentsOf: fileUrl) else {
return ("", "")
}

let base64String = imageData.base64EncodedString()
let mediaType = detectMediaType(from: imageData)
let base64String = fileData.base64EncodedString()
let mediaType = detectMediaType(from: fileData)

return (base64String, mediaType)
}
Expand All @@ -96,6 +113,8 @@ public struct ChatMessage: Encodable, Sendable {
return "image/gif"
} else if bytes.starts(with: [0x52, 0x49, 0x46, 0x46]) && String(data: data.subdata(in: 8..<12), encoding: .ascii) == "WEBP" {
return "image/webp"
} else if bytes.starts(with: [0x25, 0x50, 0x44, 0x46]) {
return "application/pdf"
} else {
return ""
}
Expand All @@ -120,7 +139,6 @@ public struct ChatMessage: Encodable, Sendable {
}
}


/// Creates a new instance of ``ChatMessage``.
/// - Parameters:
/// - role: The role of the participant.
Expand Down
25 changes: 25 additions & 0 deletions Sources/LLMChatAnthropic/Documentation.docc/Documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,31 @@ let task = Task {

To learn more about prompt caching, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).

#### PDF Support (Beta)

```swift
let chat = LLMChatAnthropic(
apiKey: "<YOUR_ANTHROPIC_API_KEY>",
headers: ["anthropic-beta": "pdfs-2024-09-25"] // Required
)

let messages = [
ChatMessage(role: .user, content: [.text("Explain this document"), .document(document)])
]

let task = Task {
do {
let completion = try await chat.send(model: "claude-3-5-sonnet", messages: messages)

print(completion.content.first?.text ?? "No response")
} catch {
print(String(describing: error))
}
}
```

To learn more about PDF support, check out the [Anthropic documentation](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support).

### Error Handling

``LLMChatAnthropic`` provides structured error handling through the ``LLMChatAnthropicError`` enum. This enum contains three cases that represent different types of errors you might encounter:
Expand Down