Note: There were some build issues with the current implementation. Please follow these instructions for a successful implementation.
Based on our work, here's the approach to implement PDF support in macai:
// Add to imports
import PDFKit
// Add to ImageAttachment class
/// Whether this attachment is a PDF. Defaults to `false`; the initializer snippet in these notes sets it to `true` when `originalFileType` is `.pdf`.
@Published var isPDF: Bool = false
/// Raw bytes of the PDF file. `nil` until `loadPDF()` has read the file; `toBase64` reads it for PDF attachments.
@Published var pdfData: Data?
// In init(url:context:) method
// PDFs take the dedicated PDF loading path; every other file type goes through the image loader.
if self.originalFileType == .pdf {
// Flag the attachment as a PDF before starting the load.
self.isPDF = true
self.loadPDF()
} else {
self.loadImage()
}
// Add PDF loading method
/// Reads the PDF at `url` on a background queue, stores its bytes in `pdfData`,
/// renders the first page at 2x scale onto a white background, and hands the result to
/// `createThumbnail(from:)` and `saveToEntity(pdfData:)`.
///
/// `isLoading` is set to `true` immediately and reset to `false` on the main queue once
/// the work finishes. Any failure is published through `error` on the main queue.
private func loadPDF() {
    isLoading = true
    DispatchQueue.global(qos: .userInitiated).async { [weak self] in
        guard let self = self else { return }

        guard let url = self.url else {
            // Without a URL nothing can be loaded; report it instead of leaving
            // `isLoading` stuck at `true`.
            DispatchQueue.main.async {
                self.error = URLError(.badURL)
                self.isLoading = false
            }
            return
        }

        // Single error value for every "could not render the first page" outcome.
        let thumbnailError = NSError(
            domain: "ImageAttachment",
            code: 3,
            userInfo: [NSLocalizedDescriptionKey: "Failed to create PDF thumbnail"]
        )

        do {
            // Load PDF data
            let pdfData = try Data(contentsOf: url)

            // `pdfData` is @Published, so it is mutated on the main queue rather than
            // from this background queue.
            DispatchQueue.main.async {
                self.pdfData = pdfData
            }

            // Build the document from the bytes already in memory so the file is read
            // only once and the thumbnail always matches the stored data.
            guard let pdfDocument = PDFDocument(data: pdfData),
                  pdfDocument.pageCount > 0,
                  let firstPage = pdfDocument.page(at: 0)
            else {
                throw thumbnailError
            }

            let pdfRect = firstPage.bounds(for: .mediaBox)
            // A page with an empty media box cannot be rendered into an image.
            guard pdfRect.width > 0, pdfRect.height > 0 else {
                throw thumbnailError
            }

            // Create thumbnail from first page
            let scale: CGFloat = 2.0
            let imageSize = NSSize(width: pdfRect.width * scale, height: pdfRect.height * scale)
            let thumbnail = NSImage(size: imageSize)

            thumbnail.lockFocus()
            guard let context = NSGraphicsContext.current?.cgContext else {
                // Keep lockFocus/unlockFocus balanced even when no context is available.
                thumbnail.unlockFocus()
                throw thumbnailError
            }
            // White backdrop so transparent page regions do not stay transparent.
            NSColor.white.set()
            NSRect(origin: .zero, size: imageSize).fill()
            context.scaleBy(x: scale, y: scale)
            firstPage.draw(with: .mediaBox, to: context)
            thumbnail.unlockFocus()

            self.createThumbnail(from: thumbnail)
            self.saveToEntity(pdfData: pdfData)

            DispatchQueue.main.async {
                self.isLoading = false
            }
        } catch {
            DispatchQueue.main.async {
                self.error = error
                self.isLoading = false
            }
        }
    }
}
// Update toBase64 method
/// Encodes the attachment as a base64 string suitable for embedding in an API request.
///
/// - Parameter convertPDFToImage: Only relevant for PDF attachments. When `true`, the
///   first page is rendered at 2x scale on a white background, passed through
///   `resizeImageIfNeeded`, and returned as PNG data. When `false` (the default), the raw
///   PDF bytes are returned.
/// - Returns: The base64 string, or `nil` if the PDF cannot be rendered, no image is
///   loaded, or encoding fails. Non-PDF attachments are encoded as JPEG with a
///   compression factor of 0.8.
func toBase64(convertPDFToImage: Bool = false) -> String? {
    if isPDF, let pdfData = self.pdfData {
        // Return PDF data directly for services that support PDFs
        guard convertPDFToImage else {
            return pdfData.base64EncodedString()
        }

        // PDF -> Image conversion for OpenAI
        guard let pdfDocument = PDFDocument(data: pdfData),
              pdfDocument.pageCount > 0,
              let firstPage = pdfDocument.page(at: 0)
        else {
            return nil
        }

        let pdfRect = firstPage.bounds(for: .mediaBox)
        // A page with an empty media box cannot be rendered into an image.
        guard pdfRect.width > 0, pdfRect.height > 0 else { return nil }

        let scale: CGFloat = 2.0
        let imageSize = NSSize(width: pdfRect.width * scale, height: pdfRect.height * scale)
        let pdfImage = NSImage(size: imageSize)

        pdfImage.lockFocus()
        guard let context = NSGraphicsContext.current?.cgContext else {
            // Keep lockFocus/unlockFocus balanced even when no context is available.
            pdfImage.unlockFocus()
            return nil
        }
        // White backdrop so transparent page regions do not stay transparent.
        NSColor.white.set()
        NSRect(origin: .zero, size: imageSize).fill()
        context.scaleBy(x: scale, y: scale)
        firstPage.draw(with: .mediaBox, to: context)
        pdfImage.unlockFocus()

        let resizedPage = resizeImageIfNeeded(pdfImage)
        guard let tiffData = resizedPage.tiffRepresentation,
              let bitmapImage = NSBitmapImageRep(data: tiffData),
              // Fully qualified file type, as these notes require for a clean build.
              let pngData = bitmapImage.representation(
                  using: NSBitmapImageRep.FileType.png,
                  properties: [:]
              )
        else {
            return nil
        }
        return pngData.base64EncodedString()
    }

    // For images, convert to JPEG and return as base64.
    // A PDF whose data has not been loaded yet also lands here.
    guard let image = self.image else { return nil }
    let resizedImage = resizeImageIfNeeded(image)
    guard let tiffData = resizedImage.tiffRepresentation,
          let bitmapImage = NSBitmapImageRep(data: tiffData),
          let jpegData = bitmapImage.representation(using: .jpeg, properties: [.compressionFactor: 0.8])
    else {
        return nil
    }
    return jpegData.base64EncodedString()
}
// Add function to check file type
/// Returns the file extension for this attachment: `"pdf"` when `isPDF` is set,
/// otherwise whatever `getFormatString(from:)` yields for `originalFileType`.
func getFileExtension() -> String {
    return isPDF ? "pdf" : getFormatString(from: originalFileType)
}
Add this method to get the ImageAttachment for a PDF:
// Load the full ImageAttachment object from CoreData for PDF conversion
/// Looks up the `ImageEntity` whose `id` equals `uuid` in the shared persistence
/// controller's view context and wraps it in an `ImageAttachment`.
///
/// - Parameter uuid: Identifier of the stored entity to fetch.
/// - Returns: The attachment built from the first matching entity, or `nil` when nothing
///   matches or the fetch throws (the error is printed).
private func loadImageAttachmentFromCoreData(uuid: UUID) -> ImageAttachment? {
    let context = PersistenceController.shared.container.viewContext

    let request: NSFetchRequest<ImageEntity> = ImageEntity.fetchRequest()
    request.fetchLimit = 1
    request.predicate = NSPredicate(format: "id == %@", uuid as CVarArg)

    do {
        guard let entity = try context.fetch(request).first else {
            return nil
        }
        return ImageAttachment(imageEntity: entity)
    } catch {
        print("Error fetching ImageAttachment from CoreData: \(error)")
        return nil
    }
}
Update the prepareRequest method to convert PDFs to images for OpenAI:
// When processing message content
if fileType == "pdf" {
    // Convert PDF to image and send as PNG
    if let attachment = self.loadImageAttachmentFromCoreData(uuid: uuid),
       let pdfAsImageBase64 = attachment.toBase64(convertPDFToImage: true) {
        contentArray.append([
            "type": "image_url",
            "image_url": ["url": "data:image/png;base64,\(pdfAsImageBase64)"],
        ])

        // Add a note about the PDF conversion, once per request. The previous
        // `contentArray.isEmpty` check ran right after an append, so it was always
        // false and the note was never added.
        // NOTE(review): assumes contentArray is [[String: Any]] - confirm against prepareRequest.
        let conversionNote = "Note: PDF document has been converted to an image."
        let noteAlreadyPresent = contentArray.contains { ($0["text"] as? String) == conversionNote }
        if !noteAlreadyPresent {
            contentArray.append([
                "type": "text",
                "text": conversionNote,
            ])
        }
    } else {
        // Log the failure so a dropped PDF does not go unnoticed.
        print("Error converting PDF attachment \(uuid) to an image for the request")
    }
}
- Allow PDFs in file selection dialogs
- Add explanatory messages about PDF conversion when using OpenAI
- Update tooltips to reflect PDF support
- This approach allows PDFs to work with both Claude (which supports PDFs natively) and OpenAI (through conversion to images)
- Make sure to use NSBitmapImageRep.FileType.png instead of .png for correct enum usage
- Convert only the first page of PDFs to images for OpenAI
- Add appropriate error handling throughout the implementation
- Update the UI to give users clear feedback about PDF handling
Test with both OpenAI/ChatGPT and Claude:
- Single page PDF with OpenAI (should show as image)
- Multi-page PDF with OpenAI (should show first page as image)
- PDF with Claude (should send as PDF file)
- Test both file picker and drag-and-drop