Skip to content

Commit 477fc27

Browse files
authored
Expose all models (#2)
This pull request exposes a simple actor for each supported model. The actual implementation of the models is `LLM`, which is just a convenience wrapper around MLX's stuff. Each model conforms to a new protocol called `ModelProtocol`. This allows us to add extra functions for each model in just a single place: ModelProtocol.swift. The first example of this is `request(_:maxTokenCount:)`. Because of the reentrancy problem with Swift actors, `ModelProtocol.llm` is wrapped inside of `ActorLock`, which is taken from [Apple's swift-build/AsyncLock.swift](https://github.com/swiftlang/swift-build/blob/main/Sources/SWBUtil/AsyncLock.swift). According to MLX's documentation, the AI models themselves are not thread-safe, which means calls to them need to be serialized. However, because Swift actors are reentrant, calling `try await llm.request(_:maxTokenCount:)` could immediately suspend and allow another call to the same actor to proceed. This may not be a problem with the library today, but I think it may be in the future, especially when we add support for `KVCache`. I think it's better to ensure that every call to `ModelProtocol.someFunc` is transactional, which is what we are doing by wrapping the implementation of every method on `ModelProtocol` inside of an `ActorLock`.
1 parent fa48c3c commit 477fc27

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1119
-127
lines changed

Sources/SHLLM/ActorLock.swift

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/// Lock intended for use within an actor in order to prevent reentrancy in actor methods which
/// themselves contain suspension points.
///
/// Swift actors are reentrant: while an actor method is suspended at an `await`,
/// other calls on the same actor may interleave. Wrapping a critical section in
/// `withLock(_:)` serializes those sections: each body runs to completion before
/// the next waiting caller is allowed to proceed.
public actor ActorLock {
    // True while some caller is currently executing a `withLock(_:)` body.
    private var busy = false
    // FIFO queue of callers suspended waiting for the lock. `ArraySlice` makes
    // `popFirst()` O(1); the buffer is reallocated once fully drained (below).
    private var queue: ArraySlice<CheckedContinuation<Void, Never>> = []

    public init() {}

    /// Runs `body` while holding the lock, suspending first if another caller
    /// currently holds it.
    ///
    /// - Parameter body: The critical section; it may itself suspend and throw.
    /// - Returns: The value produced by `body`.
    /// - Throws: Rethrows whatever `body` throws (typed throws, `E`).
    public func withLock<
        T: Sendable,
        E
    >(_ body: @Sendable () async throws(E) -> T) async throws(E) -> T {
        // A loop, not an `if`: a resumed waiter must re-check `busy`, because
        // the actor is reentrant and a different caller could acquire the lock
        // in the window between `next.resume(...)` and this task running again.
        while busy {
            await withCheckedContinuation { cc in
                queue.append(cc)
            }
        }
        busy = true
        defer {
            // Release the lock and wake exactly one waiter (FIFO order).
            busy = false
            if let next = queue.popFirst() {
                next.resume(returning: ())
            } else {
                queue = [] // reallocate buffer if it's empty
            }
        }
        return try await body()
    }
}
42+
43+
/// Small concurrency-compatible wrapper to provide only locked, non-reentrant access to its
/// value.
///
/// The wrapped value is reachable only through `withLock(_:)`, which funnels
/// every access through an `ActorLock`, so callers get exclusive, serialized
/// `inout` access even across suspension points.
public final class AsyncLockedValue<Wrapped: Sendable> {
    @usableFromInline let lock = ActorLock()
    /// Don't use this from outside this class. Is internal to be inlinable.
    @usableFromInline var value: Wrapped
    public init(_ value: Wrapped) {
        self.value = value
    }

    /// Runs `block` with exclusive, non-reentrant `inout` access to the
    /// wrapped value.
    ///
    /// - Parameter block: Receives the wrapped value `inout`; may suspend and throw.
    /// - Returns: Whatever `block` returns.
    /// - Throws: Rethrows whatever `block` throws (typed throws, `E`).
    @discardableResult @inlinable
    public func withLock<
        Result: Sendable,
        E
    >(_ block: @Sendable (inout Wrapped) async throws(E) -> Result) async throws(E)
        -> Result
    {
        try await lock.withLock { () throws(E) -> Result in try await block(&value) }
    }
}

// `@unchecked` is justified because `value` is only ever touched via
// `withLock(_:)`, which serializes access through `ActorLock`.
// NOTE(review): the `where Wrapped: Sendable` clause is redundant — the class
// declaration already constrains `Wrapped: Sendable`; confirm and consider dropping.
extension AsyncLockedValue: @unchecked Sendable where Wrapped: Sendable {}

Sources/SHLLM/LLM.swift

+198
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
import Foundation
2+
import struct Hub.Config
3+
import MLX
4+
import MLXLLM
5+
import MLXLMCommon
6+
import Tokenizers
7+
8+
/// Convenience wrapper around MLX's model loading and generation machinery.
///
/// An `LLM` is created via one of the architecture-specific static factories
/// below, each of which loads configuration, weights, and tokenizer from a
/// model `directory` on disk.
public final class LLM {
    private let directory: URL
    private let context: ModelContext
    private let configuration: ModelConfiguration

    // MARK: - Factories
    //
    // One factory per supported architecture; each binds the matching MLXLLM
    // model initializer to the generic private init below.

    static func cohere(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: CohereModel.init
        )
    }

    static func gemma(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: GemmaModel.init
        )
    }

    static func gemma2(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: Gemma2Model.init
        )
    }

    static func internLM2(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: InternLM2Model.init
        )
    }

    static func llama(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: LlamaModel.init
        )
    }

    static func openELM(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: OpenELMModel.init
        )
    }

    static func phi(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: PhiModel.init
        )
    }

    static func phi3(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: Phi3Model.init
        )
    }

    static func phiMoE(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: PhiMoEModel.init
        )
    }

    static func qwen2(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: Qwen2Model.init
        )
    }

    // NOTE(review): SmolLM deliberately reuses the Llama architecture here —
    // confirm this is intended and not a copy/paste slip.
    static func smolLM(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: LlamaModel.init
        )
    }

    static func starcoder2(directory: URL) async throws -> LLM {
        try await Self(
            directory: directory,
            modelInit: Starcoder2Model.init
        )
    }

    // MARK: - Loading

    /// Reads the file `name` from `directory` and parses it as a top-level
    /// JSON dictionary.
    ///
    /// - Throws: `SHLLMError.invalidOrMissingConfig(name)` when the file does
    ///   not contain a top-level JSON object; file-system and JSON parse
    ///   errors propagate as-is.
    private static func jsonDictionary(
        ofConfig name: String,
        in directory: URL
    ) throws -> [NSString: Any] {
        let data = try Data(contentsOf: directory.appending(
            path: name,
            directoryHint: .notDirectory
        ))
        guard let json = try JSONSerialization.jsonObject(
            with: data
        ) as? [NSString: Any] else {
            throw SHLLMError.invalidOrMissingConfig(
                name
            )
        }
        return json
    }

    /// Loads configuration, model weights, and tokenizer from `directory`,
    /// then assembles the `ModelContext` used for generation.
    ///
    /// - Parameters:
    ///   - directory: Directory containing `config.json`,
    ///     `tokenizer_config.json`, `tokenizer.json`, and the weight files.
    ///   - modelInit: Initializer of the concrete MLXLLM model type.
    private init<Configuration: Decodable>(
        directory: URL,
        modelInit: (Configuration) -> some LanguageModel
    ) async throws {
        self.directory = directory
        let decoder = JSONDecoder()

        // config.json is decoded twice: once as the architecture-independent
        // base configuration (quantization etc.) and once as the
        // model-specific configuration type.
        let config = try Data(
            contentsOf: directory.appending(
                path: "config.json",
                directoryHint: .notDirectory
            )
        )

        let baseConfig = try decoder.decode(
            BaseConfiguration.self,
            from: config
        )

        let modelConfig = try decoder.decode(
            Configuration.self,
            from: config
        )
        let model = modelInit(modelConfig)

        try loadWeights(
            modelDirectory: directory,
            model: model,
            quantization: baseConfig.quantization
        )

        // Tokenizer configuration and data share an identical load path, so
        // both go through the jsonDictionary(ofConfig:in:) helper.
        let tokenizerConfig = Config(try Self.jsonDictionary(
            ofConfig: "tokenizer_config.json",
            in: directory
        ))

        let tokenizerData = Config(try Self.jsonDictionary(
            ofConfig: "tokenizer.json",
            in: directory
        ))

        let tokenizer = try PreTrainedTokenizer(
            tokenizerConfig: tokenizerConfig,
            tokenizerData: tokenizerData
        )

        configuration = ModelConfiguration(
            directory: directory,
            overrideTokenizer: nil,
            defaultPrompt: "You are a helpful assistant."
        )

        context = ModelContext(
            configuration: configuration,
            model: model,
            processor: LLMUserInputProcessor(
                tokenizer: tokenizer,
                configuration: configuration
            ),
            tokenizer: tokenizer
        )
    }
}
176+
177+
extension LLM {
    /// Generates a completion for `input`, stopping once `maxTokenCount`
    /// tokens have been produced.
    ///
    /// - Parameters:
    ///   - input: The user input to complete.
    ///   - maxTokenCount: Upper bound on generated tokens (default 1024 * 1024).
    /// - Returns: The generated output text.
    func request(
        _ input: UserInput,
        maxTokenCount: Int = 1024 * 1024
    ) async throws -> String {
        let prepared = try await context.processor.prepare(input: input)

        let generation = try MLXLMCommon.generate(
            input: prepared,
            parameters: .init(),
            context: context
        ) { tokens in
            // Halt generation once the token budget is reached.
            tokens.count >= maxTokenCount ? .stop : .more
        }

        return generation.output
    }
}

Sources/SHLLM/ModelProtocol.swift

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import Foundation
2+
import MLXLMCommon
3+
4+
/// A model wrapper that exposes its underlying `LLM` behind a non-reentrant lock.
public protocol ModelProtocol {
    var llm: AsyncLockedValue<LLM> { get async }
}

public extension ModelProtocol {
    /// Forwards a completion request to the underlying `LLM`, serialized so
    /// that exactly one request runs against the model at a time.
    ///
    /// - Parameters:
    ///   - input: The user input to complete.
    ///   - maxTokenCount: Upper bound on generated tokens (default 1024 * 1024).
    /// - Returns: The generated output text.
    func request(
        _ input: UserInput,
        maxTokenCount: Int = 1024 * 1024
    ) async throws -> String {
        try await llm.withLock { model in
            try await model.request(input, maxTokenCount: maxTokenCount)
        }
    }
}

Sources/SHLLM/Models/CodeLlama.swift

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import Foundation
2+
3+
/// CodeLlama, served through the Llama architecture.
public actor CodeLlama: ModelProtocol {
    public let llm: AsyncLockedValue<LLM>

    public init(directory: URL) async throws {
        llm = AsyncLockedValue(try await LLM.llama(directory: directory))
    }
}

extension CodeLlama {
    /// URL of the bundled model weights; throws when the resource directory
    /// is missing from the package bundle.
    static var bundleDirectory: URL {
        get throws {
            let resource = "CodeLlama-13b-Instruct-hf-4bit-MLX"
            if let url = Bundle.shllm.url(
                forResource: resource,
                withExtension: nil,
                subdirectory: "Resources"
            ) {
                return url
            }
            throw SHLLMError.directoryNotFound(resource)
        }
    }
}

Sources/SHLLM/Models/DeepSeekR1.swift

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import Foundation
2+
3+
/// DeepSeek-R1 (distilled onto the Qwen2 architecture).
public actor DeepSeekR1: ModelProtocol {
    public let llm: AsyncLockedValue<LLM>

    public init(directory: URL) async throws {
        llm = AsyncLockedValue(try await LLM.qwen2(directory: directory))
    }
}

extension DeepSeekR1 {
    /// URL of the bundled model weights; throws when the resource directory
    /// is missing from the package bundle.
    static var bundleDirectory: URL {
        get throws {
            let resource = "DeepSeek-R1-Distill-Qwen-7B-4bit"
            if let url = Bundle.shllm.url(
                forResource: resource,
                withExtension: nil,
                subdirectory: "Resources"
            ) {
                return url
            }
            throw SHLLMError.directoryNotFound(resource)
        }
    }
}

Sources/SHLLM/Models/Gemma.swift

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import Foundation
2+
3+
/// Gemma (first generation).
public actor Gemma: ModelProtocol {
    public let llm: AsyncLockedValue<LLM>

    public init(directory: URL) async throws {
        llm = AsyncLockedValue(try await LLM.gemma(directory: directory))
    }
}

extension Gemma {
    /// URL of the bundled model weights; throws when the resource directory
    /// is missing from the package bundle.
    static var bundleDirectory: URL {
        get throws {
            let resource = "quantized-gemma-2b-it"
            if let url = Bundle.shllm.url(
                forResource: resource,
                withExtension: nil,
                subdirectory: "Resources"
            ) {
                return url
            }
            throw SHLLMError.directoryNotFound(resource)
        }
    }
}

Sources/SHLLM/Models/Gemma2-2B.swift

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import Foundation
2+
3+
/// Gemma 2, 2B-parameter variant.
public actor Gemma2_2B: ModelProtocol {
    public let llm: AsyncLockedValue<LLM>

    public init(directory: URL) async throws {
        llm = AsyncLockedValue(try await LLM.gemma2(directory: directory))
    }
}

extension Gemma2_2B {
    /// URL of the bundled model weights; throws when the resource directory
    /// is missing from the package bundle.
    static var bundleDirectory: URL {
        get throws {
            let resource = "gemma-2-2b-it-4bit"
            if let url = Bundle.shllm.url(
                forResource: resource,
                withExtension: nil,
                subdirectory: "Resources"
            ) {
                return url
            }
            throw SHLLMError.directoryNotFound(resource)
        }
    }
}

0 commit comments

Comments
 (0)