|
| 1 | +import Foundation |
| 2 | + |
/// Adopted by types whose contents can be matched against a search query.
protocol Searchable {
    /// Returns the text fields to search, each carrying its own weight.
    func searchableFields() -> [SearchableField]
}
| 8 | + |
/// One piece of searchable text together with its scoring weight.
struct SearchableField {
    /// The text that queries are matched against.
    let content: String

    /// Factor applied to the score of any match found in `content`.
    let weight: Double

    /// Whether approximate (fuzzy) matches are accepted for this field.
    let allowFuzzyMatch: Bool

    /// Creates a field.
    ///
    /// - Parameters:
    ///   - content: The text to search.
    ///   - weight: The scoring weight of this field.
    ///   - allowFuzzyMatch: Pass `false` to disable fuzzy matching for this
    ///     field. Defaults to `true`.
    init(content: String, weight: Double, allowFuzzyMatch: Bool = true) {
        self.allowFuzzyMatch = allowFuzzyMatch
        self.weight = weight
        self.content = content
    }
}
| 21 | + |
/// Tunable limits and score multipliers used by the search engine.
///
/// The stored properties are declared in the order expected by the memberwise
/// initializer; keep that order when editing.
struct SearchConfiguration {
    /// Upper bound on the edit distance accepted for a fuzzy match.
    let maxEditDistance: Int

    /// Lowest similarity (0-1) at which a fuzzy match still counts.
    let minSimilarityThreshold: Double

    /// Score multiplier for exact matches.
    let exactMatchMultiplier: Double

    /// Score multiplier for prefix matches.
    let prefixMatchMultiplier: Double

    /// Score multiplier for word prefix matches.
    let wordPrefixMatchMultiplier: Double

    /// Score multiplier for fuzzy matches (applied to the similarity score).
    let fuzzyMatchMultiplier: Double

    /// The stock configuration used when none is supplied.
    static let `default`: SearchConfiguration = .init(
        maxEditDistance: 2,
        minSimilarityThreshold: 0.7,
        exactMatchMultiplier: 2.0,
        prefixMatchMultiplier: 1.5,
        wordPrefixMatchMultiplier: 0.8,
        fuzzyMatchMultiplier: 0.6
    )
}
| 51 | + |
/// A generic search engine that performs weighted fuzzy search.
///
/// Each item exposes its text through `Searchable.searchableFields()`. Every
/// field is scored against the query (exact, substring/prefix, then fuzzy) and
/// the per-field scores are summed into the item's relevance score.
struct SearchEngine<Item: Searchable> {

    /// An item paired with the relevance score it received for a query.
    struct SearchResult {
        let item: Item
        let score: Double
    }

    /// Multipliers and fuzzy-matching limits used when scoring.
    let configuration: SearchConfiguration

    /// Creates an engine.
    ///
    /// - Parameter configuration: Scoring configuration. Defaults to
    ///   `SearchConfiguration.default`.
    init(configuration: SearchConfiguration = .default) {
        self.configuration = configuration
    }

    /// Searches `items` with weighted fuzzy matching.
    ///
    /// The query is lowercased and trimmed of surrounding whitespace and
    /// newlines before matching.
    ///
    /// - Parameters:
    ///   - query: The text to look for.
    ///   - items: The candidates to search.
    /// - Returns: `items` unchanged when the normalized query is empty;
    ///   otherwise the items whose score is greater than zero, ordered by
    ///   descending score.
    func search(query: String, in items: [Item]) -> [Item] {
        let normalizedQuery = query.lowercased().trimmingCharacters(in: .whitespacesAndNewlines)

        // Empty query returns all items.
        guard !normalizedQuery.isEmpty else {
            return items
        }

        // Score every item and drop the ones that did not match at all.
        let results: [SearchResult] = items.compactMap { item in
            let score = calculateScore(for: item, query: normalizedQuery)
            return score > 0 ? SearchResult(item: item, score: score) : nil
        }

        // Highest score first.
        return results
            .sorted { $0.score > $1.score }
            .map { $0.item }
    }

    /// Sums the field scores of `item` for an already-normalized `query`.
    ///
    /// Field content is lowercased here so that it compares against the
    /// lowercased query.
    private func calculateScore(for item: Item, query: String) -> Double {
        let fields = item.searchableFields()

        return fields.reduce(0.0) { totalScore, field in
            totalScore + calculateFieldScore(
                field: field.content.lowercased(),
                query: query,
                weight: field.weight,
                allowFuzzy: field.allowFuzzyMatch
            )
        }
    }

    /// Scores a single lowercased field against the normalized query.
    ///
    /// Checks run from strongest to weakest and the first hit wins:
    /// exact match, prefix match, plain substring match, then (only when
    /// `allowFuzzy` is true) per-word and whole-field fuzzy matches.
    ///
    /// - Returns: The weighted score, or `0` when nothing matched.
    private func calculateFieldScore(field: String, query: String, weight: Double, allowFuzzy: Bool) -> Double {
        // Exact match.
        if field == query {
            return weight * configuration.exactMatchMultiplier
        }

        // Substring match; a match at the very start of the field scores higher.
        if field.contains(query) {
            if field.hasPrefix(query) {
                return weight * configuration.prefixMatchMultiplier
            }
            return weight
        }

        guard allowFuzzy else {
            return 0
        }

        // Check each space-separated word in the field.
        let fieldWords = field.split(separator: " ").map(String.init)
        for word in fieldWords {
            // NOTE(review): a word that starts with the query normally means the
            // field contains the query, so this branch appears to be shadowed by
            // the substring check above. Kept as-is to preserve scoring.
            if word.hasPrefix(query) {
                return weight * configuration.wordPrefixMatchMultiplier
            }

            // The first word that is similar enough decides the score.
            let similarity = calculateSimilarity(word, query)
            if similarity >= configuration.minSimilarityThreshold {
                return weight * similarity * configuration.fuzzyMatchMultiplier
            }
        }

        // Try a whole-field fuzzy match for short queries, at a reduced score.
        if query.count <= 10 {
            let fullFieldFactor = 0.7
            let similarity = calculateSimilarity(field, query)
            if similarity >= configuration.minSimilarityThreshold {
                return weight * similarity * configuration.fuzzyMatchMultiplier * fullFieldFactor
            }
        }

        return 0
    }

    /// Calculates the similarity of two strings from their normalized edit distance.
    ///
    /// - Returns: `1 - distance / maxLength`, or `0` when the distance exceeds
    ///   `min(configuration.maxEditDistance, maxLength / 3)`. Two empty strings
    ///   yield `1`.
    private func calculateSimilarity(_ str1: String, _ str2: String) -> Double {
        let length1 = str1.count
        let length2 = str2.count
        let maxLength = max(length1, length2)

        // Two empty strings are identical; this also avoids dividing by zero below.
        guard maxLength > 0 else {
            return 1.0
        }

        // Don't allow too many edits relative to string length.
        let allowedEdits = min(configuration.maxEditDistance, maxLength / 3)

        // The edit distance can never be smaller than the length difference, so
        // a large difference (e.g. a long field vs. a short query) is rejected
        // without running the quadratic distance computation.
        if abs(length1 - length2) > allowedEdits {
            return 0
        }

        let distance = levenshteinDistance(str1, str2)
        if distance > allowedEdits {
            return 0
        }

        return 1.0 - (Double(distance) / Double(maxLength))
    }

    /// Calculates the Levenshtein edit distance between two strings.
    ///
    /// Comparison is per `Character`. Only two rows of the dynamic-programming
    /// table are kept at a time.
    private func levenshteinDistance(_ str1: String, _ str2: String) -> Int {
        let source = Array(str1)
        let target = Array(str2)

        // The distance to an empty string is the other string's length. Handling
        // this first also keeps the `1...count` ranges below valid; they would
        // trap at runtime for an empty string.
        guard !source.isEmpty else { return target.count }
        guard !target.isEmpty else { return source.count }

        // previousRow[j] is the distance between the first (i - 1) characters
        // of `source` and the first j characters of `target`.
        var previousRow = Array(0...target.count)
        var currentRow = [Int](repeating: 0, count: target.count + 1)

        for i in 1...source.count {
            currentRow[0] = i
            for j in 1...target.count {
                let cost = source[i - 1] == target[j - 1] ? 0 : 1
                currentRow[j] = min(
                    previousRow[j] + 1,        // deletion
                    currentRow[j - 1] + 1,     // insertion
                    previousRow[j - 1] + cost  // substitution
                )
            }
            swap(&previousRow, &currentRow)
        }

        // After the final swap the last computed row lives in `previousRow`.
        return previousRow[target.count]
    }
}
0 commit comments