|
| 1 | +# A simple, framework agnostic parser for |
| 2 | +# handling multipart/form-data content-type in Nim |
| 3 | +# |
| 4 | +# (c) 2024 George Lemon | MIT License |
| 5 | +# Made by Humans from OpenPeeps |
| 6 | +# https://github.com/supranim/multipart |
| 7 | + |
| 8 | +import std/[os, streams, strutils, |
| 9 | + parseutils, options, oids, sequtils] |
| 10 | + |
| 11 | +import pkg/checksums/md5 |
| 12 | + |
type
  MultipartHeader* = enum ## Part headers understood by `parseHeader`
    contentDisposition = "content-disposition"
    contentType = "content-type"

  MultipartDataType* = enum ## Discriminator of the `Boundary` variant
    MultipartFile
    MultipartText

  MultipartFileSigantureState* = enum ## Result of a signature callback
    stateInvalidMagic
    stateMoreMagic
    stateValidMagic

  MultipartHeaderTuple* = tuple[key: MultipartHeader, value: seq[(string, string)]]
    ## A parsed header line: the header kind plus its
    ## parameters as a list of string pairs

  MultipartFileCallback* =
    proc(boundary: ptr Boundary, pos: int, c: ptr char): bool {.closure.}
    ## A callback that runs while parsing a `MultipartFile` boundary.
    ## It receives the boundary, the current position in the temporary
    ## file and the address of the current character.

  MultipartFileCallbackSignature* = proc(boundary: ptr Boundary, pos: int, c: ptr char): MultipartFileSigantureState {.closure.}
    ## A callback to collect the magic numbers signature
    ## while writing the temporary file

  MultipartTextCallback* = proc(boundary: ptr Boundary, data: ptr string): bool {.closure.}
    ## A callback that returns data of a `MultipartText`.
    ##
    ## This callback can be used for on-the-fly validation of
    ## string-based data from input fields

  # BoundaryEndCallback* = proc(boundary: Boundary): bool {.nimcall.}
    # A callback that runs after parsing a boundary
  BoundaryState* = enum
    boundaryInit
    boundaryAdded
      ## marks the Boundary as added to the `boundaries` sequence
    boundaryRemoved
      ## can be set by external validators via `MultipartFileCallback`;
      ## once invalidated, the `Boundary` will be moved to `invalidBoundaries`

  Boundary* = object
    ## One part of a multipart body: either an uploaded file
    ## or a plain text field, selected by `dataType`
    state: BoundaryState
    fieldName*: string
    case dataType*: MultipartDataType
    of MultipartFile:
      fileId*, fileName*, fileType*, filePath*: string
      fileContent*: File
    else:
      value*: string

  Multipart* = object
    tmpDirectory: string
      # Will use a temporary path to store files at
      # `getTempDir() / getMD5(getAppDir())`
    boundaryLine: string
      # Holds the boundary line retrieved from a
      # `Content-type` header
    fileCallback*: ptr MultipartFileCallback
      ## A `MultipartFileCallback` that runs while in a
      ## `MultipartFile` boundary
    fileSignatureCallback*: ptr MultipartFileCallbackSignature
      ## Collects the magic numbers that are required for verifying
      ## the file type. The callback must return one of the
      ## `MultipartFileSigantureState` states. Use `stateMoreMagic`
      ## to run `fileSignatureCallback` again for collecting more bytes.
      ##
      ## If the magic numbers are correct use `stateValidMagic`
      ## to stop the callback and continue writing the file.
      ##
      ## `stateInvalidMagic` will mark the boundary as invalid,
      ## move it to `invalidBoundaries` and stop the callback.
      ##
      ## `stateValidMagic` will continue writing the file on disk
      ## and stop the signature callback
    boundaries: seq[Boundary]
      # A sequence of Boundary objects
    invalidBoundaries*: seq[Boundary]
      # A sequence of removed boundaries

  MultipartInvalidHeader* = object of CatchableError
    ## Catchable error type for invalid multipart headers
| 93 | + |
proc parseHeader(line: string): MultipartHeaderTuple =
  ## Parses one multipart part header line, for example
  ## `Content-Disposition: form-data; name="a"; filename="b.txt"`.
  ##
  ## The text before the first `:` is lowercased and converted with
  ## `parseEnum`, which raises `ValueError` for any name that is not
  ## a `MultipartHeader`.
  ##
  ## * `content-type`: `value` holds a single pair whose first item is
  ##   the rest of the line and whose second item is empty.
  ## * `content-disposition`: `value` holds one `(name, value)` pair per
  ##   `;`-separated parameter, in order of appearance; the `form-data`
  ##   token is skipped. Quoted values are unescaped, unquoted values
  ##   are kept verbatim and a parameter without `=` gets an empty value.
  result.value = @[]
  var key: string
  var i = line.parseUntil(key, ':')
  inc(i) # skip ':'
  result.key = parseEnum[MultipartHeader](key.toLowerAscii)
  if result.key == contentType:
    i += line.skipWhitespace(i)
    result.value.add (line.substr(i), newStringOfCap(0))
  else:
    var segment: string
    while i < line.len and line[i] notin Newlines:
      i += line.skipWhitespace(i)
      i += line.parseUntil(segment, {';'}, i)
      inc(i) # skip ';'
      # Whitespace around a parameter is not significant
      let param = segment.strip()
      if param.len == 0 or param == "form-data":
        continue # empty segment (e.g. trailing ';') or the disposition type
      let kv = param.split('=', 1)
      if kv.len < 2:
        # A parameter without `=` carries no value; indexing kv[1]
        # here would raise an IndexDefect
        result.value.add (kv[0], "")
        continue
      let raw = kv[1].strip()
      if raw.len >= 2 and raw[0] == '"' and raw[^1] == '"':
        result.value.add (kv[0], raw.unescape)
      else:
        # Unquoted token value: `unescape` would raise ValueError
        # because it requires the surrounding quotes
        result.value.add (kv[0], raw)
| 115 | + |
template skipWhitespaces =
  ## Advances `curr` (read from `body`) past any run of whitespace
  ## characters; `curr` ends up on the first non-whitespace character.
  while curr in Whitespace:
    curr = body.readChar()
| 122 | + |
template skipNewlines =
  ## Advances `curr` (read from `body`) past any run of `\r` and `\n`
  ## characters; `curr` ends up on the first other character.
  while curr in {'\r', '\n'}:
    curr = body.readChar()
| 129 | + |
const
  # Character counts of the two recognized header names, used by
  # `parseBoundary` when peeking at and reading header names
  contentDispositionLen = ($contentDisposition).len
  contentTypeLen = ($contentType).len
| 133 | + |
template runFileCallback(someBoundary) {.dirty.} =
  ## Runs the optional `mp.fileCallback` for the file part `someBoundary`
  ## (a `ptr Boundary`), passing the current position in its temporary
  ## file and the address of the current character.
  ##
  ## Being a dirty template it uses `mp`, `curr`, `prevStreamBoundary`
  ## and `skipUntilNextBoundary` from the scope it is expanded in.
  ##
  ## When the callback returns `false` the temporary file is closed,
  ## `skipUntilNextBoundary` is set and `break` leaves the innermost
  ## loop enclosing the expansion site.
  if mp.fileCallback != nil:
    if not mp.fileCallback[](someBoundary,
        someBoundary.fileContent.getFilePos(), curr.addr):
      someBoundary.fileContent.close()
      # The stream is closed now; drop the open-stream marker so the
      # caller does not close the same file a second time
      prevStreamBoundary = none(ptr Boundary)
      skipUntilNextBoundary = true
      break
| 143 | + |
template parseBoundary {.dirty.} =
  ## Handles a `-` found in the body: decides whether it starts a
  ## boundary delimiter (`--` followed by the boundary string) and,
  ## if so, parses the part headers and opens the next `Boundary`.
  ##
  ## Being a dirty template it uses `curr`, `body`, `boundary`, `mp`
  ## and `prevStreamBoundary` from the scope it is expanded in.
  var currBoundary: string
  add currBoundary, curr
  curr = body.readChar()
  let len = len(boundary)
  add currBoundary, curr
  case curr:
  of '-':
    # Two dashes in a row: a delimiter if the boundary string follows
    if body.peekStr(len).startsWith(boundary):
      add currBoundary, body.readStr(len)
      curr = body.readChar()
      skipWhitespaces()
      if body.peekStr(2) == "--":
        while not body.atEnd:
          discard body.readChar() # consume remaining chars
        # leaves the innermost loop enclosing the expansion site
        break
      else:
        var headers: seq[MultipartHeaderTuple]
        # Collect the part headers. The comparison hard-codes a leading
        # "c" for the current character and only peeks at what follows.
        # NOTE(review): `curr` itself is not inspected here - confirm
        # it is always `c`/`C` at this point.
        while true:
          if "c" & body.peekStr(contentDispositionLen - 1).toLowerAscii == $contentDisposition:
            var heading: string
            add heading, curr
            # the header name minus its first char, plus the ':'
            add heading, body.readStr(contentDispositionLen)
            curr = body.readChar()
            while curr notin Newlines:
              add heading, curr
              curr = body.readChar()
            add headers, parseHeader(heading)
            # curr = body.readChar() # new line
            skipNewlines()
          elif "c" & body.peekStr(contentTypeLen - 1).toLowerAscii == $contentType:
            var heading: string
            add heading, curr
            add heading, body.readStr(contentTypeLen)
            curr = body.readChar()
            while curr notin Newlines:
              add heading, curr
              curr = body.readChar()
            add headers, parseheader(heading)
            skipNewlines()
          else: break
        skipNewlines()
        # A new part begins: finish the file of the previous part
        if prevStreamBoundary.isSome:
          prevStreamBoundary.get[].fileContent.close()
          prevStreamBoundary = none(ptr Boundary)
        if headers.len == 2:
          # Two headers are treated as a file part.
          # NOTE(review): assumes headers[0] is the content-disposition
          # with the field name first and the file name second, and
          # headers[1] is the content-type - confirm against inputs.
          let fileId = $genOid()
          let filepath = mp.tmpDirectory / fileId
          add mp.boundaries,
            Boundary(
              dataType: MultipartFile,
              fileId: fileId,
              fieldName: headers[0].value[0][1],
              fileName: headers[0].value[1][1],
              fileType: headers[1].value[0][0],
              filePath: filepath,
              fileContent: open(filepath, fmWrite)
            )
          prevStreamBoundary = some(mp.boundaries[^1].addr)
          # `curr` already holds the first character of the content
          write(prevStreamBoundary.get[].fileContent, curr)
          runFileCallback(prevStreamBoundary.get)
        elif headers.len == 1:
          # A single header is treated as a text field
          var inputBoundary =
            Boundary(
              dataType: MultipartText,
              fieldName: headers[0].value[0][1]
            )
          add inputBoundary.value, curr
          add mp.boundaries, inputBoundary
        # the delimiter itself is not part of any content
        setLen(currBoundary, 0)
    else:
      # `--` that is not a delimiter: give both characters back
      # to the current part
      if prevStreamBoundary.isSome:
        write(prevStreamBoundary.get[].fileContent, currBoundary)
      else:
        add mp.boundaries[^1].value, currBoundary
      setLen(currBoundary, 0)
  else: discard
  # A single `-` followed by another character: both go to the open file.
  # NOTE(review): when no file is open the two characters appear to be
  # dropped, which would lose data from text fields - confirm.
  if prevStreamBoundary.isSome:
    write(prevStreamBoundary.get[].fileContent, currBoundary)
    setLen(currBoundary, 0)
| 224 | + |
| 225 | +# |
| 226 | +# Public API |
| 227 | +# |
proc initMultipart*(contentType: string,
    fileCallback: ptr MultipartFileCallback = nil,
    tmpDir = ""
): Multipart =
  ## Creates a `Multipart` for a request whose `Content-Type` header
  ## value is `contentType`.
  ##
  ## Uploaded files are stored in `tmpDir`; when it is empty the
  ## directory `getTempDir() / getMD5(getAppDir())` is used instead.
  ## `fileCallback`, when given, is stored as the file callback.
  let storageDir =
    if tmpDir.len == 0: getTempDir() / getMD5(getAppDir())
    else: tmpDir
  Multipart(
    tmpDirectory: storageDir,
    boundaryLine: contentType,
    fileCallback: fileCallback
  )
| 239 | + |
proc parse*(mp: var Multipart, body: sink string, tmpDir = "") =
  ## Parses the multipart `body` using the boundary announced in the
  ## `Content-Type` value given to `initMultipart`, appending one
  ## `Boundary` to `mp` for every part found.
  ##
  ## File parts are written to files inside the temporary directory
  ## of `mp` (created if missing); text parts are collected in memory.
  ##
  ## Raises `MultipartInvalidHeader` when the `Content-Type` value has
  ## no usable `boundary` parameter.
  ##
  ## `tmpDir` is not used by this proc; it is kept so that existing
  ## callers keep compiling.
  const marker = "boundary="
  # Parameter names are case-insensitive; lowercasing keeps positions
  let markerPos = mp.boundaryLine.toLowerAscii.find(marker)
  if markerPos < 0:
    raise newException(MultipartInvalidHeader,
      "Missing `boundary` parameter in Content-Type header")
  let valueStart = markerPos + marker.len
  var boundary: string
  if valueStart < mp.boundaryLine.len and mp.boundaryLine[valueStart] == '"':
    # Quoted form: everything up to the closing quote
    discard mp.boundaryLine.parseUntil(boundary, '"', valueStart + 1)
  else:
    # Token form: stop at the next parameter or at the end of the line
    discard mp.boundaryLine.parseUntil(boundary, {';', '\c', '\l'},
      valueStart)
    boundary = boundary.strip()
  if boundary.len == 0:
    raise newException(MultipartInvalidHeader,
      "Empty `boundary` parameter in Content-Type header")
  discard existsOrCreateDir(mp.tmpDirectory)
  # The names `body`, `curr`, `boundary`, `prevStreamBoundary` and
  # `skipUntilNextBoundary` are used by the dirty templates
  # `parseBoundary` and `runFileCallback`; do not rename them.
  var
    prevStreamBoundary: Option[ptr Boundary]
    body = newStringStream(body)
    skipUntilNextBoundary: bool
    curr: char
  while not atEnd(body):
    if skipUntilNextBoundary:
      while curr != '-' and not body.atEnd:
        curr = body.readChar()
      parseBoundary()
      skipUntilNextBoundary = false
    else:
      curr = body.readChar()
      case curr
      of Newlines:
        # Line breaks are only kept for file parts
        if prevStreamBoundary.isSome:
          write(prevStreamBoundary.get[].fileContent, curr)
          runFileCallback(prevStreamBoundary.get)
      of '-':
        parseBoundary()
      else:
        # Bytes seen before any part has been opened belong to nothing
        # and are ignored; indexing `[^1]` on an empty seq would raise
        if mp.boundaries.len > 0:
          let currBoundary: ptr Boundary = addr(mp.boundaries[^1])
          case currBoundary[].dataType
          of MultipartFile:
            write(currBoundary[].fileContent, curr)
            runFileCallback(currBoundary)
          of MultipartText:
            add currBoundary[].value, curr
  if prevStreamBoundary.isSome:
    prevStreamBoundary.get[].fileContent.close()
  body.close()
| 283 | + |
proc getTempDir*(mp: Multipart): string =
  ## The directory in which `mp` stores uploaded files
  result = mp.tmpDirectory
| 287 | + |
proc getPath*(boundary: Boundary): string =
  ## The path of the file that holds the content of `boundary`.
  ##
  ## Only boundaries whose `dataType` is `MultipartFile` have a
  ## file path, so check `dataType` before calling this.
  boundary.filePath
| 293 | + |
proc getMagicNumbers*(boundary: Boundary): seq[byte] =
  ## Meant to return the magic numbers collected while parsing
  ## the `boundary`.
  ##
  ## Not implemented: an empty sequence is always returned.
  result = @[]
| 296 | + |
proc len*(mp: Multipart): int =
  ## The number of boundaries currently held by `mp`
  len(mp.boundaries)
| 298 | + |
iterator items*(mp: Multipart): Boundary =
  ## Yields every boundary held by the `Multipart`
  ## instance, in the order they were added
  var idx = 0
  while idx < mp.boundaries.len:
    yield mp.boundaries[idx]
    inc idx
0 commit comments