Skip to content

Commit a2f9c83

Browse files
committed
init
Signed-off-by: George Lemon <georgelemon@protonmail.com>
1 parent 4bba76d commit a2f9c83

5 files changed

Lines changed: 380 additions & 11 deletions

File tree

README.md

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,35 @@
11
<p align="center">
2-
<img src="https://github.com/openpeeps/PKG/blob/main/.github/logo.png" width="90px"><br>
3-
OpenPeeps repository template for developing libraries,<br>projects and other cool things. 👑 Written in Nim language
2+
<img src="https://github.com/openpeeps/multipart/blob/main/.github/logo.png" width="90px"><br>
3+
A simple, fault tolerant multipart parser 👑 Written in Nim language
44
</p>
55

66
<p align="center">
7-
<code>nimble install {PKG}</code>
7+
<code>nimble install multipart</code>
88
</p>
99

1010
<p align="center">
1111
<a href="https://github.com/">API reference</a><br>
12-
<img src="https://github.com/openpeeps/pistachio/workflows/test/badge.svg" alt="Github Actions"> <img src="https://github.com/openpeeps/pistachio/workflows/docs/badge.svg" alt="Github Actions">
12+
<img src="https://github.com/openpeeps/multipart/workflows/test/badge.svg" alt="Github Actions"> <img src="https://github.com/openpeeps/multipart/workflows/docs/badge.svg" alt="Github Actions">
1313
</p>
1414

1515
## 😍 Key Features
16-
- [x] Open Source | `MIT` License
17-
- [x] Written in Nim language
16+
- [x] Framework agnostic
17+
- [x] On-the-Fly Content validation via Callbacks
18+
- [x] Fault tolerant. Skips invalid boundaries
1819

1920
## Examples
20-
...
21+
todo. See `/tests`
22+
23+
### Need an input validator?
24+
If you're looking for a full input validator you can use `openpeeps/bag` package to validate input data, forms,
25+
including `multipart/form-data`. Give it a try: https://github.com/openpeeps/bag
2126

2227
### ❤ Contributions & Support
23-
- 🐛 Found a bug? [Create a new Issue](/issues)
24-
- 👋 Wanna help? [Fork it!](/fork)
28+
- 🐛 Found a bug? [Create a new Issue](https://github.com/openpeeps/multipart/issues)
29+
- 👋 Wanna help? [Fork it!](https://github.com/openpeeps/multipart/fork)
2530
- 😎 [Get €20 in cloud credits from Hetzner](https://hetzner.cloud/?ref=Hm0mYGM9NxZ4)
2631
- 🥰 [Donate via PayPal address](https://www.paypal.com/donate/?hosted_button_id=RJK3ZTDWPL55C)
2732

2833
### 🎩 License
29-
{PKG} | MIT license. [Made by Humans from OpenPeeps](https://github.com/openpeeps).<br>
30-
Copyright &copy; 2023 OpenPeeps & Contributors &mdash; All rights reserved.
34+
`multipart` | MIT license. [Made by Humans from OpenPeeps](https://github.com/openpeeps).<br>
35+
Copyright &copy; 2024 OpenPeeps & Contributors &mdash; All rights reserved.

multipart.nimble

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Package
2+
3+
version = "0.1.0"
4+
author = "George Lemon"
5+
description = "A simple multipart parser"
6+
license = "MIT"
7+
srcDir = "src"
8+
# bin = @["multipart"]
9+
# binDir = "bin"
10+
11+
# Dependencies
12+
13+
requires "nim >= 2.0.0"
14+
requires "checksums"

src/multipart.nim

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
# A simple, framework agnostic parser for
2+
# handling multipart/form-data content-type in Nim
3+
#
4+
# (c) 2024 George Lemon | MIT License
5+
# Made by Humans from OpenPeeps
6+
# https://github.com/supranim/multipart
7+
8+
import std/[os, streams, strutils,
9+
parseutils, options, oids, sequtils]
10+
11+
import pkg/checksums/md5
12+
13+
type
14+
MultipartHeader* = enum
15+
contentDisposition = "content-disposition"
16+
contentType = "content-type"
17+
18+
MultipartDataType* = enum
19+
MultipartFile
20+
MultipartText
21+
22+
MultipartFileSigantureState* = enum
23+
stateInvalidMagic
24+
stateMoreMagic
25+
stateValidMagic
26+
27+
MultipartHeaderTuple* = tuple[key: MultipartHeader, value: seq[(string, string)]]
28+
29+
MultipartFileCallback* =
30+
proc(boundary: ptr Boundary, pos: int, c: ptr char): bool {.closure.}
31+
## A callback that runs while parsing a `MultipartFile` boundary
32+
33+
MultipartFileCallbackSignature* = proc(boundary: ptr Boundary, pos: int, c: ptr char): MultipartFileSigantureState {.closure.}
34+
## A callback to collect magic numbers signature
35+
## while writing the temporary file
36+
37+
MultipartTextCallback* = proc(boundary: ptr Boundary, data: ptr string): bool {.closure.}
38+
## A callback that returns data of a `MultipartText`.
39+
##
40+
## This callback can be used for on-the-fly validation of
41+
## string-based data from input fields
42+
43+
# BoundaryEndCallback* = proc(boundary: Boundary): bool {.nimcall.}
44+
# A callback that runs after parsing a boundary
45+
BoundaryState* = enum
46+
boundaryInit
47+
boundaryAdded
48+
## marks Boundary as added to `boundaries` sequence
49+
boundaryRemoved
50+
## can be set by external validators via `MultipartFileCallback`
51+
## once invalidated, the `Boundary` will be moved to `invalidBoundaries`
52+
53+
Boundary* = object
54+
state: BoundaryState
55+
fieldName*: string
56+
case dataType*: MultipartDataType
57+
of MultipartFile:
58+
fileId*, fileName*, fileType*, filePath*: string
59+
fileContent*: File
60+
else:
61+
value*: string
62+
63+
Multipart* = object
64+
tmpDirectory: string
65+
# Will use a temporary path to store files at
66+
# `getTempDir() / getMD5(getAppDir())`
67+
boundaryLine: string
68+
# Holds the boundary line retrieved from a
69+
# `Content-type` header
70+
fileCallback*: ptr MultipartFileCallback
71+
## A `MultipartFileCallback` that runs while in
72+
## `MultipartFile` boundary
73+
fileSignatureCallback*: ptr MultipartFileCallbackSignature
74+
## Collects magic numbers that are required for verifying
75+
## the file type. The callback must return one of
76+
## `MultipartFileSigantureState` states. Use `stateMoreMagic`
77+
## to run `fileSignatureCallback` again for collecting more bytes.
78+
##
79+
## If the magic numbers are correct use `stateValidMagic`
80+
## to stop the callback and continue writing the file.
81+
##
82+
## `stateInvalidMagic` will mark the boundary as invalid,
83+
## move it to `invalidBoundaries` and stop the callback.
84+
##
85+
## `stateValidMagic` will continue writing the file on disk
86+
## and stops the signature callback
87+
boundaries: seq[Boundary]
88+
# A sequence of Boundary objects
89+
invalidBoundaries*: seq[Boundary]
90+
# A sequence of removed boundaries
91+
92+
MultipartInvalidHeader* = object of CatchableError
93+
94+
proc parseHeader(line: string): MultipartHeaderTuple =
  ## Parses one raw part-header line, for example
  ## `Content-Disposition: form-data; name="a"; filename="b.txt"`.
  ##
  ## `result.key` is the header name (lower-cased) mapped to
  ## `MultipartHeader`; `parseEnum` raises `ValueError` for any other name.
  ##
  ## For `content-type`, `result.value` holds a single pair whose first
  ## item is the header value and whose second item is empty.
  ## For `content-disposition`, it holds one `(name, value)` pair per
  ## `;`-separated parameter; the `form-data` token is not included.
  result.value = @[]
  var key: string
  var i = line.parseUntil(key, ':')
  inc(i) # skip :
  result.key = parseEnum[MultipartHeader](key.toLowerAscii)
  if result.key == contentType:
    i += line.skipWhitespace(i)
    result.value.add (line.substr(i), newStringOfCap(0))
  else:
    var v: string
    while i < line.len and line[i] notin Newlines:
      i += line.skipWhitespace(i)
      i += line.parseUntil(v, {';'}, i)
      let param = v.strip()
      # Empty segments (e.g. after a trailing `;`) and the `form-data`
      # token carry no parameter.
      if param.len > 0 and param != "form-data":
        let kv = param.split('=', 1)
        if kv.len < 2:
          # Parameter without `=`: keep its name with an empty value
          # instead of indexing past the end of `kv`.
          add result.value, (kv[0], "")
        elif kv[1].len >= 2 and kv[1][0] == '"' and kv[1][^1] == '"':
          add result.value, (kv[0], kv[1].unescape)
        else:
          # `unescape` raises `ValueError` when the value is not wrapped
          # in double quotes, so unquoted values are taken verbatim.
          add result.value, (kv[0], kv[1])
      inc(i) # skip ;
115+
116+
template skipWhitespaces =
  ## Reads from `body` into `curr` for as long as `curr` is a member of
  ## `Whitespace`. `curr` and `body` come from the expansion site.
  while curr in Whitespace:
    curr = body.readChar()
122+
123+
template skipNewlines =
  ## Reads from `body` into `curr` for as long as `curr` is `'\r'` or
  ## `'\n'`. `curr` and `body` come from the expansion site.
  while curr in {'\r', '\n'}:
    curr = body.readChar()
129+
130+
const
  # Lengths of the two header names as spelled by `MultipartHeader`,
  # evaluated at compile time.
  contentDispositionLen = ($contentDisposition).len
  contentTypeLen = ($contentType).len
133+
134+
template runFileCallback(someBoundary) {.dirty.} =
  ## Invokes `mp.fileCallback`, when one is set, with `someBoundary`, the
  ## current position in its open file and the address of `curr`.
  ##
  ## Dirty template: `mp`, `curr`, `prevStreamBoundary` and
  ## `skipUntilNextBoundary` are taken from the expansion site, and the
  ## `break` below acts on the loop enclosing the expansion.
  ##
  ## When the callback returns `false` the file is closed,
  ## `skipUntilNextBoundary` is set and the enclosing loop is left.
  if mp.fileCallback != nil:
    # `getFilePos` returns `int64` while the callback expects `int`.
    let keepWriting = mp.fileCallback[](someBoundary,
      int(someBoundary.fileContent.getFilePos()), curr.addr)
    if not keepWriting:
      someBoundary.fileContent.close()
      # Forget the stream so the handle that was just closed is neither
      # written to nor closed a second time later on.
      prevStreamBoundary = none(ptr Boundary)
      skipUntilNextBoundary = true
      # NOTE(review): this leaves the enclosing loop, not just the part —
      # confirm that is the intended way to skip a rejected file.
      break
143+
144+
# Handles a `-` read from the body stream: decides whether it starts a
# boundary delimiter line and, if so, opens the part that follows it.
# Dirty template: `curr`, `body`, `boundary`, `mp` and `prevStreamBoundary`
# are resolved in the scope where the template is expanded.
template parseBoundary {.dirty.} =
144+
# Buffers the characters consumed while probing for a delimiter so they
# can be handed back to the current part when no delimiter is found.
var currBoundary: string
145+
add currBoundary, curr
146+
curr = body.readChar()
147+
let len = len(boundary)
148+
add currBoundary, curr
149+
case curr:
150+
of '-':
151+
# Two dashes followed by the boundary token form a delimiter line.
if body.peekStr(len).startsWith(boundary):
152+
add currBoundary, body.readStr(len)
153+
curr = body.readChar()
154+
skipWhitespaces()
155+
# A "--" right after the token is read as the closing delimiter;
# whatever is left in the stream is then consumed and dropped.
if body.peekStr(2) == "--":
156+
while not body.atEnd:
157+
discard body.readChar() # consume remaining chars
158+
break
159+
else:
160+
# Collect the content-disposition / content-type header lines of the part.
var headers: seq[MultipartHeaderTuple]
161+
while true:
162+
# NOTE(review): `curr` itself is not compared here; the match prepends a
# literal "c" to the lower-cased look-ahead, so it presumably relies on
# `curr` being the first letter of the header name — confirm.
if "c" & body.peekStr(contentDispositionLen - 1).toLowerAscii == $contentDisposition:
163+
var heading: string
164+
add heading, curr
165+
add heading, body.readStr(contentDispositionLen)
166+
curr = body.readChar()
167+
while curr notin Newlines:
168+
add heading, curr
169+
curr = body.readChar()
170+
add headers, parseHeader(heading)
171+
# curr = body.readChar() # new line
172+
skipNewlines()
173+
elif "c" & body.peekStr(contentTypeLen - 1).toLowerAscii == $contentType:
174+
var heading: string
175+
add heading, curr
176+
add heading, body.readStr(contentTypeLen)
177+
curr = body.readChar()
178+
while curr notin Newlines:
179+
add heading, curr
180+
curr = body.readChar()
181+
add headers, parseheader(heading)
182+
skipNewlines()
183+
else: break
184+
skipNewlines()
185+
# A new part begins: close the file of the previous part if one is open.
if prevStreamBoundary.isSome:
186+
prevStreamBoundary.get[].fileContent.close()
187+
prevStreamBoundary = none(ptr Boundary)
188+
# Two headers: the part is stored as a `MultipartFile` and written to a
# file in `mp.tmpDirectory` named after a freshly generated Oid.
if headers.len == 2:
189+
let fileId = $genOid()
190+
let filepath = mp.tmpDirectory / fileId
191+
add mp.boundaries,
192+
Boundary(
193+
dataType: MultipartFile,
194+
fileId: fileId,
195+
fieldName: headers[0].value[0][1],
196+
fileName: headers[0].value[1][1],
197+
fileType: headers[1].value[0][0],
198+
filePath: filepath,
199+
fileContent: open(filepath, fmWrite)
200+
)
201+
prevStreamBoundary = some(mp.boundaries[^1].addr)
202+
write(prevStreamBoundary.get[].fileContent, curr)
203+
runFileCallback(prevStreamBoundary.get)
204+
# One header: the part is stored as a `MultipartText` field.
elif headers.len == 1:
205+
var inputBoundary =
206+
Boundary(
207+
dataType: MultipartText,
208+
fieldName: headers[0].value[0][1]
209+
)
210+
add inputBoundary.value, curr
211+
add mp.boundaries, inputBoundary
212+
setLen(currBoundary, 0)
213+
else:
214+
# Not the boundary token: hand the buffered characters back to the open
# file, or to the value of the last stored boundary.
if prevStreamBoundary.isSome:
215+
write(prevStreamBoundary.get[].fileContent, currBoundary)
216+
else:
217+
add mp.boundaries[^1].value, currBoundary
218+
setLen(currBoundary, 0)
219+
else: discard
220+
# Characters still buffered at this point are written to the open file,
# if there is one.
if prevStreamBoundary.isSome:
221+
write(prevStreamBoundary.get[].fileContent, currBoundary)
222+
setLen(currBoundary, 0)
224+
225+
#
226+
# Public API
227+
#
228+
proc initMultipart*(contentType: string,
    fileCallback: ptr MultipartFileCallback = nil,
    tmpDir = ""
  ): Multipart =
  ## Builds a `Multipart` value.
  ##
  ## `contentType` is stored as the boundary line, `fileCallback` is
  ## stored as given (may be `nil`). Files are kept in `tmpDir`, or in
  ## `getTempDir() / getMD5(getAppDir())` when `tmpDir` is empty.
  let storageDir =
    if tmpDir.len == 0: getTempDir() / getMD5(getAppDir())
    else: tmpDir
  Multipart(
    tmpDirectory: storageDir,
    boundaryLine: contentType,
    fileCallback: fileCallback
  )
239+
240+
proc extractBoundary(contentTypeValue: string): string =
  ## Returns the value of the `boundary` parameter found in a
  ## `multipart/*` Content-Type value, without surrounding quotes.
  ## Raises `MultipartInvalidHeader` when it is missing or empty.
  const marker = "boundary="
  # Parameter names are matched case-insensitively; lower-casing keeps
  # the byte offsets identical to the original string.
  let at = contentTypeValue.toLowerAscii.find(marker)
  if at < 0:
    raise newException(MultipartInvalidHeader,
      "Content-Type header has no `boundary` parameter")
  var i = at + marker.len
  if i < contentTypeValue.len and contentTypeValue[i] == '"':
    # quoted form: everything up to the closing quote
    inc i
    discard contentTypeValue.parseUntil(result, '"', i)
  else:
    # unquoted form: ends at the next parameter or at the end of the line
    discard contentTypeValue.parseUntil(result, {';', '\c', '\l'}, i)
    result = result.strip()
  if result.len == 0:
    raise newException(MultipartInvalidHeader,
      "Content-Type header has an empty `boundary` parameter")

proc parse*(mp: var Multipart, body: sink string, tmpDir = "") =
  ## Parses a `multipart/form-data` `body` and stores the parts it finds
  ## in `mp`. File parts are written below the directory chosen in
  ## `initMultipart`, which is created here when it does not exist.
  ##
  ## `tmpDir` is accepted but not used by this proc.
  ##
  ## Raises `MultipartInvalidHeader` when the stored Content-Type value
  ## carries no usable `boundary` parameter.
  # The names `boundary`, `body`, `curr`, `prevStreamBoundary` and
  # `skipUntilNextBoundary` are read by the dirty templates expanded below.
  let boundary = extractBoundary(mp.boundaryLine)
  discard existsOrCreateDir(mp.tmpDirectory)
  var
    prevStreamBoundary: Option[ptr Boundary]
    body = newStringStream(body)
    skipUntilNextBoundary: bool
    curr: char
  try:
    while not atEnd(body):
      if skipUntilNextBoundary:
        while curr != '-' and not body.atEnd:
          curr = body.readChar()
        parseBoundary()
        skipUntilNextBoundary = false
      else:
        curr = body.readChar()
        case curr
        of Newlines:
          if prevStreamBoundary.isSome:
            write(prevStreamBoundary.get[].fileContent, curr)
            runFileCallback(prevStreamBoundary.get)
        of '-':
          parseBoundary()
        else:
          # Payload byte: it belongs to the most recently stored part.
          # Bytes read before any part exists have nowhere to go and
          # are dropped instead of indexing an empty sequence.
          if mp.boundaries.len > 0:
            let currBoundary = addr(mp.boundaries[^1])
            case currBoundary[].dataType
            of MultipartFile:
              write(currBoundary[].fileContent, curr)
              runFileCallback(currBoundary)
            of MultipartText:
              add currBoundary[].value, curr
  finally:
    # Release the open file and the stream even when parsing raised.
    if prevStreamBoundary.isSome:
      prevStreamBoundary.get[].fileContent.close()
    body.close()
283+
284+
proc getTempDir*(mp: Multipart): string =
  ## Gives back the temporary directory path stored in `mp`.
  result = mp.tmpDirectory
287+
288+
proc getPath*(boundary: Boundary): string =
  ## Returns the file path of a `Boundary` whose `dataType` is
  ## `MultipartFile`.
  ##
  ## A `MultipartText` boundary has no file; an empty string is returned
  ## for it instead of reading a field of the inactive variant branch.
  case boundary.dataType
  of MultipartFile: boundary.filePath
  of MultipartText: ""
293+
294+
proc getMagicNumbers*(boundary: Boundary): seq[byte] =
  ## Meant to return the magic numbers collected while parsing
  ## `boundary`. The body does not read `boundary`, so the result is
  ## always an empty sequence.
  result = @[]
296+
297+
proc len*(mp: Multipart): int =
  ## Number of entries in `mp.boundaries`.
  result = len(mp.boundaries)
298+
299+
iterator items*(mp: Multipart): Boundary =
  ## Yields each `Boundary` held in `mp.boundaries`, first to last.
  for idx in 0 ..< mp.boundaries.len:
    yield mp.boundaries[idx]

tests/config.nims

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
switch("path", "$projectDir/../src")

0 commit comments

Comments
 (0)