@@ -21,6 +21,23 @@ use serde::de::Error as DeError;
2121use serde:: ser:: SerializeStruct ;
2222use serde:: { Deserialize , Deserializer , Serialize , Serializer , de} ;
2323
/// Upper bound on the size of a regex once it has been compiled.
///
/// The value equals the `regex` crate's own default at the time of writing.
///
/// Note: this number is quoted in our user-facing docs, in the "String operators" section of
/// the function reference.
const MAX_REGEX_SIZE_AFTER_COMPILATION: usize = 10 * (1 << 20);
30+
/// Upper bound, in bytes, on the length of a regex pattern before it is compiled.
///
/// This is enforced separately from `MAX_REGEX_SIZE_AFTER_COMPILATION`. The `regex` crate
/// promises that its `size_limit` option (which we set to that other limit) prevents excessive
/// resource usage, but in practice this does not seem to hold. Because regexes are compiled in
/// envd, a strict limit on the input is needed to prevent envd OOMs.
/// See <https://github.com/MaterializeInc/database-issues/issues/9907> for an example.
///
/// Note: this number is quoted in our user-facing docs, in the "String operators" section of
/// the function reference.
const MAX_REGEX_SIZE_BEFORE_COMPILATION: usize = 1 << 20;
40+
2441/// A hashable, comparable, and serializable regular expression type.
2542///
2643/// The [`regex::Regex`] type, the de facto standard regex type in Rust, does
@@ -58,7 +75,7 @@ impl Regex {
5875 /// A simple constructor for the default setting of `dot_matches_new_line: true`.
5976 /// See <https://www.postgresql.org/docs/current/functions-matching.html#POSIX-MATCHING-RULES>
6077 /// "newline-sensitive matching"
61- pub fn new ( pattern : & str , case_insensitive : bool ) -> Result < Regex , Error > {
78+ pub fn new ( pattern : & str , case_insensitive : bool ) -> Result < Regex , RegexCompilationError > {
6279 Self :: new_dot_matches_new_line ( pattern, case_insensitive, true )
6380 }
6481
@@ -67,10 +84,16 @@ impl Regex {
6784 pattern : & str ,
6885 case_insensitive : bool ,
6986 dot_matches_new_line : bool ,
70- ) -> Result < Regex , Error > {
87+ ) -> Result < Regex , RegexCompilationError > {
88+ if pattern. len ( ) > MAX_REGEX_SIZE_BEFORE_COMPILATION {
89+ return Err ( RegexCompilationError :: PatternTooLarge {
90+ pattern_size : pattern. len ( ) ,
91+ } ) ;
92+ }
7193 let mut regex_builder = RegexBuilder :: new ( pattern) ;
7294 regex_builder. case_insensitive ( case_insensitive) ;
7395 regex_builder. dot_matches_new_line ( dot_matches_new_line) ;
96+ regex_builder. size_limit ( MAX_REGEX_SIZE_AFTER_COMPILATION ) ;
7497 Ok ( Regex {
7598 case_insensitive,
7699 dot_matches_new_line,
@@ -86,6 +109,36 @@ impl Regex {
86109 }
87110}
88111
112+ /// Error type for regex compilation failures.
113+ #[ derive( Debug , Clone ) ]
114+ pub enum RegexCompilationError {
115+ /// Wrapper for regex crate's Error type.
116+ RegexError ( Error ) ,
117+ /// Regex pattern size exceeds MAX_REGEX_SIZE_BEFORE_COMPILATION.
118+ PatternTooLarge { pattern_size : usize } ,
119+ }
120+
121+ impl fmt:: Display for RegexCompilationError {
122+ fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
123+ match self {
124+ RegexCompilationError :: RegexError ( e) => write ! ( f, "{}" , e) ,
125+ RegexCompilationError :: PatternTooLarge {
126+ pattern_size : patter_size,
127+ } => write ! (
128+ f,
129+ "regex pattern too large ({} bytes, max {} bytes)" ,
130+ patter_size, MAX_REGEX_SIZE_BEFORE_COMPILATION
131+ ) ,
132+ }
133+ }
134+ }
135+
136+ impl From < Error > for RegexCompilationError {
137+ fn from ( e : Error ) -> Self {
138+ RegexCompilationError :: RegexError ( e)
139+ }
140+ }
141+
89142impl PartialEq < Regex > for Regex {
90143 fn eq ( & self , other : & Regex ) -> bool {
91144 self . pattern ( ) == other. pattern ( )
0 commit comments