- 
                Notifications
    You must be signed in to change notification settings 
- Fork 3
Fix OOM issues with super large iCalendar files #92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
af8f8f6
              9692929
              84e33f3
              fd65367
              dad980b
              6f52535
              beaf42b
              73380e7
              0da87e2
              4ef91b5
              bb47bc0
              1b17033
              fca4cc1
              20483d9
              6652463
              3910eef
              a257c2c
              7948036
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| /* | ||
| * This file is part of bitfireAT/synctools which is released under GPLv3. | ||
| * Copyright © All Contributors. See the LICENSE and AUTHOR files in the root directory for details. | ||
| * SPDX-License-Identifier: GPL-3.0-or-later | ||
| */ | ||
|  | ||
| package at.bitfire.synctools.icalendar.validation | ||
|  | ||
| import org.junit.Test | ||
| import java.io.Reader | ||
| import java.util.UUID | ||
|  | ||
| class ICalPreprocessorInstrumentedTest { | ||
|  | ||
| class VCalendarReaderGenerator(val eventCount: Int = Int.MAX_VALUE) : Reader() { | ||
|         
                  ArnyminerZ marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| private var stage = 0 // 0 = header, 1 = events, 2 = footer, 3 = done | ||
| private var eventIdx = 0 | ||
| private var current: String? = null | ||
| private var pos = 0 | ||
|  | ||
| override fun reset() { | ||
| stage = 0 | ||
| eventIdx = 0 | ||
| current = null | ||
| pos = 0 | ||
| } | ||
|  | ||
| override fun read(cbuf: CharArray, off: Int, len: Int): Int { | ||
| var charsRead = 0 | ||
| while (charsRead < len) { | ||
| if (current == null || pos >= current!!.length) { | ||
| current = when (stage) { | ||
| 0 -> { | ||
| stage = 1 | ||
| """ | ||
| BEGIN:VCALENDAR | ||
| PRODID:-//xyz Corp//NONSGML PDA Calendar Version 1.0//EN | ||
| VERSION:2.0 | ||
| """.trimIndent() + "\n" | ||
| } | ||
| 1 -> { | ||
| if (eventIdx < eventCount) { | ||
| val event = """ | ||
| BEGIN:VEVENT | ||
| DTSTAMP:19960704T120000Z | ||
| UID:${UUID.randomUUID()} | ||
| ORGANIZER:mailto:[email protected] | ||
| DTSTART:19960918T143000Z | ||
| DTEND:19960920T220000Z | ||
| STATUS:CONFIRMED | ||
| CATEGORIES:CONFERENCE | ||
| SUMMARY:Event $eventIdx | ||
| DESCRIPTION:Event $eventIdx description | ||
| END:VEVENT | ||
| """.trimIndent() + "\n" | ||
| eventIdx++ | ||
| event | ||
| } else { | ||
| stage = 2 | ||
| null | ||
| } | ||
| } | ||
| 2 -> { | ||
| stage = 3 | ||
| "END:VCALENDAR\n" | ||
| } | ||
| else -> return if (charsRead == 0) -1 else charsRead | ||
| } | ||
| pos = 0 | ||
| if (current == null) continue // move to next stage | ||
| } | ||
| val charsLeft = current!!.length - pos | ||
| val toRead = minOf(len - charsRead, charsLeft) | ||
| current!!.toCharArray(pos, pos + toRead).copyInto(cbuf, off + charsRead) | ||
| pos += toRead | ||
| charsRead += toRead | ||
| } | ||
| return charsRead | ||
| } | ||
|  | ||
| override fun close() { | ||
| // No resources to release | ||
| current = null | ||
| } | ||
| } | ||
|  | ||
| @Test | ||
| fun testParse_SuperLargeFiles() { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this test (class) do? As I understand it, this test basically tests whether the input is put into the memory as one chunk (which would fail with OOM) or whether it's processed in streaming mode so that it doesn't load too big files into the memory. So I think there are two things that can be tested: 
 However, when the input is not read at all, why then generate an iCalendar at all? We could as well pass an infinite stream of the same character. 
 This is currently not done by the tests. I noticed it because if we pass  And everything for the two cases that 
 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 
 Yes, and it's purposefully placed in the instrumented test class so that it's run in an emulator, which causes the OOM. Otherwise we are not actually overflowing the emulator but our computer. That's why there's the instrumented suffix, and actually, also exists  
 Mmmmh yeah, good point | ||
| val preprocessor = ICalPreprocessor() | ||
| val reader = VCalendarReaderGenerator() | ||
| preprocessor.preprocessStream(reader) | ||
| // no exception called | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -6,13 +6,16 @@ | |
|  | ||
| package at.bitfire.synctools.icalendar.validation | ||
|  | ||
| import androidx.annotation.VisibleForTesting | ||
|  | ||
| /** | ||
| * Fixes durations with day offsets with the 'T' prefix. | ||
| * See also https://github.com/bitfireAT/ical4android/issues/77 | ||
| */ | ||
| class FixInvalidDayOffsetPreprocessor : StreamPreprocessor() { | ||
| class FixInvalidDayOffsetPreprocessor : StreamPreprocessor { | ||
|  | ||
| override fun regexpForProblem() = Regex( | ||
| @VisibleForTesting | ||
| val regexpForProblem = Regex( | ||
| 
      Comment on lines
    
      +17
     to 
      +18
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be enough to use  | ||
| // Examples: | ||
| // TRIGGER:-P2DT | ||
| // TRIGGER:-PT2D | ||
|  | @@ -21,11 +24,11 @@ class FixInvalidDayOffsetPreprocessor : StreamPreprocessor() { | |
| setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE) | ||
| ) | ||
|  | ||
| override fun fixString(original: String): String { | ||
| var iCal: String = original | ||
| override fun fixString(lines: String): String { | ||
| var iCal: String = lines | ||
|  | ||
| // Find all instances matching the defined expression | ||
| val found = regexpForProblem().findAll(iCal).toList() | ||
| val found = regexpForProblem.findAll(iCal).toList() | ||
|  | ||
| // ... and repair them. Use reversed order so that already replaced occurrences don't interfere with the following matches. | ||
| for (match in found.reversed()) { | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -7,15 +7,21 @@ | |
| package at.bitfire.synctools.icalendar.validation | ||
|  | ||
| import androidx.annotation.VisibleForTesting | ||
| import com.google.common.io.CharSource | ||
| import com.google.errorprone.annotations.MustBeClosed | ||
| import net.fortuna.ical4j.model.Calendar | ||
| import net.fortuna.ical4j.model.Property | ||
| import net.fortuna.ical4j.transform.rfc5545.CreatedPropertyRule | ||
| import net.fortuna.ical4j.transform.rfc5545.DateListPropertyRule | ||
| import net.fortuna.ical4j.transform.rfc5545.DatePropertyRule | ||
| import net.fortuna.ical4j.transform.rfc5545.Rfc5545PropertyRule | ||
| import java.io.BufferedReader | ||
| import java.io.IOException | ||
| import java.io.Reader | ||
| import java.io.StringReader | ||
| import java.util.logging.Level | ||
| import java.util.logging.Logger | ||
| import javax.annotation.WillCloseWhenClosed | ||
|  | ||
| /** | ||
| * Applies some rules to increase compatibility of parsed (incoming) iCalendars: | ||
|  | @@ -27,6 +33,9 @@ import java.util.logging.Logger | |
| */ | ||
| class ICalPreprocessor { | ||
|  | ||
| private val logger | ||
| get() = Logger.getLogger(javaClass.name) | ||
|  | ||
| private val propertyRules = arrayOf( | ||
| CreatedPropertyRule(), // make sure CREATED is UTC | ||
|  | ||
|  | @@ -40,18 +49,64 @@ class ICalPreprocessor { | |
| FixInvalidDayOffsetPreprocessor() // fix things like DURATION:PT2D | ||
| ) | ||
|  | ||
| /** | ||
| * Applies [streamPreprocessors] to some given [lines] by calling `fixString()` repeatedly on each of them. | ||
| * @param lines original iCalendar object as string. This may not contain the full iCalendar, | ||
| * but only a part of it. | ||
| * @return The repaired iCalendar object as string. | ||
| */ | ||
| @VisibleForTesting | ||
| fun applyPreprocessors(lines: String): String { | ||
| var newString = lines | ||
| for (preprocessor in streamPreprocessors) | ||
| newString = preprocessor.fixString(newString) | ||
| return newString | ||
| } | ||
|  | ||
| /** | ||
| * Applies [streamPreprocessors] to a given [Reader] that reads an iCalendar object | ||
| * in order to repair some things that must be fixed before parsing. | ||
| * | ||
| * @param original original iCalendar object | ||
| * @return the potentially repaired iCalendar object | ||
| * The original reader content is processed in chunks of [chunkSize] lines to avoid loading | ||
| * the whole content into memory at once. If the given [Reader] does not support `reset()`, | ||
| * the whole content will be loaded into memory anyway. | ||
| * | ||
| * Closing the returned [Reader] will also close the [original] reader if needed. | ||
| * | ||
| * @param original original iCalendar object. Will be closed after processing. | ||
| * @param chunkSize number of lines to process in one chunk. Default is `1000`. | ||
| * @return A reader that emits the potentially repaired iCalendar object. | ||
| * The returned [Reader] must be closed by the caller. | ||
| */ | ||
| fun preprocessStream(original: Reader): Reader { | ||
| var reader = original | ||
| for (preprocessor in streamPreprocessors) | ||
| reader = preprocessor.preprocess(reader) | ||
| return reader | ||
| @MustBeClosed | ||
| fun preprocessStream(@WillCloseWhenClosed original: Reader, chunkSize: Int = 1_000): Reader { | ||
| val resetSupported = try { | ||
| original.reset() | ||
| true | ||
| 
      Comment on lines
    
      +83
     to 
      +85
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just noticed that we don't need support for  So we can just remove the  | ||
| } catch(_: IOException) { | ||
| // reset is not supported. String will be loaded into memory completely | ||
| false | ||
| } | ||
|  | ||
| if (resetSupported) { | ||
| val chunkedFixedLines = BufferedReader(original) | ||
| .lineSequence() | ||
|         
                  rfc2822 marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| .chunked(chunkSize) | ||
| // iCalendar uses CRLF: https://www.rfc-editor.org/rfc/rfc5545#section-3.1 | ||
| // but we only use \n because in tests line breaks are LF-only. | ||
| // Since CRLF already contains LF, this is not an issue. | ||
| .map { chunk -> | ||
| val fixed = applyPreprocessors(chunk.joinToString("\n")) | ||
| CharSource.wrap(fixed) | ||
| } | ||
| .asIterable() | ||
| // we don't close 'original' here because CharSource.concat() will read from it lazily | ||
| return CharSource.concat(chunkedFixedLines).openStream() | ||
| } else { | ||
| // The reader doesn't support reset, so we need to load the whole content into memory | ||
| logger.warning("Reader does not support reset(). Reading complete iCalendar into memory.") | ||
| return StringReader(applyPreprocessors(original.use { it.readText() })) | ||
| } | ||
| } | ||
|  | ||
|  | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -6,51 +6,14 @@ | |
|  | ||
| package at.bitfire.synctools.icalendar.validation | ||
|  | ||
| import java.io.IOException | ||
| import java.io.Reader | ||
| import java.io.StringReader | ||
| import java.util.Scanner | ||
|  | ||
| abstract class StreamPreprocessor { | ||
|  | ||
| abstract fun regexpForProblem(): Regex? | ||
| interface StreamPreprocessor { | ||
|  | ||
| /** | ||
| * Fixes an iCalendar string. | ||
| * | ||
| * @param original The complete iCalendar string | ||
| * @return The complete iCalendar string, but fixed | ||
| * @param lines The iCalendar lines to fix. Those may be the full iCalendar file, or just a part of it. | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should explicitly mention that  | ||
| * @return The fixed version of [lines]. | ||
| */ | ||
| abstract fun fixString(original: String): String | ||
|  | ||
| fun preprocess(reader: Reader): Reader { | ||
| var result: String? = null | ||
|  | ||
| val resetSupported = try { | ||
| reader.reset() | ||
| true | ||
| } catch(_: IOException) { | ||
| false | ||
| } | ||
|  | ||
| if (resetSupported) { | ||
| val regex = regexpForProblem() | ||
| // reset is supported, no need to copy the whole stream to another String (unless we have to fix the TZOFFSET) | ||
| if (regex == null || Scanner(reader).findWithinHorizon(regex.toPattern(), 0) != null) { | ||
| reader.reset() | ||
| result = fixString(reader.readText()) | ||
| } | ||
| } else | ||
| // reset not supported, always generate a new String that will be returned | ||
| result = fixString(reader.readText()) | ||
|  | ||
| if (result != null) | ||
| // modified or reset not supported, return new stream | ||
| return StringReader(result) | ||
|  | ||
| // not modified, return original iCalendar | ||
| reader.reset() | ||
| return reader | ||
| } | ||
| fun fixString(lines: String): String | ||
|  | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As there's no Android dependency, it should be possible to make this test a unit test (faster and easier to run than an instrumentation test).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See above