Per segment chunks #8272

Open · wants to merge 137 commits into develop

Changes from 91 commits (137 commits in total)

Commits
b32a9eb
Update frame provider and media cache
zhiltsov-max Jul 25, 2024
cb4ff93
t
zhiltsov-max Jul 25, 2024
d49233c
t
zhiltsov-max Jul 30, 2024
146a896
Support static chunk building, fix av memory leak, add caching media …
zhiltsov-max Aug 1, 2024
52d1bac
Refactor static chunk generation - extract function, revise threading
zhiltsov-max Aug 2, 2024
0c53436
Refactor and fix task chunk creation from segment chunks, any storage
zhiltsov-max Aug 2, 2024
c166123
Fix chunk number validation
zhiltsov-max Aug 5, 2024
630c97e
Enable formatting for updated components
zhiltsov-max Aug 5, 2024
8d710e7
Remove the checksum field
zhiltsov-max Aug 5, 2024
654a827
Be consistent about returned task chunk types (allow video chunks)
zhiltsov-max Aug 6, 2024
12e5f2a
Support iterator input in video chunk writing
zhiltsov-max Aug 6, 2024
a79a681
Fix type annotation
zhiltsov-max Aug 6, 2024
d5118a2
Refactor video reader memory leak fix, add to reader with manifest
zhiltsov-max Aug 6, 2024
1b429cf
Disable threading in video reading in frame provider
zhiltsov-max Aug 6, 2024
d512312
Fix keyframe search
zhiltsov-max Aug 6, 2024
167ee12
Return frames as generator in dynamic chunk creation
zhiltsov-max Aug 6, 2024
88a9cb2
Update chunk requests in UI
zhiltsov-max Aug 7, 2024
30bf8fd
Update cache indices in FrameDecoder, enable video play
zhiltsov-max Aug 7, 2024
ee3c905
Fix frame retrieval for video
zhiltsov-max Aug 7, 2024
dc03220
Fix frame reading in updated dynamic cache building
zhiltsov-max Aug 7, 2024
4bb8a74
Fix invalid frame quality
zhiltsov-max Aug 9, 2024
f7d2c4c
Fix video reading in media_extractors - exception handling, frame mis…
zhiltsov-max Aug 9, 2024
34d9ca0
Allow disabling static chunks, add seamless switching
zhiltsov-max Aug 9, 2024
8c97967
Extend code formatting
zhiltsov-max Aug 9, 2024
a0fd0ba
Rename function argument
zhiltsov-max Aug 9, 2024
c0480c9
Rename configuration parameter
zhiltsov-max Aug 9, 2024
5caf283
Add av version comment
zhiltsov-max Aug 12, 2024
efbe3a0
Refactor av video reading
zhiltsov-max Aug 12, 2024
fb1284d
Fix manifest access
zhiltsov-max Aug 12, 2024
8edcfc5
Add migration
zhiltsov-max Aug 12, 2024
51a7f83
Update downloading from cloud storage for packed data in task creation
zhiltsov-max Aug 12, 2024
5a2a746
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 12, 2024
65e4174
Update changelog
zhiltsov-max Aug 12, 2024
61f1735
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Aug 12, 2024
34f972f
Update migration name
zhiltsov-max Aug 12, 2024
2bb2b17
Polish some code
zhiltsov-max Aug 12, 2024
3788917
Fix frame retrieval by id
zhiltsov-max Aug 12, 2024
f695ae1
Remove extra import
zhiltsov-max Aug 12, 2024
14a9033
Fix frame access in gt jobs
zhiltsov-max Aug 12, 2024
e8bebe9
Fix frame access in export
zhiltsov-max Aug 12, 2024
bbef52f
Fix frame iteration for frame step and excluded frames, fix export in…
zhiltsov-max Aug 12, 2024
3d5bb52
Remove unused import
zhiltsov-max Aug 13, 2024
0e9c5c8
Fix error check in test
zhiltsov-max Aug 13, 2024
351bdc8
Fix cleanup in test
zhiltsov-max Aug 13, 2024
a71852c
Add handling for disabled static cache during task creation
zhiltsov-max Aug 13, 2024
d90ca0d
Refactor some code
zhiltsov-max Aug 13, 2024
03e749a
Fix downloading for cloud data in task creation
zhiltsov-max Aug 13, 2024
c0822a0
Fix preview reading for projects
zhiltsov-max Aug 13, 2024
56d413f
Fix failing sdk tests
zhiltsov-max Aug 13, 2024
48f4794
Fix other failing sdk tests
zhiltsov-max Aug 13, 2024
5c0cc1a
Improve logging for migration
zhiltsov-max Aug 14, 2024
5abd891
Fix invalid starting index
zhiltsov-max Aug 14, 2024
749b970
Fix frame reading in lambda functions
zhiltsov-max Aug 14, 2024
9105cd3
Fix unintended frame indexing changes
zhiltsov-max Aug 14, 2024
8dafcbe
Fix various indexing errors in media extractors
zhiltsov-max Aug 14, 2024
4cbf82f
Fix temp resource cleanup in server tests
zhiltsov-max Aug 14, 2024
88c34a3
Refactor some code
zhiltsov-max Aug 15, 2024
b0fd006
Remove duplicated tests
zhiltsov-max Aug 15, 2024
2eac04a
Remove extra change
zhiltsov-max Aug 15, 2024
640518c
Fix method name, remove extra method
zhiltsov-max Aug 15, 2024
3a246b3
Remove some shared code in tests, add temp data cleanup
zhiltsov-max Aug 15, 2024
a0704f4
Add checks for successful task creation in tests
zhiltsov-max Aug 15, 2024
cf026ef
Fix invalid variable access in test
zhiltsov-max Aug 15, 2024
f73cef3
Update default cache location in test checks
zhiltsov-max Aug 15, 2024
258c800
Update manifest validation logic, allow manifest input in any task da…
zhiltsov-max Aug 16, 2024
b3ae317
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 16, 2024
5e89ef4
Add task chunk caching, refactor chunk building
zhiltsov-max Aug 16, 2024
c5edcda
Refactor some code
zhiltsov-max Aug 16, 2024
7f5c722
Refactor some code
zhiltsov-max Aug 16, 2024
daf4035
Improve parameter name
zhiltsov-max Aug 16, 2024
8c1b82c
Fix function call
zhiltsov-max Aug 16, 2024
f172865
Add basic test set for meta, frames, and chunks reading in tasks
zhiltsov-max Aug 16, 2024
aacceee
Move class declaration for pylint compatibility
zhiltsov-max Aug 16, 2024
c8dbb7c
Add missing original chunk type field in job responses
zhiltsov-max Aug 16, 2024
6b9a3e9
Add tests for job data access
zhiltsov-max Aug 16, 2024
f5661e4
Update test assets
zhiltsov-max Aug 16, 2024
754757f
Clean imports
zhiltsov-max Aug 16, 2024
0c001a5
Python 3.8 compatibility
zhiltsov-max Aug 16, 2024
a9390eb
Python 3.8 compatibility
zhiltsov-max Aug 17, 2024
d2b1385
Python 3.8 compatibility
zhiltsov-max Aug 17, 2024
c9a5e31
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 19, 2024
621afa7
Add logging into shell command runs, fix invalid redis-cli invocation…
zhiltsov-max Aug 19, 2024
e40ffd1
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Aug 19, 2024
08c9f01
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 19, 2024
92a19f4
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 21, 2024
441d0e7
Allow calling flushall in redis in helm tests
zhiltsov-max Aug 21, 2024
0963f94
Update comment
zhiltsov-max Aug 21, 2024
0d78e63
Update redis cleanup command
zhiltsov-max Aug 21, 2024
f53948d
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 23, 2024
e69f2b7
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 23, 2024
1a9a813
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 23, 2024
e4db8ad
Reuse _get
zhiltsov-max Aug 28, 2024
b1c54f9
Make get_checksum private
zhiltsov-max Aug 28, 2024
5312b00
Add get_raw_data_dirname to the Data model
zhiltsov-max Aug 28, 2024
3c117fe
Make SegmentFrameProvider available in make_frame_provider
zhiltsov-max Aug 28, 2024
98eff81
Remove extra variable
zhiltsov-max Aug 28, 2024
316ec78
Include both cases of CVAT_ALLOW_STATIC_CACHE in CI checks
zhiltsov-max Aug 28, 2024
ebed825
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Aug 28, 2024
92f6083
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Aug 28, 2024
2b6e987
Remove extra import
zhiltsov-max Aug 28, 2024
f67a1a2
Update changelog
zhiltsov-max Sep 5, 2024
d72fe85
Refactor cache keys in media cache
zhiltsov-max Sep 5, 2024
d5bfb88
Refactor selective segment chunk creation
zhiltsov-max Sep 5, 2024
c5a1197
Remove the breaking change in the chunk retrieval API, add a new inde…
zhiltsov-max Sep 6, 2024
a5cf3b7
Update UI to use the new chunk index parameter
zhiltsov-max Sep 7, 2024
069f48c
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 7, 2024
cfdde3f
Update test initialization
zhiltsov-max Sep 7, 2024
843b957
Update changelog
zhiltsov-max Sep 7, 2024
feb92cd
Add backward compatibility for chunk "number" in GT jobs, remove plac…
zhiltsov-max Sep 9, 2024
2424f2b
Update UI to support job chunks with non-sequential frame ids
zhiltsov-max Sep 9, 2024
fe60bdf
Fix job frame retrieval
zhiltsov-max Sep 9, 2024
6ddb6bf
Fix 3d task chunk writing
zhiltsov-max Sep 9, 2024
4fa7b97
Fix frame retrieval in UI
zhiltsov-max Sep 10, 2024
32f1be2
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 10, 2024
0e95b40
Fix chunk availability check
zhiltsov-max Sep 11, 2024
21135b7
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Sep 11, 2024
b311f1e
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 11, 2024
79bb1f7
Remove array comparisons
zhiltsov-max Sep 12, 2024
55a8424
Update validateFrameNumbers
zhiltsov-max Sep 12, 2024
add5ae6
Use builtins for range and binary search, convert frame step into a c…
zhiltsov-max Sep 12, 2024
643d998
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Sep 12, 2024
df90b33
Fix cached chunk indicators in frame player
zhiltsov-max Sep 12, 2024
6ccb7db
Fix chunk predecode logic
zhiltsov-max Sep 13, 2024
1fb68bc
Rename chunkNumber to chunkIndex where necessary
zhiltsov-max Sep 13, 2024
92d0c7a
Fix potential prefetch problem with reverse playback
zhiltsov-max Sep 13, 2024
67c1650
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 13, 2024
3cdc4dc
Move env variable into docker-compose.yml
zhiltsov-max Sep 16, 2024
716042e
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Sep 16, 2024
19279c7
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 16, 2024
bc5ed39
Fix invalid cached chunk display in GT jobs
zhiltsov-max Sep 17, 2024
08ddd28
Fix invalid task preview generation
zhiltsov-max Sep 17, 2024
1d969bd
Refactor CS previews, context image chunk generation, media cache cre…
zhiltsov-max Sep 17, 2024
e2cba8c
Merge remote-tracking branch 'origin/zm/job-chunks' into zm/job-chunks
zhiltsov-max Sep 17, 2024
d135475
Remove extra import
zhiltsov-max Sep 17, 2024
a1638c9
Fix CS preview in response
zhiltsov-max Sep 17, 2024
fc89c01
Add reverse migration
zhiltsov-max Sep 17, 2024
c6e65f6
Merge branch 'develop' into zm/job-chunks
zhiltsov-max Sep 17, 2024
4 changes: 4 additions & 0 deletions changelog.d/20240812_161617_mzhiltso_job_chunks.md
zhiltsov-max marked this conversation as resolved.
@@ -0,0 +1,4 @@
### Added

- A server setting to disable media chunks on the local filesystem
Member:

Suggested change:
Original: - A server setting to disable media chunks on the local filesystem
Suggested: - A server setting to disable permanent media chunks on local filesystem

Contributor Author (zhiltsov-max, Aug 29, 2024):

There were only permanent chunks on the local filesystem; what did you want to reflect in the change?

Member:

Regular users (even those who may install a self-hosted solution) do not know such implementation details.

Member:

I just want to make it clear that this is specifically about permanent chunks.

Contributor Author (zhiltsov-max):

Ok, but it's just what is passed in the task data endpoint API: storage_method = 'file_system' | 'cache'.

Contributor Author (zhiltsov-max):

Updated the message

(<https://github.com/cvat-ai/cvat/pull/8272>)
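For readers less familiar with this setting, here is a minimal sketch of where the choice discussed above surfaces in the API. The `storage_method` values come from the comment thread; the endpoint shape, credentials, task id, and other fields are assumptions for illustration, not an exact request from this PR:

```python
# Illustrative sketch only. The storage_method values ('file_system' | 'cache')
# are quoted from the discussion above; everything else (URL, auth, task id,
# other fields) is a hypothetical example, not taken from this PR.
import requests

session = requests.Session()
session.auth = ("user", "password")          # hypothetical credentials
task_id = 42                                 # hypothetical task

payload = {
    "image_quality": 70,
    "server_files": ["video.mp4"],
    # 'file_system' asks for permanent (static) chunks on the local filesystem;
    # 'cache' keeps chunks in the dynamic media cache instead.
    "storage_method": "file_system",
}
resp = session.post(f"https://cvat.example.com/api/tasks/{task_id}/data", json=payload)
resp.raise_for_status()
```

Per the commit history, the new server setting (exposed through the CVAT_ALLOW_STATIC_CACHE environment variable) appears to control whether the 'file_system' choice is honored at all; when it is disabled, the server presumably falls back to the cache behaviour.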
4 changes: 4 additions & 0 deletions changelog.d/20240812_161734_mzhiltso_job_chunks.md
@@ -0,0 +1,4 @@
### Changed

- Jobs now have separate chunk ids starting from 0, instead of using ones from the task
Member:

I would suggest formulating this more clearly for the end user (though I'm not sure exactly how). This record does not tell me anything as a CVAT user; it is probably only important for REST API users.

Maybe: Numbering of data chunks in any job always starts with 0

Member:

By the way, this change is breaking, which means we would have to bump the major version of the REST API.
However, I am not sure we want to do it now. Probably worthy of an internal discussion.

Contributor Author (zhiltsov-max, Aug 29, 2024):

> By the way, this change is breaking, which means we would have to bump the major version of the REST API.

SemVer is not used in the server REST API now.

> This record does not tell me anything as a CVAT user; it is probably only important for REST API users.

Ok, do you want me to add some tag to clarify that this is for API users, or do you want it removed from the changelog? REST API users are users as well.

Member:

Why did you decide so?

Contributor Author (zhiltsov-max):

We never established SemVer for the server component. There were discussions a couple of years ago, and we decided to postpone it. Currently, the server API can change in any release; its version is the same as the release itself.

Member (bsekachev, Aug 29, 2024):

As I mentioned in the beginning:

> However, I am not sure we want to do it now. Probably worthy of an internal discussion.

On the other hand, I see strong reasons not to update the version right now.

Member:

Maybe instead of modifying the existing chunk_number parameter, we should add one more and deprecate the previous behaviour.

Contributor Author (zhiltsov-max):

Yes, it's certainly doable.

Contributor Author (zhiltsov-max):

Added the index parameter.
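
A rough sketch of how the two addressing modes differ, assuming the existing job data endpoint. The parameter names `index` and `number` follow this thread and the commit history; the URL, job id, and credentials are illustrative:

```python
# Illustrative sketch. Endpoint and parameter names are assumptions based on
# the discussion above ("index" added, "number" kept for backward compatibility);
# the server URL, job id and credentials are hypothetical.
import requests

session = requests.Session()
session.auth = ("user", "password")                     # hypothetical credentials
url = "https://cvat.example.com/api/jobs/17/data"       # hypothetical job

# New, job-relative addressing: chunk indices always start from 0 within a job.
new_style = session.get(url, params={"type": "chunk", "quality": "compressed", "index": 0})

# Old, task-relative addressing is kept for backward compatibility.
old_style = session.get(url, params={"type": "chunk", "quality": "compressed", "number": 3})
```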

Contributor Author (zhiltsov-max):

Implemented + refactored the UI a little bit. There are two potential improvements still to be done (a sketch of the first one follows the list):

  • add binary search instead of a linear one
  • use lightweight ranges instead of materializing them into number arrays
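
A sketch of the first item, written in Python rather than the UI's TypeScript for brevity; the frame list is made up and only illustrates the lookup the comment refers to:

```python
# Sketch of the binary-search idea from the list above; not the actual UI code.
# The job frame list is a made-up example of non-sequential frame ids.
from bisect import bisect_left

job_frames = list(range(100, 200, 2))    # sorted frame numbers of a hypothetical job

def frame_position(frame: int) -> int:
    # Index of the frame within the job, found in O(log n) instead of a linear scan.
    pos = bisect_left(job_frames, frame)
    if pos == len(job_frames) or job_frames[pos] != frame:
        raise ValueError(f"frame {frame} does not belong to the job")
    return pos

assert frame_position(104) == 2
```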

(<https://github.com/cvat-ai/cvat/pull/8272>)
6 changes: 6 additions & 0 deletions changelog.d/20240812_161912_mzhiltso_job_chunks.md
@@ -0,0 +1,6 @@
### Fixed

- Various memory leaks in video reading on the server
(<https://github.com/cvat-ai/cvat/pull/8272>)
- Job assignees will not receive frames from adjacent jobs in the boundary chunks
(<https://github.com/cvat-ai/cvat/pull/8272>)
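
To make the second fix concrete, here is a small worked example with assumed numbers (chunk size and segment bounds are made up; only the arithmetic is the point):

```python
# Worked example with assumed numbers: why per-segment chunks stop leaking
# frames of adjacent jobs into boundary chunks.
chunk_size = 10
segment = range(8, 23)            # this job owns frames 8..22 (assumed bounds)

# Before: chunk ids were task-relative, so the job's first chunk was task chunk 0,
# which also carried frames 0..7 belonging to the previous job.
task_chunk_ids = sorted({f // chunk_size for f in segment})            # [0, 1, 2]
first_task_chunk = list(range(0, chunk_size))                          # frames 0..9

# After: chunks are built per segment and numbered from 0 inside the job,
# so each chunk contains only the job's own frames.
start = segment.start
job_chunk_ids = sorted({(f - start) // chunk_size for f in segment})   # [0, 1]
first_job_chunk = [f for f in segment if (f - start) // chunk_size == 0]
assert first_job_chunk == list(range(8, 18))                           # no foreign frames
```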
40 changes: 22 additions & 18 deletions cvat-core/src/frames.ts
@@ -25,7 +25,7 @@ const frameDataCache: Record<string, {
latestFrameDecodeRequest: number | null;
latestContextImagesRequest: number | null;
provider: FrameDecoder;
prefetchAnalizer: PrefetchAnalyzer;
prefetchAnalyzer: PrefetchAnalyzer;
decodedBlocksCacheSize: number;
activeChunkRequest: Promise<void> | null;
activeContextRequest: Promise<Record<number, ImageBitmap>> | null;
@@ -208,24 +208,26 @@ export class FrameData {
class PrefetchAnalyzer {
#chunkSize: number;
#requestedFrames: number[];
#startFrame: number;

constructor(chunkSize) {
constructor(chunkSize, startFrame) {
this.#chunkSize = chunkSize;
this.#requestedFrames = [];
this.#startFrame = startFrame;
}

shouldPrefetchNext(current: number, isPlaying: boolean, isChunkCached: (chunk) => boolean): boolean {
if (isPlaying) {
return true;
}

const currentChunk = Math.floor(current / this.#chunkSize);
const currentChunk = Math.floor((current - this.#startFrame) / this.#chunkSize);
const { length } = this.#requestedFrames;
const isIncreasingOrder = this.#requestedFrames
.every((val, index) => index === 0 || val > this.#requestedFrames[index - 1]);
if (
length && (isIncreasingOrder && current > this.#requestedFrames[length - 1]) &&
(current % this.#chunkSize) >= Math.ceil(this.#chunkSize / 2) &&
((current - this.#startFrame) % this.#chunkSize) >= Math.ceil(this.#chunkSize / 2) &&
!isChunkCached(currentChunk + 1)
) {
// is increasing order including the current frame
@@ -262,19 +264,20 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
imageData: ImageBitmap | Blob;
} | Blob>((resolve, reject) => {
const {
provider, prefetchAnalizer, chunkSize, stopFrame, decodeForward, forwardStep, decodedBlocksCacheSize,
provider, prefetchAnalyzer, chunkSize, startFrame, stopFrame,
decodeForward, forwardStep, decodedBlocksCacheSize,
} = frameDataCache[this.jobID];

const requestId = +_.uniqueId();
const chunkNumber = Math.floor(this.number / chunkSize);
const chunkNumber = Math.floor((this.number - startFrame) / chunkSize);
const frame = provider.frame(this.number);

function findTheNextNotDecodedChunk(searchFrom: number): number {
let firstFrameInNextChunk = searchFrom + forwardStep;
let nextChunkNumber = Math.floor(firstFrameInNextChunk / chunkSize);
let nextChunkNumber = Math.floor((firstFrameInNextChunk - startFrame) / chunkSize);
while (nextChunkNumber === chunkNumber) {
firstFrameInNextChunk += forwardStep;
nextChunkNumber = Math.floor(firstFrameInNextChunk / chunkSize);
nextChunkNumber = Math.floor((firstFrameInNextChunk - startFrame) / chunkSize);
}

if (provider.isChunkCached(nextChunkNumber)) {
@@ -286,15 +289,15 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {

if (frame) {
if (
prefetchAnalizer.shouldPrefetchNext(
prefetchAnalyzer.shouldPrefetchNext(
this.number,
decodeForward,
(chunk) => provider.isChunkCached(chunk),
) && decodedBlocksCacheSize > 1 && !frameDataCache[this.jobID].activeChunkRequest
) {
const nextChunkNumber = findTheNextNotDecodedChunk(this.number);
const predecodeChunksMax = Math.floor(decodedBlocksCacheSize / 2);
if (nextChunkNumber * chunkSize <= stopFrame &&
if (startFrame + nextChunkNumber * chunkSize <= stopFrame &&
nextChunkNumber <= chunkNumber + predecodeChunksMax
) {
frameDataCache[this.jobID].activeChunkRequest = new Promise((resolveForward) => {
@@ -316,8 +319,8 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
provider.cleanup(1);
provider.requestDecodeBlock(
chunk,
nextChunkNumber * chunkSize,
Math.min(stopFrame, (nextChunkNumber + 1) * chunkSize - 1),
startFrame + nextChunkNumber * chunkSize,
Math.min(stopFrame, startFrame + (nextChunkNumber + 1) * chunkSize - 1),
() => {},
releasePromise,
releasePromise,
@@ -334,7 +337,7 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
renderHeight: this.height,
imageData: frame,
});
prefetchAnalizer.addRequested(this.number);
prefetchAnalyzer.addRequested(this.number);
return;
}

@@ -355,7 +358,7 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
renderHeight: this.height,
imageData: currentFrame,
});
prefetchAnalizer.addRequested(this.number);
prefetchAnalyzer.addRequested(this.number);
return;
}

@@ -378,8 +381,8 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
provider
.requestDecodeBlock(
chunk,
chunkNumber * chunkSize,
Math.min(stopFrame, (chunkNumber + 1) * chunkSize - 1),
startFrame + chunkNumber * chunkSize,
Math.min(stopFrame, startFrame + (chunkNumber + 1) * chunkSize - 1),
(_frame: number, bitmap: ImageBitmap | Blob) => {
if (decodeForward) {
// resolve immediately only if is not playing
@@ -395,7 +398,7 @@ Object.defineProperty(FrameData.prototype.data, 'implementation', {
renderHeight: this.height,
imageData: bitmap,
});
prefetchAnalizer.addRequested(this.number);
prefetchAnalyzer.addRequested(this.number);
}
}, () => {
frameDataCache[this.jobID].activeChunkRequest = null;
@@ -612,9 +615,10 @@ export async function getFrame(
blockType,
chunkSize,
decodedBlocksCacheSize,
startFrame,
dimension,
),
prefetchAnalizer: new PrefetchAnalyzer(chunkSize),
prefetchAnalyzer: new PrefetchAnalyzer(chunkSize, startFrame),
decodedBlocksCacheSize,
activeChunkRequest: null,
activeContextRequest: null,
7 changes: 5 additions & 2 deletions cvat-data/src/ts/cvat-data.ts
@@ -100,11 +100,13 @@ export class FrameDecoder {
private renderHeight: number;
private zipWorker: Worker | null;
private videoWorker: Worker | null;
private startFrame: number;

constructor(
blockType: BlockType,
chunkSize: number,
cachedBlockCount: number,
startFrame: number,
dimension: DimensionType = DimensionType.DIMENSION_2D,
) {
this.mutex = new Mutex();
@@ -118,6 +120,7 @@
this.renderWidth = 1920;
this.renderHeight = 1080;
this.chunkSize = chunkSize;
this.startFrame = startFrame;
this.blockType = blockType;

this.decodedChunks = {};
@@ -203,7 +206,7 @@ }
}

frame(frameNumber: number): ImageBitmap | Blob | null {
const chunkNumber = Math.floor(frameNumber / this.chunkSize);
const chunkNumber = Math.floor((frameNumber - this.startFrame) / this.chunkSize);
if (chunkNumber in this.decodedChunks) {
return this.decodedChunks[chunkNumber][frameNumber];
}
@@ -262,7 +265,7 @@
throw new RequestOutdatedError();
}

const chunkNumber = Math.floor(start / this.chunkSize);
const chunkNumber = Math.floor((start - this.startFrame) / this.chunkSize);
this.orderedStack = [chunkNumber, ...this.orderedStack];
this.cleanup();
const decodedFrames: Record<number, ImageBitmap | Blob> = {};
47 changes: 29 additions & 18 deletions cvat/apps/dataset_manager/bindings.py
@@ -31,8 +31,8 @@

from cvat.apps.dataset_manager.formats.utils import get_label_color
from cvat.apps.dataset_manager.util import add_prefetch_fields
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, DimensionType, Job,
from cvat.apps.engine.frame_provider import TaskFrameProvider, FrameQuality, FrameOutputType
from cvat.apps.engine.models import (AttributeSpec, AttributeType, DimensionType, Job,
JobType, Label, LabelType, Project, SegmentType, ShapeType,
Task)
from cvat.apps.engine.rq_job_handler import RQJobMetaField
@@ -240,7 +240,7 @@ def start(self) -> int:

@property
def stop(self) -> int:
return len(self)
return max(0, len(self) - 1)

def _get_queryset(self):
raise NotImplementedError()
@@ -376,7 +376,7 @@ def _export_tag(self, tag):
def _export_track(self, track, idx):
track['shapes'] = list(filter(lambda x: not self._is_frame_deleted(x['frame']), track['shapes']))
tracked_shapes = TrackManager.get_interpolated_shapes(
track, 0, self.stop, self._annotation_ir.dimension)
track, 0, self.stop + 1, self._annotation_ir.dimension)
for tracked_shape in tracked_shapes:
tracked_shape["attributes"] += track["attributes"]
tracked_shape["track_id"] = track["track_id"] if self._use_server_track_ids else idx
@@ -432,7 +432,7 @@ def get_frame(idx):

anno_manager = AnnotationManager(self._annotation_ir)
for shape in sorted(
anno_manager.to_shapes(self.stop, self._annotation_ir.dimension,
anno_manager.to_shapes(self.stop + 1, self._annotation_ir.dimension,
# Skip outside, deleted and excluded frames
included_frames=included_frames,
include_outside=False,
@@ -763,7 +763,7 @@ def start(self) -> int:
@property
def stop(self) -> int:
segment = self._db_job.segment
return segment.stop_frame + 1
return segment.stop_frame

@property
def db_instance(self):
@@ -1333,7 +1333,7 @@ def add_task(self, task, files):

@attrs(frozen=True, auto_attribs=True)
class ImageSource:
db_data: Data
db_task: Task
is_video: bool = attrib(kw_only=True)

class ImageProvider:
Expand Down Expand Up @@ -1362,25 +1362,29 @@ def video_frame_loader(_):
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
return self._frame_provider.get_frame(frame_index,
quality=FrameProvider.Quality.ORIGINAL,
out_type=FrameProvider.Type.NUMPY_ARRAY)[0]
quality=FrameQuality.ORIGINAL,
out_type=FrameOutputType.NUMPY_ARRAY
).data

return dm.Image(data=video_frame_loader, **image_kwargs)
else:
def image_loader(_):
self._load_source(source_id, source)

# for images use encoded data to avoid recoding
return self._frame_provider.get_frame(frame_index,
quality=FrameProvider.Quality.ORIGINAL,
out_type=FrameProvider.Type.BUFFER)[0].getvalue()
quality=FrameQuality.ORIGINAL,
out_type=FrameOutputType.BUFFER
).data.getvalue()

return dm.ByteImage(data=image_loader, **image_kwargs)

def _load_source(self, source_id: int, source: ImageSource) -> None:
if self._current_source_id == source_id:
return

self._unload_source()
self._frame_provider = FrameProvider(source.db_data)
self._frame_provider = TaskFrameProvider(source.db_task)
self._current_source_id = source_id

def _unload_source(self) -> None:
@@ -1396,7 +1400,7 @@ def __init__(self, sources: Dict[int, ImageSource]) -> None:
self._images_per_source = {
source_id: {
image.id: image
for image in source.db_data.images.prefetch_related('related_files')
for image in source.db_task.data.images.prefetch_related('related_files')
}
for source_id, source in sources.items()
}
@@ -1405,7 +1409,7 @@ def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
source = self._sources[source_id]

point_cloud_path = osp.join(
source.db_data.get_upload_dirname(), image_kwargs['path'],
source.db_task.data.get_upload_dirname(), image_kwargs['path'],
)

image = self._images_per_source[source_id][frame_id]
@@ -1518,11 +1522,18 @@ def __init__(
is_video = instance_meta['mode'] == 'interpolation'
ext = ''
if is_video:
ext = FrameProvider.VIDEO_FRAME_EXT
ext = TaskFrameProvider.VIDEO_FRAME_EXT

if dimension == DimensionType.DIM_3D or include_images:
if isinstance(instance_data, TaskData):
db_task = instance_data.db_instance
elif isinstance(instance_data, JobData):
db_task = instance_data.db_instance.segment.task
else:
assert False

self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[dimension](
{0: ImageSource(instance_data.db_data, is_video=is_video)}
{0: ImageSource(db_task, is_video=is_video)}
)

for frame_data in instance_data.group_by_frame(include_empty=True):
@@ -1604,13 +1615,13 @@ def __init__(
if self._dimension == DimensionType.DIM_3D or include_images:
self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[self._dimension](
{
task.id: ImageSource(task.data, is_video=task.mode == 'interpolation')
task.id: ImageSource(task, is_video=task.mode == 'interpolation')
for task in project_data.tasks
}
)

ext_per_task: Dict[int, str] = {
task.id: FrameProvider.VIDEO_FRAME_EXT if is_video else ''
task.id: TaskFrameProvider.VIDEO_FRAME_EXT if is_video else ''
for task in project_data.tasks
for is_video in [task.mode == 'interpolation']
}
23 changes: 13 additions & 10 deletions cvat/apps/dataset_manager/formats/cvat.py
@@ -27,7 +27,7 @@
import_dm_annotations,
match_dm_item)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.frame_provider import FrameQuality, FrameOutputType, make_frame_provider

from .registry import dm_env, exporter, importer

@@ -1371,16 +1371,19 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb
dumper.close_document()

def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None):
frame_provider = make_frame_provider(instance_data.db_instance)

ext = ''
if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation':
ext = FrameProvider.VIDEO_FRAME_EXT

frame_provider = FrameProvider(instance_data.db_data)
frames = frame_provider.get_frames(
instance_data.start, instance_data.stop,
frame_provider.Quality.ORIGINAL,
frame_provider.Type.BUFFER)
for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames):
ext = frame_provider.VIDEO_FRAME_EXT

frames = frame_provider.iterate_frames(
start_frame=instance_data.start,
stop_frame=instance_data.stop,
quality=FrameQuality.ORIGINAL,
out_type=FrameOutputType.BUFFER,
)
for frame_id, frame in zip(instance_data.rel_range, frames):
if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \
or frame_id in instance_data.deleted_frames:
continue
@@ -1389,7 +1392,7 @@
img_path = osp.join(img_dir, frame_name + ext)
os.makedirs(osp.dirname(img_path), exist_ok=True)
with open(img_path, 'wb') as f:
f.write(frame_data.getvalue())
f.write(frame.data.getvalue())

def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False):
with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f:
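
A condensed usage sketch of the frame provider interface exercised by the diff above. The names make_frame_provider, FrameQuality, FrameOutputType, iterate_frames, and frame.data come from this PR's code; the job id, frame range, and output file names are hypothetical:

```python
# Condensed sketch of the new frame provider API as used in this PR.
# Identifiers are taken from the diffs above; the concrete job, frame range
# and output file names are hypothetical.
from cvat.apps.engine.frame_provider import (
    FrameOutputType, FrameQuality, make_frame_provider,
)
from cvat.apps.engine.models import Job

job = Job.objects.get(pk=17)               # hypothetical job
provider = make_frame_provider(job)        # also accepts tasks/segments, per the commits

frames = provider.iterate_frames(
    start_frame=0,
    stop_frame=10,
    quality=FrameQuality.ORIGINAL,
    out_type=FrameOutputType.BUFFER,
)
for i, frame in enumerate(frames):
    with open(f"frame_{i:06d}.bin", "wb") as f:   # extension choice is illustrative
        f.write(frame.data.getvalue())
```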