Skip to content

Commit 3ae0565

Browse files
committed
Semi-solution with comments describing why it's not perfect
1 parent 7a0a515 commit 3ae0565

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

backend/btrixcloud/operator/crawls.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,16 +1539,28 @@ async def update_crawl_state(
15391539
print(f"status.stopReason: {status.stopReason}", flush=True)
15401540

15411541
print(f"stats.size initial: {stats.size}", flush=True)
1542+
print(f"status.filesAdded: {status.filesAdded}", flush=True)
15421543
print(f"status.filesAddedSize: {status.filesAddedSize}", flush=True)
15431544

15441545
# need to add size of previously completed WACZ files as well!
15451546
# TODO: This sometimes results in the crawl's stats.size being
15461547
# twice as large as expected when pausing crawls, as stats.size
1547-
# is not necessarily decremented once WACZ files are uploaded
1548-
# This can then have downstream effects on the storage quota check
1549-
stats.size += status.filesAddedSize
1550-
1551-
print(f"stats.size after adding filesAddedSize: {stats.size}", flush=True)
1548+
# isn't decremented once WACZ files are uploaded, so there's a
1549+
# period of time where uploaded WACZs can be counted twice during
1550+
# pausing
1551+
if status.stopReason not in PAUSED_STATES:
1552+
# This is close to a solution except it results in pauses after
1553+
# the first showing a smaller-than-expected size because it
1554+
# no longer counts files added prior to resuming the crawl
1555+
# Kind of seems like what we need here is a way of distinguishing
1556+
# files added prior to previous pauses (which we want to continue
1557+
# to add) from those that were just added
1558+
stats.size += status.filesAddedSize
1559+
print(f"stats.size after adding filesAddedSize: {stats.size}", flush=True)
1560+
else:
1561+
print(
1562+
"not adding filesAddedSize to stats.size, crawl is pausing", flush=True
1563+
)
15521564

15531565
# update status
15541566
status.pagesDone = stats.done

0 commit comments

Comments
 (0)