Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ To convert rm files to other formats, you can use [rmc](https://github.com/rickl

### Unreleased

New features:

- Read and write image asset blocks and path-like scene item blocks introduced
by native notebook image insertion, preserving undecoded payloads for
round-trip safety.

### v0.8.0

New features:
Expand Down
11 changes: 11 additions & 0 deletions src/rmscene/scene_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,17 @@ class Line(SceneItem):
color_rgba: tp.Optional[tuple[int, int, int, int]] = None


@dataclass
class Path(SceneItem):
    """Opaque path-like scene item written by newer reMarkable software.

    The payload has not been fully decoded yet. It is stored verbatim so that
    files containing these blocks can be read and written back without any
    loss of data.
    """

    # Raw, undecoded bytes of the item's value payload.
    data: bytes


## Text


Expand Down
60 changes: 59 additions & 1 deletion src/rmscene/scene_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import logging
import math
import re
import typing as tp
from abc import ABC, abstractmethod
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -167,6 +168,38 @@ def to_stream(self, writer: TaggedBlockWriter):
writer.write_int_pair(5, self.paper_size)


@dataclass
class SceneAssetBlock(Block):
    """Top-level block referencing an external asset, e.g. an inserted image.

    Newer reMarkable software emits this block for native notebook image
    insertion. It contains the filename of the referenced asset, for example
    a JPG stored next to the page ``.rm`` file. Most of the fields inside the
    block are still unnamed, so the raw payload is preserved as-is and only
    the filename is exposed for callers that need to locate the asset.
    """

    BLOCK_TYPE: tp.ClassVar = 0x0E
    # Heuristic: matches an image-like filename anywhere in the raw payload.
    _FILENAME_RE: tp.ClassVar = re.compile(rb"([\w-]+\.(?:jpe?g|png|webp))")

    # Raw contents of subblock 1, kept verbatim.
    data: bytes
    # Filename found in ``data`` by ``from_stream``; ``None`` if nothing matched.
    filename: tp.Optional[str] = None

    def version_info(self, _) -> tuple[int, int]:
        """Return the fixed version pair ``(3, 3)`` for this block."""
        return (3, 3)

    @classmethod
    def from_stream(cls, stream: TaggedBlockReader) -> SceneAssetBlock:
        """Read subblock 1 as raw bytes and pull the asset filename out of it.

        The filename is the first match of ``_FILENAME_RE`` in the payload,
        decoded to ``str``; it is ``None`` when the pattern does not match.
        """
        with stream.read_subblock(1) as subblock:
            payload = stream.data.read_bytes(subblock.size)

        found = cls._FILENAME_RE.search(payload)
        if found is None:
            asset_name = None
        else:
            asset_name = found.group(1).decode()
        return SceneAssetBlock(data=payload, filename=asset_name)

    def to_stream(self, writer: TaggedBlockWriter):
        """Write the preserved payload back out as subblock 1.

        Only ``data`` is written; ``filename`` is not emitted separately.
        """
        with writer.write_subblock(1):
            writer.data.write_bytes(self.data)


@dataclass
class AuthorIdsBlock(Block):
BLOCK_TYPE: tp.ClassVar = 0x09
Expand Down Expand Up @@ -486,6 +519,8 @@ def from_stream(cls, stream: TaggedBlockReader) -> SceneItemBlock:
subclass = SceneTextItemBlock
elif block_type == SceneTombstoneItemBlock.BLOCK_TYPE:
subclass = SceneTombstoneItemBlock
elif block_type == ScenePathItemBlock.BLOCK_TYPE:
subclass = ScenePathItemBlock
else:
raise ValueError(
"unknown scene type %d in %s" % (block_type, stream.current_block)
Expand Down Expand Up @@ -659,6 +694,29 @@ def value_to_stream(self, writer: TaggedBlockWriter, value: si.Line):
# XXX missing "PathItemBlock"? with ITEM_TYPE 0x04


class ScenePathItemBlock(SceneItemBlock):
    """Scene item block for a path-like item used by newer notebook features.

    Blocks of this kind show up alongside native image assets. The item header
    has the standard scene-item shape, but the value payload has not been
    decoded yet. It is carried through untouched so that a read/write
    round-trip keeps the page intact.
    """

    BLOCK_TYPE: tp.ClassVar = 0x0F
    ITEM_TYPE: tp.ClassVar = 0x07

    def version_info(self, writer: TaggedBlockWriter) -> tuple[int, int]:
        """Return the fixed version pair ``(2, 2)``; ``writer`` is not consulted."""
        return (2, 2)

    @classmethod
    def value_from_stream(cls, reader: TaggedBlockReader) -> si.Path:
        """Read the still-unread bytes and wrap them in an ``si.Path``."""
        # NOTE(review): the length comes from ``bytes_remaining_in_block()``,
        # which by its name covers the rest of the whole block -- this presumes
        # nothing else follows the value payload; confirm.
        remaining = reader.bytes_remaining_in_block()
        raw_value = reader.data.read_bytes(remaining)
        return si.Path(raw_value)

    def value_to_stream(self, writer: TaggedBlockWriter, value: si.Path):
        """Write the preserved payload bytes of ``value`` unchanged."""
        writer.data.write_bytes(value.data)


class SceneTextItemBlock(SceneItemBlock):
BLOCK_TYPE: tp.ClassVar = 0x06
ITEM_TYPE: tp.ClassVar = 0x05
Expand Down Expand Up @@ -909,7 +967,7 @@ def build_tree(tree: SceneTree, blocks: Iterable[Block]):
)
item = replace(b.item, value=tree[node_id])
tree.add_item(item, b.parent_id)
elif isinstance(b, (SceneLineItemBlock, SceneGlyphItemBlock)):
elif isinstance(b, (SceneLineItemBlock, SceneGlyphItemBlock, ScenePathItemBlock)):
# Add this entry to children of parent_id
tree.add_item(b.item, b.parent_id)
elif isinstance(b, SceneInfo):
Expand Down
170 changes: 170 additions & 0 deletions tests/test_scene_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,176 @@ def test_blocks_keep_unknown_data_in_value_subblock():
assert block.extra_value_data == bytes.fromhex("8f 0101")


def test_read_scene_asset_block():
    """Reading an asset block keeps the raw payload and exposes the filename.

    The expected payload is the encoded block minus its first two lines (the
    block header and the ``1c`` subblock tag/length prefix).
    """
    encoded_block = bytes.fromhex(
        """
62000000 0003030e
1c5d000000
010c57000000
4be9f7ad407a129d3e4dc2316b84c825
1c33000000
1f029006
2c2a000000
2801
35663830333635642d393732652d343135642d383033302d3666346465653133616339322e6a7067
2c0a000000
1f0000
2c02000000
1100
"""
    )
    expected_payload = bytes.fromhex(
        """
010c57000000
4be9f7ad407a129d3e4dc2316b84c825
1c33000000
1f029006
2c2a000000
2801
35663830333635642d393732652d343135642d383033302d3666346465653133616339322e6a7067
2c0a000000
1f0000
2c02000000
1100
"""
    )
    expected = SceneAssetBlock(
        data=expected_payload,
        filename="5f80365d-972e-415d-8030-6f4dee13ac92.jpg",
    )

    stream = BytesIO(HEADER_V6 + encoded_block)
    parsed = next(read_blocks(stream))

    assert parsed == expected


def test_scene_asset_block_roundtrip():
    """Writing an asset block reproduces the original encoded bytes.

    The check is containment rather than equality, so anything else that
    ``write_blocks`` emits around the block is ignored.
    """
    payload = bytes.fromhex(
        """
010c57000000
4be9f7ad407a129d3e4dc2316b84c825
1c33000000
1f029006
2c2a000000
2801
35663830333635642d393732652d343135642d383033302d3666346465653133616339322e6a7067
2c0a000000
1f0000
2c02000000
1100
"""
    )
    asset_block = SceneAssetBlock(
        data=payload,
        filename="5f80365d-972e-415d-8030-6f4dee13ac92.jpg",
    )

    out = BytesIO()
    write_blocks(out, [asset_block], options={"version": "3.3"})
    written = out.getvalue()

    expected_encoding = bytes.fromhex(
        """
62000000 0003030e
1c5d000000
010c57000000
4be9f7ad407a129d3e4dc2316b84c825
1c33000000
1f029006
2c2a000000
2801
35663830333635642d393732652d343135642d383033302d3666346465653133616339322e6a7067
2c0a000000
1f0000
2c02000000
1100
"""
    )
    assert expected_encoding in written


def test_read_scene_path_item_block_with_raw_value():
    """Reading a path item block decodes the header and keeps the raw value.

    The item header fields are checked individually; the value is only
    checked for its type and for the length of the preserved payload.
    """
    encoded_block = bytes.fromhex(
        """
a4000000 0002020f
1f029605
2f01eb06
3f02bf05
4f0000
5400000000
6c8b000000
07
1c19000000
1f029206
2c10000000
4be9f7ad407a129d3e4dc2316b84c825
2f018d07
3c41000000
10002071428094e341000000000000000035684d448094e3410000803f0000000035684d44d97245440000803f0000803f00207142d9724544000000000000803f
4c19000000
06000000000100000002000000020000000300000000000000
5f02b906
"""
    )
    stream = BytesIO(HEADER_V6 + encoded_block)

    parsed = next(read_blocks(stream))

    assert isinstance(parsed, ScenePathItemBlock)
    assert parsed.parent_id == CrdtId(2, 662)

    # CRDT sequence header of the item.
    item = parsed.item
    assert item.item_id == CrdtId(1, 875)
    assert item.left_id == CrdtId(2, 703)
    assert item.right_id == CrdtId(0, 0)
    assert item.deleted_length == 0

    # The undecoded value payload is kept as raw bytes.
    path_value = item.value
    assert isinstance(path_value, si.Path)
    assert len(path_value.data) == 138


def test_scene_path_item_block_roundtrip():
    """Writing a path item block reproduces the original encoded bytes.

    The block is built by hand from a raw value payload; the expected encoding
    must appear somewhere in the output of ``write_blocks``.
    """
    raw_value = bytes.fromhex(
        """
1c19000000
1f029206
2c10000000
4be9f7ad407a129d3e4dc2316b84c825
2f018d07
3c41000000
10002071428094e341000000000000000035684d448094e3410000803f0000000035684d44d97245440000803f0000803f00207142d9724544000000000000803f
4c19000000
06000000000100000002000000020000000300000000000000
5f02b906
"""
    )
    sequence_item = CrdtSequenceItem(
        item_id=CrdtId(1, 875),
        left_id=CrdtId(2, 703),
        right_id=CrdtId(0, 0),
        deleted_length=0,
        value=si.Path(raw_value),
    )
    path_block = ScenePathItemBlock(
        parent_id=CrdtId(2, 662),
        item=sequence_item,
    )

    out = BytesIO()
    write_blocks(out, [path_block], options={"version": "3.3"})
    written = out.getvalue()

    expected_encoding = bytes.fromhex(
        """
a4000000 0002020f
1f029605
2f01eb06
3f02bf05
4f0000
5400000000
6c8b000000
07
1c19000000
1f029206
2c10000000
4be9f7ad407a129d3e4dc2316b84c825
2f018d07
3c41000000
10002071428094e341000000000000000035684d448094e3410000803f0000000035684d44d97245440000803f0000803f00207142d9724544000000000000803f
4c19000000
06000000000100000002000000020000000300000000000000
5f02b906
"""
    )
    assert expected_encoding in written


def test_error_in_block_contained():
# First block will cause a parsing error at `0xff`. Second block should
# still be parsed.
Expand Down