Skip to content

Commit c844d65

Browse files
refactor: rename operator split to chunk
1 parent 95c4783 commit c844d65

File tree

4 files changed

+8
-8
lines changed

4 files changed

+8
-8
lines changed

graphgen/bases/base_splitter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Callable, Iterable, List, Literal, Optional, Union
55

66
from graphgen.bases.datatypes import Chunk
7-
from graphgen.utils import logger
7+
from graphgen.utils.log import logger
88

99

1010
class BaseSplitter(ABC):
@@ -33,7 +33,7 @@ def split_text(self, text: str) -> List[str]:
3333
"""
3434
Split the input text into smaller chunks.
3535
36-
:param text: The input text to be split.
36+
:param text: The input text to be chunked.
3737
:return: A list of text chunks.
3838
"""
3939

@@ -111,7 +111,7 @@ def _merge_splits(self, splits: Iterable[str], separator: str) -> List[str]:
111111
def _split_text_with_regex(
112112
text: str, separator: str, keep_separator: Union[bool, Literal["start", "end"]]
113113
) -> List[str]:
114-
# Now that we have the separator, split the text
114+
# Now that we have the separator, chunk the text
115115
if separator:
116116
if keep_separator:
117117
# The parentheses in the pattern keep the delimiters in the result.

graphgen/models/splitter/character_splitter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(
1717

1818
def split_text(self, text: str) -> List[str]:
1919
"""Split incoming text and return chunks."""
20-
# First we naively split the large input into a bunch of smaller ones.
20+
# First we naively chunk the large input into a bunch of smaller ones.
2121
separator = (
2222
self._separator if self._is_separator_regex else re.escape(self._separator)
2323
)

graphgen/models/splitter/markdown_splitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77

88
class MarkdownTextRefSplitter(RecursiveCharacterSplitter):
9-
"""Attempts to split the text along Markdown-formatted headings."""
9+
"""Attempts to chunk the text along Markdown-formatted headings."""
1010

1111
def __init__(self, **kwargs: Any) -> None:
1212
"""Initialize a MarkdownTextRefSplitter."""
1313
separators = [
14-
# First, try to split along Markdown headings (starting with level 2)
14+
# First, try to chunk along Markdown headings (levels 1 through 6)
1515
"\n#{1,6} ",
1616
# Note the alternative syntax for headings (below) is not handled here
1717
# Heading level 2

graphgen/models/splitter/recursive_character_splitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
class RecursiveCharacterSplitter(BaseSplitter):
88
"""Splitting text by recursively looking at characters.
99
10-
Recursively tries to split by different characters to find one that works.
10+
Recursively tries to chunk by different characters to find one that works.
1111
"""
1212

1313
def __init__(
@@ -88,7 +88,7 @@ def __init__(
8888
def _split_text_with_regex_from_end(
8989
self, text: str, separator: str, keep_separator: bool
9090
) -> List[str]:
91-
# Now that we have the separator, split the text
91+
# Now that we have the separator, chunk the text
9292
if separator:
9393
if keep_separator:
9494
# The parentheses in the pattern keep the delimiters in the result.

0 commit comments

Comments
 (0)